Step1
打开有道翻译 通过审查元素,找到下图内容
Step2 上代码
import urllib.request
import urllib.parse
import json
# Endpoint that the Youdao Translate web page posts its form to.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields for the POST request.  Every value is a plain string so that
# urlencode() emits it verbatim.
data = {
    'i': 'i love you',  # text to translate
    'from': 'AUTO',
    'to': 'AUTO',
    # Was `[dict]` -- a list holding the built-in type -- which urlencode()
    # would have serialised as "[<class 'dict'>]" instead of "dict".
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    # NOTE(review): salt/sign are fixed values, presumably copied from a
    # browser request (see Step1); confirm the server still accepts them.
    'salt': '1536832138651',
    'sign': 'd01d0881f67f7d556a6c6d2bb441478e',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    # Spelling kept as-is; presumably it mirrors what the site sends -- verify.
    'action': 'FY_BY_CLICKBUTTION',
    'typoResult': 'false',
}
# urlopen() needs the POST body as bytes.
data = urllib.parse.urlencode(data).encode('utf-8')

# Passing `data` makes this a POST; the `with` block closes the connection.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under translateResult.
target = json.loads(html)
target = target['translateResult'][0][0]['tgt']
print(target)
Step3 优化代码 并且修改headers 模拟正常浏览器
1.在Request对象生成之前 添加headers字典
import urllib.request
import urllib.parse
import json
# Ask the user for the text to translate.
content = input('请输入待查询的内容:')

# Endpoint that the Youdao Translate web page posts its form to.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields for the POST request.  Every value is a plain string so that
# urlencode() emits it verbatim.
data = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    # Was `[dict]` -- a list holding the built-in type -- which urlencode()
    # would have serialised as "[<class 'dict'>]" instead of "dict".
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    # NOTE(review): salt/sign are fixed values, presumably copied from a
    # browser request; confirm the server accepts them for arbitrary input.
    'salt': '1536832138651',
    'sign': 'd01d0881f67f7d556a6c6d2bb441478e',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    # Spelling kept as-is; presumably it mirrors what the site sends -- verify.
    'action': 'FY_BY_CLICKBUTTION',
    'typoResult': 'false',
}
# The request body must be bytes.
data = urllib.parse.urlencode(data).encode('utf-8')

# Headers are supplied when the Request object is created, so the request
# carries a browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}
req = urllib.request.Request(url, data, headers)

# The `with` block closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under translateResult.
target = json.loads(html)
target = target['translateResult'][0][0]['tgt']
print(target)
2.或者动态追加headers
import urllib.request
import urllib.parse
import json
# Ask the user for the text to translate.
content = input('请输入待查询的内容:')

# Endpoint that the Youdao Translate web page posts its form to.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields for the POST request.  Every value is a plain string so that
# urlencode() emits it verbatim.
data = {
    'i': content,
    'from': 'AUTO',
    'to': 'AUTO',
    # Was `[dict]` -- a list holding the built-in type -- which urlencode()
    # would have serialised as "[<class 'dict'>]" instead of "dict".
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    # NOTE(review): salt/sign are fixed values, presumably copied from a
    # browser request; confirm the server accepts them for arbitrary input.
    'salt': '1536832138651',
    'sign': 'd01d0881f67f7d556a6c6d2bb441478e',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    # Spelling kept as-is; presumably it mirrors what the site sends -- verify.
    'action': 'FY_BY_CLICKBUTTION',
    'typoResult': 'false',
}
# The request body must be bytes.
data = urllib.parse.urlencode(data).encode('utf-8')

# Here the header is appended after the Request object already exists.
req = urllib.request.Request(url, data)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')

# The `with` block closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    html = response.read().decode('utf-8')

# The reply is JSON; the translated text is nested under translateResult.
target = json.loads(html)
target = target['translateResult'][0][0]['tgt']
print(target)
当然以上代码还只是个雏形,因为你频繁地用同一个IP访问,服务器还是可能会把你屏蔽掉。有两个解决办法:
one
通过time 模块延迟访问 减少访问频率
import urllib.request
import urllib.parse
import json
import time
# The endpoint and the browser-like User-Agent never change, so they are
# defined once instead of on every pass through the loop.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

# Keep translating until the user types the quit command.
while True:
    content = input('请输入待查询的内容(输入"q!"退出程序):')
    if content == 'q!':
        break

    # Form fields for the POST request.  Every value is a plain string so
    # that urlencode() emits it verbatim.
    data = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        # Was `[dict]` -- a list holding the built-in type -- which
        # urlencode() would have serialised as "[<class 'dict'>]".
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        # NOTE(review): salt/sign are fixed values, presumably copied from a
        # browser request; confirm the server accepts them for any input.
        'salt': '1536832138651',
        'sign': 'd01d0881f67f7d556a6c6d2bb441478e',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        # Spelling kept as-is; presumably mirrors what the site sends.
        'action': 'FY_BY_CLICKBUTTION',
        'typoResult': 'false',
    }
    # The request body must be bytes.
    data = urllib.parse.urlencode(data).encode('utf-8')

    req = urllib.request.Request(url, data)
    req.add_header('User-Agent', user_agent)

    # The `with` block closes the connection once the body has been read,
    # so connections do not pile up across iterations.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    # The reply is JSON; the translated text is nested under translateResult.
    target = json.loads(html)
    target = target['translateResult'][0][0]['tgt']
    print(target)

    # Wait before the next request to reduce the request frequency.
    time.sleep(4)
two
代理
a.参数是一个字典 {'类型': '代理ip:端口号'},例如 {'http': '127.0.0.1:8080'}
# Create the proxy handler.  The mapping is left empty here as a placeholder;
# fill it with a {'scheme': 'proxy-ip:port'} entry before use.
proxy_support = urllib.request.ProxyHandler({})
b.定制创建一个opener
# Build a custom opener that includes the proxy handler.
opener = urllib.request.build_opener(proxy_support)
c.安装opener
# Install the opener as the default one used by urllib.request.urlopen().
urllib.request.install_opener(opener)
如果只是临时用下那个opener 可以不用安装,直接调用就行了 opener.open(url)
当然这里还可以通过opener.addheaders = [('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')]来私人定制标头 让服务器觉得是浏览器在访问
还有这里需要解释一下opener,其实之前大家看到的urlopen()函数就已经接触到opener了 不信看urlopen()源代码