对图片进行二值化处理
import pytesseract
from PIL import Image
def shibie(image, threshold=140):
    """Binarize an image and return the text Tesseract recognizes in it.

    The image is converted to grayscale, every gray level below
    ``threshold`` is mapped to 0 and every other level to 1, and the
    resulting two-colour image is handed to ``pytesseract``.

    Args:
        image: Whatever ``PIL.Image.open`` accepts (for example a file path).
        threshold: Gray level cut-off; levels strictly below it map to 0.
            Defaults to 140.

    Returns:
        The value returned by ``pytesseract.image_to_string``, unmodified.
    """
    # Lookup table for Image.point: one output value per possible gray level.
    table = [0 if level < threshold else 1 for level in range(256)]

    # The context manager closes the underlying file once OCR has finished.
    with Image.open(image) as img:
        gray = img.convert('L')
        binary = gray.point(table, '1')
        # OCR must run on the thresholded image, not on the intermediate
        # grayscale one, otherwise the binarization step has no effect.
        return pytesseract.image_to_string(binary.convert('RGB'))
实现代码
import requests
from bs4 import BeautifulSoup
import time
# 创建一个会话
from shibie import shibie
# Log in by fetching the login page, running OCR on its captcha and posting
# the form, all through one HTTP session. Retries every two seconds until the
# response contains the logout link text.
s = requests.Session()

# Request settings that never change between attempts are built once.
get_url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
}
# Seconds to wait on each HTTP call; without it requests may block forever.
REQUEST_TIMEOUT = 10

i = 1
while True:
    # Fetch the login page; it carries the captcha URL and hidden form fields.
    r_get = s.get(url=get_url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(r_get.text, 'lxml')

    # Download the captcha through the same session (presumably so that it
    # stays tied to this session's cookies) and save it for the OCR helper.
    image_src = 'https://so.gushiwen.org' + soup.find('img', id='imgCode')['src']
    r_img = s.get(image_src, timeout=REQUEST_TIMEOUT)
    with open('code.png', 'wb') as fq:
        fq.write(r_img.content)

    # Hidden form values that have to be echoed back in the POST.
    viewstate = soup.find('input', id='__VIEWSTATE')['value']
    viewg = soup.find('input', id='__VIEWSTATEGENERATOR')['value']

    # Recognize the captcha automatically. OCR output may carry surrounding
    # whitespace (e.g. a trailing newline), which must not be submitted.
    code = shibie('code.png').strip()

    data = {
        '__VIEWSTATE': viewstate,
        '__VIEWSTATEGENERATOR': viewg,
        'from': 'http://so.gushiwen.org/user/collect.aspx',
        'email': '123456@qq.com',
        'pwd': '123456',
        'code': code,
        'denglu': '登录',
    }
    r_post = s.post(url=post_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)

    # The logout link text only appears in the response after a successful login.
    if '退出登录' in r_post.text:
        print('登陆成功')
        break

    print('第%s次登陆失败' % i)
    i += 1
    # Pause before asking for a fresh captcha.
    time.sleep(2)