可以使用pytesseract模块和PIL模块解决不太复杂的验证码问题
image.png
简单网页验证码解决思路:
1. 截屏整个页面
2. 获得验证码元素的坐标数据
3. 根据坐标数据从整页截图中抠出验证码图片
4. 使用pytesseract模块对抠出的验证码图片进行识别
import time
import re
from PIL import Image
from selenium import webdriver
import pytesseract
def test1():
    """Screenshot the JPress registration page and save the cropped captcha.

    Opens the registration page in Chrome, saves a full-page screenshot,
    looks up the element with id ``captchaimg``, crops the screenshot to
    that element's bounding box and saves the crop as a second PNG. Both
    files are named after the current ``time.time()`` value and written
    to the current working directory.

    The browser is always shut down, even if a step fails.
    """
    browser = webdriver.Chrome()
    try:
        browser.get('http://localhost:8188/jpress/user/register')
        browser.maximize_window()

        # Capture the whole page; the captcha is cut out of this image.
        picture_name1 = str(time.time()) + '.png'
        browser.save_screenshot(picture_name1)

        # The ("id", value) locator form works on both Selenium 3 and 4;
        # the find_element_by_id helper was removed in Selenium 4.3.
        ce = browser.find_element("id", "captchaimg")
        print(ce.location)

        # Bounding box of the captcha element, in CSS pixels.
        left = ce.location['x']
        top = ce.location['y']
        right = ce.size['width'] + left
        bottom = ce.size['height'] + top

        # Element coordinates are scaled by the device pixel ratio before
        # cropping, because on high-DPI (retina) screens the screenshot has
        # more pixels than the page has CSS pixels. When the ratio is 1 the
        # scaling is a no-op, so no separate non-retina branch is needed.
        dpr = browser.execute_script('return window.devicePixelRatio')
        print(dpr)

        # Context manager closes the screenshot file handle when done.
        with Image.open(picture_name1) as im:
            img = im.crop((left * dpr, top * dpr, right * dpr, bottom * dpr))
            picture_name2 = str(time.time()) + '.png'
            img.save(picture_name2)
    finally:
        # quit() ends the whole WebDriver session; close() would only
        # close the current window.
        browser.quit()
def test2():
    """Run OCR on ``test.jpg`` and print the recognised text, cleaned up.

    The image is read from ``test.jpg`` in the current working directory
    and passed to ``pytesseract.image_to_string``. Every character that is
    not in one of the ranges U+4E00-U+9FA5, ``0-9``, ``A-Z`` or ``a-z`` is
    removed from the OCR output before it is printed.
    """
    # Context manager closes the image file handle once OCR is done.
    with Image.open('test.jpg') as image1:
        # Named raw_text (not ``str``) so the builtin is not shadowed.
        raw_text = pytesseract.image_to_string(image1)

    # Strip the special characters that OCR tends to produce.
    result = re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", raw_text)
    # NOTE: slice ``result[0:4]`` here if only the first four characters
    # are wanted.
    print(result)