Python 爬虫之 Selenium 知乎小 Demo

from selenium import webdriver
import time
from PIL import Image
from io import BytesIO
import base64
from zheye import zheye
from pytesseract import image_to_string
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException
# Log in to Zhihu with Selenium and deal with whichever captcha the page shows:
# the "upside-down Chinese character" captcha is solved with zheye and clicked,
# the plain English-letter captcha is saved and run through pytesseract.
#
# NOTE(review): the credentials below are hard-coded in the script; move them
# to configuration or environment variables before sharing this file.
USERNAME = '18518753265'
PASSWORD = 'ljh12345678'

SUBMIT_BUTTON_XPATH = '//button[@class="Button SignFlow-submitButton Button--primary Button--blue"]'
DATA_URI_PREFIX = 'data:image/jpg;base64,'


def _save_captcha_image(image_element, path):
    """Decode the base64 data URI held in the element's ``src`` and write it to ``path``."""
    encoded = image_element.get_attribute('src').replace(DATA_URI_PREFIX, '').replace('%0A', '\n')
    with open(path, 'wb') as file:
        file.write(base64.b64decode(encoded))


driver = webdriver.Chrome(executable_path='/home/ljh/桌面/driver/chromedriver')
# driver = webdriver.Firefox(executable_path='/home/ljh/桌面/driver/geckodriver')
driver.get('https://www.zhihu.com/signup?next=%2F')
# Switch from the sign-up form to the login form. The class name must not
# contain embedded spaces, otherwise the XPath never matches.
driver.find_element_by_xpath('//div[@class="SignContainer-switch"]/span').click()
time.sleep(3)

driver.find_element_by_xpath('//input[@name="username"]').send_keys(USERNAME)
driver.find_element_by_xpath('//input[@name="password"]').send_keys(PASSWORD)
driver.find_element_by_xpath(SUBMIT_BUTTON_XPATH).click()
time.sleep(3)

# Only the captcha lookup is guarded: a missing submit button further down
# should surface as an error instead of being mistaken for "English captcha".
try:
    image_element = driver.find_element_by_xpath('//img[@class="Captcha-chineseImg"]')
except NoSuchElementException:
    print('不是倒立验证码,是正常的英文字母验证码')
    # Plain image captcha with English letters.
    image_element = driver.find_element_by_xpath('//img[@class="Captcha-englishImg"]')
    _save_captcha_image(image_element, 'englishImg.gif')
    image = Image.open('englishImg.gif')
    image_str = image_to_string(image)
    print(image_str)
    # A captcha-solving service can be plugged in here for better accuracy.
else:
    _save_captcha_image(image_element, 'chineseImg.gif')
    # Recognise the upside-down characters in the captcha.
    z = zheye()
    # Fetch the recognition result: one entry per upside-down character.
    postions = z.Recognize('chineseImg.gif')
    print('zheye', postions)
    for position in postions:
        print(position)
        # Click every detected character, not just the last one.
        # The original passes index 1 as the x offset and index 0 as the y
        # offset, and halves both -- presumably because the saved image is
        # twice the size of the on-page element (TODO confirm). Floor division
        # keeps the offsets integral.
        ActionChains(driver).move_to_element_with_offset(
            image_element, int(position[1]) // 2, int(position[0]) // 2
        ).click().perform()
        time.sleep(1)
    time.sleep(5)
    driver.find_element_by_xpath(SUBMIT_BUTTON_XPATH).click()
Python 爬虫之 Selenium 知乎小 Demo

推荐阅读更多精彩内容