from selenium import webdriver
import time
from PIL import Image
from io import BytesIO
import base64
from zheye import zheye
from pytesseract import image_to_string
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException
# Selenium demo: log in to Zhihu and try to get past the login captcha.
#
# Flow: open the sign-up page, switch to the login form, submit the
# credentials, then handle whichever captcha image is on the page:
#   * "Captcha-chineseImg": upside-down Chinese characters, located with
#     zheye and clicked one by one, after which the form is re-submitted;
#   * "Captcha-englishImg": ordinary letters, OCR'd with pytesseract. The
#     OCR text is only printed; it is not typed into the form.
# If neither captcha image exists, the NoSuchElementException raised by the
# English-captcha lookup propagates to the caller.
#
# NOTE(review): `find_element_by_xpath` and `executable_path` look like the
# Selenium 3 style API - confirm against the installed Selenium version.

_SUBMIT_BUTTON_XPATH = '//button[@class="Button SignFlow-submitButton Button--primary Button--blue"]'
_DATA_URI_PREFIX = 'data:image/jpg;base64,'


def _save_captcha_image(image_element, path):
    """Decode the base64 data URI in the element's ``src`` and write it to *path*.

    The data-URI prefix is stripped and URL-encoded line feeds (``%0A``) are
    turned back into newlines before decoding. The file is written in binary
    mode, overwriting any existing file at *path*.
    """
    encoded = image_element.get_attribute('src').replace(_DATA_URI_PREFIX, '').replace('%0A', '\n')
    with open(path, 'wb') as file:
        file.write(base64.b64decode(encoded))


driver = webdriver.Chrome(executable_path='/home/ljh/桌面/driver/chromedriver')
# driver = webdriver.Firefox(executable_path='/home/ljh/桌面/driver/geckodriver')
driver.get('https://www.zhihu.com/signup?next=%2F')

# Switch from the sign-up form to the login form.
# NOTE(review): selector corrected from "SignContainer- switch" (stray space),
# which is inconsistent with the other hyphenated class names used in this
# script - confirm against the live page.
driver.find_element_by_xpath('//div[@class="SignContainer-switch"]/span').click()
time.sleep(3)

driver.find_element_by_xpath('//input[@name="username"]').send_keys('18518753265')
driver.find_element_by_xpath('//input[@name="password"]').send_keys('ljh12345678')
driver.find_element_by_xpath(_SUBMIT_BUTTON_XPATH).click()
time.sleep(3)

# Only the captcha lookup is guarded: a missing element anywhere else (for
# example the submit button) must not be mistaken for "no Chinese captcha".
try:
    image_element = driver.find_element_by_xpath('//img[@class="Captcha-chineseImg"]')
except NoSuchElementException:
    print('不是倒立验证码,是正常的英文字母验证码')
    # Ordinary image captcha.
    image_element = driver.find_element_by_xpath('//img[@class="Captcha-englishImg"]')
    _save_captcha_image(image_element, 'englishImg.gif')
    image = Image.open('englishImg.gif')
    image_str = image_to_string(image)
    print(image_str)
    # A captcha-solving service could be used here instead of local OCR.
else:
    _save_captcha_image(image_element, 'chineseImg.gif')
    # Recognize the upside-down-character captcha.
    z = zheye()
    # Fetch the recognition result.
    positions = z.Recognize('chineseImg.gif')
    print('zheye', positions)
    for position in positions:
        print(position)
        # Index 1 is used as the x offset and index 0 as the y offset, both
        # halved - presumably zheye returns (y, x) on an image twice the
        # displayed size; TODO confirm.
        ActionChains(driver).move_to_element_with_offset(
            image_element, int(position[1]) / 2, int(position[0]) / 2
        ).click().perform()
        time.sleep(1)
    time.sleep(5)
    driver.find_element_by_xpath(_SUBMIT_BUTTON_XPATH).click()
Python爬虫之Selenium知乎小Demo
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
推荐阅读更多精彩内容
- 本文参加#致我们单纯的小美好#活动,本人承诺,文章内容为原创,且未在其他平台发表过。 他是我没有说出来的喜欢,是我...