运行环境 python3.7.6
@Date:2021-06-14
由于知乎的滑动验证码没有验证成功后的原图,需要通过opencv(pip install opencv-python)识别出缺口位置。并且知乎存在识别chromedriver的问题,我们可使用 undetected-chromedriver(pip install undetected-chromedriver)库进行绕过。但有几个问题需要我们注意:
- 在使用 undetected-chromedriver 的过程中会自动下载最新版本的 chromedriver,并放在运行目录下,下次运行不会重复下载,因此不用我们手动下载 chromedriver,但要注意浏览器与 chromedriver 的版本必须匹配。
- 启动undetected-chromedriver偶尔会失败,如果一次启动chrome失败可以多试一下。
以下是完整的代码,一些需要注意的细节代码中都有详细的注释
# 使用opencv进行知乎的滑动验证识别并模拟登录
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as Ec
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import time
import requests
import os
import random
import cv2
import numpy as np
import undetected_chromedriver.v2 as uc
"""
pip 安装 undetected_chromedriver 包,可绕过知乎对 chromedriver 识别的问题,
在使用 undetected-chromedriver 的过程中会自动下载最新版本的 chromedriver,放在运行目录下,下次运行不会重复下载,
大家不用手动下载 chromedriver,但要注意浏览器与 chromedriver 的版本必须匹配
"""
class Code(object):
    """Slider-captcha helper: finds the puzzle gap with OpenCV and drags the slider.

    The class downloads the slider (puzzle piece) and background images,
    locates the gap with template matching, generates a drag track and
    replays that track through Selenium ``ActionChains``.
    """

    def __init__(self, slider_ele=None, background_ele=None, count=1, save_image=False):
        """
        :param slider_ele: slider (puzzle piece) element
        :param background_ele: background image element
        :param count: number of verification retries
        :param save_image: whether to keep the images produced during
            recognition; defaults to False (temporary files are deleted)
        """
        self.count = count
        self.save_images = save_image
        self.slider_ele = slider_ele
        self.background_ele = background_ele

    def get_slide_locus(self, distance):
        """Build a list of per-step x offsets for dragging the slider.

        The motion is simulated in time slices of 0.3: acceleration is 2
        until 4/5 of the (padded) distance has been covered, then -3. Each
        slice's displacement is rounded to an integer before being stored.

        :param distance: gap offset in pixels; 8 is added to it before the
            track is generated
        :return: list of integer offsets, one per time slice
        """
        # The target is padded by 8 px before the track is computed.
        distance += 8
        v = 0
        t = 0.3  # duration of one time slice
        tracks = []  # displacement of each slice
        current = 0
        mid = distance * 4 / 5  # switch from accelerating to decelerating here
        while current <= distance:
            a = 2 if current < mid else -3
            v0 = v
            s = v0 * t + 0.5 * a * (t ** 2)
            current += s
            # NOTE: every step is rounded independently, so sum(tracks) is not
            # guaranteed to equal the padded distance exactly.
            tracks.append(round(s))
            v = v0 + a * t
        return tracks

    def slide_verification(self, driver, slide_element, distance):
        """Press the slider, replay a generated track, then release it.

        :param driver: WebDriver instance
        :param slide_element: slider WebElement to drag
        :param distance: distance to slide, in pixels
        :return: None
        """
        print('滑动距离是: ', distance)
        # Generate the drag track for the requested distance.
        locus = self.get_slide_locus(distance)
        # Report the real sum of the track (the original printed `distance`
        # here, which contradicted the message).
        print('生成的滑动轨迹为:{},轨迹的距离之和为{}'.format(locus, sum(locus)))
        # Press and hold the left mouse button on the slider.
        ActionChains(driver).click_and_hold(slide_element).perform()
        time.sleep(0.5)
        # Replay the track one step at a time with a small random y jitter.
        for loc in locus:
            time.sleep(0.01)
            # Original author's note: set DEFAULT_MOVE_DURATION to 50 in
            # selenium\webdriver\common\actions\pointer_input.py, otherwise
            # the drag is very slow.
            ActionChains(driver).move_by_offset(loc, random.randint(-5, 5)).perform()
        # The original also built a context_click() chain here without calling
        # perform(); it never executed, so it has been dropped.
        # Release the mouse button.
        ActionChains(driver).release(on_element=slide_element).perform()

    def onload_save_img(self, url, filename="image.png"):
        """Download an image and write it to disk.

        :param url: image URL
        :param filename: path of the file to write
        :raises requests.RequestException: if the request fails, times out
            or returns an HTTP error status
        :return: None
        """
        try:
            # A timeout keeps a stalled connection from hanging the login flow.
            response = requests.get(url, timeout=10)
            # Reject error pages so they are not saved as if they were images.
            response.raise_for_status()
        except requests.RequestException:
            print('图片下载失败')
            raise
        with open(filename, 'wb') as f:
            f.write(response.content)

    def get_element_slide_distance(self, slider_ele, background_ele, correct=0):
        """Compute the x offset of the gap from the slider and background elements.

        Both images are downloaded from the elements' ``src`` attributes,
        converted to grayscale, the slider is inverted, and the gap is
        located with ``cv2.matchTemplate``.

        :param slider_ele: slider (puzzle piece) element
        :param background_ele: background image element
        :param correct: margin applied to the rectangle drawn when images
            are kept for inspection
        :raises ValueError: if a downloaded file cannot be decoded as an image
        :return: x coordinate (int) of the best match in the background
        """
        # Captcha image URLs.
        slider_url = slider_ele.get_attribute('src')
        background_url = background_ele.get_attribute('src')
        # Download both images to the working directory.
        slider = 'slider.jpg'
        background = 'background.jpg'
        self.onload_save_img(slider_url, slider)
        self.onload_save_img(background_url, background)
        # Flag 0 loads the images as single-channel grayscale arrays.
        slider_pic = cv2.imread(slider, 0)
        background_pic = cv2.imread(background, 0)
        # cv2.imread returns None instead of raising on undecodable data.
        if slider_pic is None or background_pic is None:
            raise ValueError('downloaded captcha image could not be decoded')
        # Size of the slider image.
        width, height = slider_pic.shape[::-1]
        # Save the grayscale versions.
        slider01 = 'slider01.jpg'
        slider02 = 'slider02.jpg'
        background01 = 'background01.jpg'
        cv2.imwrite(slider01, slider_pic)
        cv2.imwrite(background01, background_pic)
        # Reload the saved slider, convert to grayscale and invert it.
        slider_pic = cv2.imread(slider01)
        slider_pic = cv2.cvtColor(slider_pic, cv2.COLOR_BGR2GRAY)
        # (The original wrapped this in abs(), which does nothing on uint8.)
        slider_pic = 255 - slider_pic
        cv2.imwrite(slider02, slider_pic)
        # Reload both images from disk for matching.
        slider_pic = cv2.imread(slider02)
        background_pic = cv2.imread(background01)
        # Pause kept from the original; its purpose is not evident here
        # (presumably pacing) -- TODO confirm whether it is needed.
        time.sleep(3)
        # Documented argument order: the searched image first, then the template.
        result = cv2.matchTemplate(background_pic, slider_pic, cv2.TM_CCOEFF_NORMED)
        # Row/column of the highest matching score.
        top, left = np.unravel_index(result.argmax(), result.shape)
        # Plain ints for printing, drawing and the return value.
        left, top = int(left), int(top)
        print('当前滑块缺口位置', (left, top, left + width, top + height))
        if self.save_images:
            # Keep the files and display the detected area.
            loc = [(left + correct, top + correct), (left + width - correct, top + height - correct)]
            self.image_crop(background, loc)
        else:
            # Delete the temporary files.
            for path in (slider01, slider02, background01, background, slider):
                os.remove(path)
            print("临时图片文件已删除")
        # Distance the slider has to travel.
        return left

    def image_crop(self, image, loc):
        """Draw the detected rectangle on an image and show it in a window.

        :param image: image array, or a path to an image file (the caller in
            this class passes a file name, which the original handed straight
            to ``cv2.rectangle``)
        :param loc: two ``(x, y)`` points: top-left and bottom-right corners
        :raises ValueError: if ``image`` is a path that cannot be decoded
        :return: None
        """
        if isinstance(image, (str, os.PathLike)):
            path = os.fspath(image)
            image = cv2.imread(path)
            if image is None:
                raise ValueError('could not read image: {}'.format(path))
        cv2.rectangle(image, loc[0], loc[1], (7, 249, 151), 2)
        cv2.imshow('Show', image)
        # Blocks until a key is pressed in the preview window.
        cv2.waitKey(0)
        cv2.destroyAllWindows()
class Login(object):
    """Log in to Zhihu with a password and solve the slider captcha.

    Opens a Chrome session through ``undetected_chromedriver``, fills in the
    sign-in form, then repeatedly solves the slider captcha with ``Code``
    until the browser lands on the home page or the attempts run out.
    """

    def __init__(self, user, password, retry):
        """
        :param user: account name typed into the login form
        :param password: password typed into the login form
        :param retry: maximum number of captcha attempts
        """
        self.browser = uc.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.url = 'https://www.zhihu.com/signin'
        self.sli = Code()
        self.user = user
        self.password = password
        self.retry = retry  # number of captcha attempts

    def login(self):
        """Run the login flow.

        :return: dict of cookie name -> value on success, or None when the
            home page was not reached within ``retry`` attempts
        """
        # Imported here so the block does not depend on the file's import list.
        from selenium.common.exceptions import WebDriverException

        # Open the sign-in page.
        self.browser.get(self.url)
        # Wait for the tab to exist instead of looking it up immediately.
        login_element = self.wait.until(
            Ec.presence_of_element_located(
                (By.XPATH, '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[1]/div[2]'))
        )
        # Zhihu blocks normal element clicks, so click through JavaScript.
        self.browser.execute_script("arguments[0].click();", login_element)
        # Account.
        username = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.SignFlow-account input'))
        )
        username.send_keys(self.user)
        # Password.
        password = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.SignFlow-password input'))
        )
        password.send_keys(self.password)
        # Submit button.
        submit = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.Button.SignFlow-submitButton'))
        )
        submit.click()
        time.sleep(3)
        # The original counted `k = 1; while k < retry`, which made only
        # retry - 1 attempts (none at all for retry=1).
        for _ in range(self.retry):
            # 1. Background image of the captcha.
            bg_img = self.wait.until(
                Ec.presence_of_element_located((By.CSS_SELECTOR, '.yidun_bgimg .yidun_bg-img'))
            )
            # Puzzle-piece image.
            front_img = self.wait.until(
                Ec.presence_of_element_located((By.CSS_SELECTOR, '.yidun_bgimg .yidun_jigsaw'))
            )
            # Offset of the gap.
            distance = self.sli.get_element_slide_distance(front_img, bg_img)
            print('滑动距离是', distance)
            # 2. No scaling is applied for Zhihu's captcha; subtract the
            # slider's leading offset.
            distance = distance - 4
            print('实际滑动距离是', distance)
            # Slider handle. find_element(By...) replaces the deprecated
            # find_element_by_* helpers.
            element = self.browser.find_element(By.CSS_SELECTOR, '.yidun_slider')
            # Drag the slider.
            self.sli.slide_verification(self.browser, element, distance)
            time.sleep(4)
            # Press the submit button again if it is still available.
            try:
                submit = self.wait.until(
                    Ec.element_to_be_clickable((By.CSS_SELECTOR, '.Button.SignFlow-submitButton'))
                )
                submit.click()
                time.sleep(3)
            except WebDriverException:
                # Best effort: ignore a missing or unclickable button and let
                # the URL check below decide whether the login succeeded.
                pass
            end_url = self.browser.current_url
            print(end_url)
            # A successful login redirects to the home page.
            if end_url == "https://www.zhihu.com/":
                return self.get_cookies()
            time.sleep(3)
        return None

    def get_cookies(self):
        """Collect the browser session's cookies.

        :return: dict mapping cookie name to cookie value
        """
        cookies_dict = {
            cookie["name"]: cookie["value"]
            for cookie in self.browser.get_cookies()
        }
        print("cookie值为:", cookies_dict)
        return cookies_dict

    def __del__(self):
        """Shut the browser down when the object is collected."""
        # __init__ may have failed before `browser` was assigned.
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        try:
            # quit() ends the whole driver session; close() only closes the
            # current window.
            browser.quit()
        except Exception as exc:
            # Never propagate out of a finalizer; report and carry on.
            print('failed to close browser:', exc)
        else:
            print('界面关闭')
if __name__ == "__main__":
    # Script entry point: replace the masked placeholders with a real account
    # and password; the third argument is the retry count passed to Login.
    session = Login("********", "********", 5)
    session.login()
以上代码,只要我们配置好相关的运行环境及依赖库就可以直接运行,完成模拟登录并获取到cookie值。