使用opencv完成知乎的滑动验证并获取cookie值

运行环境 python3.7.6
@Date:2021-06-14
由于知乎的滑动验证码没有验证成功后的原图,需要通过 opencv(pip install opencv-python)识别出缺口位置。并且知乎存在识别 chromedriver 的问题,我们可使用 undetected-chromedriver(pip install undetected-chromedriver)库进行绕过。但有几个问题需要我们注意:

  1. 在使用 undetected-chromedriver 的过程中会自动下载最新版本的 chromedriver,并放在运行目录下,下次运行不会重复下载,不需要我们手动下载 chromedriver,但要注意浏览器与 chromedriver 的版本匹配。
  2. 启动undetected-chromedriver偶尔会失败,如果一次启动chrome失败可以多试一下。
    以下是完整的代码,一些需要注意的细节代码中都有详细的注释
# 使用opencv进行知乎的滑动验证识别并模拟登录

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as Ec
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import time
import requests
import os
import random
import cv2
import numpy as np
import undetected_chromedriver.v2 as uc

"""
pip 安装 undetected_chromedriver 包,可绕过知乎对 chromedriver 识别的问题,
在使用 undetected-chromedriver 的过程中会自动下载最新版本的 chromedriver,放在运行目录下,下次运行不会重复下载,
大家不用主动去下载 chromedriver,但要注意浏览器与 chromedriver 的版本匹配
"""


class Code(object):
    """Slider-captcha helper.

    Downloads the slider (puzzle piece) and background images, locates the
    gap in the background with OpenCV template matching, and drags the
    slider element along a generated accelerate-then-decelerate track.
    """

    def __init__(self, slider_ele=None, background_ele=None, count=1, save_image=False):
        """
        :param slider_ele: slider (puzzle piece) element
        :param background_ele: background image element
        :param count: number of verification retries
        :param save_image: whether to keep/show the images produced during
            recognition; defaults to False (temporary files are deleted)
        """
        self.count = count
        self.save_images = save_image
        self.slider_ele = slider_ele
        self.background_ele = background_ele

    def get_slide_locus(self, distance):
        """Build a list of per-step horizontal offsets for dragging the slider.

        The track accelerates (a = 2) for the first 4/5 of the way and then
        decelerates (a = -3), so the movement does not look like a constant
        speed machine drag.

        :param distance: target distance in pixels; 8 is added to it before
            the track is generated
        :return: list of integer offsets, one per 0.3 time-unit step. Each
            step is rounded individually, so the sum only approximates the
            simulated distance.
        """
        distance += 8
        v = 0
        t = 0.3  # duration of one simulated step
        tracks = []  # displacement covered in each step
        current = 0
        mid = distance * 4 / 5  # switch from acceleration to deceleration here
        while current <= distance:
            a = 2 if current < mid else -3
            v0 = v
            # Uniformly accelerated motion: s = v0*t + a*t^2/2
            s = v0 * t + 0.5 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t
        return tracks

    def slide_verification(self, driver, slide_element, distance):
        """Press the slider, drag it along a generated track, and release it.

        :param driver: webdriver instance
        :param slide_element: slider web element to drag
        :param distance: distance to slide, in pixels
        :return: None
        """
        print('滑动距离是: ', distance)
        # Generate the movement track for the requested distance.
        locus = self.get_slide_locus(distance)
        # Report the real sum of the generated offsets, not the requested distance.
        print('生成的滑动轨迹为:{},轨迹的距离之和为{}'.format(locus, sum(locus)))
        # Press and hold the left mouse button on the slider.
        ActionChains(driver).click_and_hold(slide_element).perform()
        time.sleep(0.5)
        # Replay the track step by step with a small random vertical jitter.
        for loc in locus:
            time.sleep(0.01)
            # NOTE: the original author advises lowering DEFAULT_MOVE_DURATION
            # to 50 in selenium/webdriver/common/actions/pointer_input.py,
            # otherwise every step is slow.
            ActionChains(driver).move_by_offset(loc, random.randint(-5, 5)).perform()

        # Release the mouse button.
        ActionChains(driver).release(on_element=slide_element).perform()

    def onload_save_img(self, url, filename="image.png"):
        """Download an image and write it to disk.

        :param url: image URL
        :param filename: path of the file to write
        :return: None
        :raises requests.RequestException: if the request fails, times out,
            or the server answers with an HTTP error status
        """
        try:
            # A timeout keeps a stalled connection from hanging the login flow;
            # raise_for_status stops an error page from being saved as an image.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException:
            print('图片下载失败')
            raise
        else:
            with open(filename, 'wb') as f:
                f.write(response.content)

    def get_element_slide_distance(self, slider_ele, background_ele, correct=0):
        """Compute how far the slider has to move to reach the gap.

        :param slider_ele: slider (puzzle piece) element; its ``src`` is downloaded
        :param background_ele: background image element; its ``src`` is downloaded
        :param correct: number of pixels by which the highlighted rectangle is
            shrunk on every side when ``save_image`` is enabled
        :return: x coordinate (int) of the best match of the slider inside the
            background image
        :raises ValueError: if a downloaded image cannot be decoded
        """
        # Image URLs of the captcha.
        slider_url = slider_ele.get_attribute('src')
        background_url = background_ele.get_attribute('src')

        # Download both images into the working directory.
        slider = 'slider.jpg'
        background = 'background.jpg'
        self.onload_save_img(slider_url, slider)
        self.onload_save_img(background_url, background)

        # Load as grayscale arrays (flag 0).
        slider_pic = cv2.imread(slider, 0)
        background_pic = cv2.imread(background, 0)
        # cv2.imread returns None instead of raising on an unreadable file.
        if slider_pic is None or background_pic is None:
            raise ValueError('captcha image could not be decoded')

        # Size of the slider image.
        width, height = slider_pic.shape[::-1]

        # Intermediate files produced while processing.
        slider01 = 'slider01.jpg'
        slider02 = 'slider02.jpg'
        background01 = 'background01.jpg'

        cv2.imwrite(slider01, slider_pic)
        cv2.imwrite(background01, background_pic)

        # Re-read the saved slider and convert it back to a single channel.
        slider_pic = cv2.imread(slider01)
        slider_pic = cv2.cvtColor(slider_pic, cv2.COLOR_BGR2GRAY)

        # Invert the slider.
        slider_pic = abs(255 - slider_pic)
        cv2.imwrite(slider02, slider_pic)

        # Reload both images in the default (3-channel) mode for matching.
        slider_pic = cv2.imread(slider02)
        background_pic = cv2.imread(background01)
        # NOTE(review): purpose of this pause is not visible here; kept as in the original.
        time.sleep(3)

        # Search for the slider (template) inside the background (image).
        result = cv2.matchTemplate(background_pic, slider_pic, cv2.TM_CCOEFF_NORMED)

        # Row/column of the highest score = top-left corner of the best match.
        top, left = np.unravel_index(result.argmax(), result.shape)
        # Plain ints are safe to pass on as pixel coordinates and offsets.
        top, left = int(top), int(left)

        print('当前滑块缺口位置', (left, top, left + width, top + height))

        if self.save_images:
            # Keep the files and show the detected gap on the background.
            loc = [(left + correct, top + correct), (left + width - correct, top + height - correct)]
            self.image_crop(background, loc)

        else:
            # Remove the temporary files.
            os.remove(slider01)
            os.remove(slider02)
            os.remove(background01)
            os.remove(background)
            os.remove(slider)
            print("临时图片文件已删除")

        # Distance the slider has to travel.
        return left

    def image_crop(self, image, loc):
        """Draw the detected gap rectangle on an image and display it.

        Blocks until a key is pressed in the preview window.

        :param image: image array, or path of an image file to load
        :param loc: two points ``[(x1, y1), (x2, y2)]`` giving opposite corners
            of the rectangle
        :raises ValueError: if ``image`` is a path that cannot be decoded
        """
        # cv2.rectangle / cv2.imshow need an image array, not a file name.
        if isinstance(image, str):
            path = image
            image = cv2.imread(path)
            if image is None:
                raise ValueError('image could not be decoded: {}'.format(path))
        cv2.rectangle(image, loc[0], loc[1], (7, 249, 151), 2)
        cv2.imshow('Show', image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


class Login(object):
    """Log in to Zhihu with account and password, solving the slider captcha."""

    def __init__(self, user, password, retry):
        """
        :param user: account name typed into the login form
        :param password: password typed into the login form
        :param retry: maximum number of captcha attempts
        """
        self.browser = uc.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.url = 'https://www.zhihu.com/signin'
        self.sli = Code()
        self.user = user
        self.password = password
        self.retry = retry  # maximum number of captcha attempts

    def login(self):
        """Run the login flow.

        Opens the sign-in page, switches to password login, submits the
        credentials, then solves the slider captcha up to ``retry`` times.

        :return: dict of cookie name -> value on success, otherwise None
        """
        # Function-scope import so this class does not depend on a new
        # file-level import.
        from selenium.common.exceptions import WebDriverException

        home_url = "https://www.zhihu.com/"  # page reached after a successful login

        self.browser.get(self.url)
        # find_element(By...) works on Selenium 3 and 4; the find_element_by_*
        # helpers were removed from Selenium 4.
        login_element = self.browser.find_element(
            By.XPATH,
            '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[1]/div[2]')
        # Zhihu blocks the normal element click, so click through JavaScript.
        self.browser.execute_script("arguments[0].click();", login_element)

        # Account name.
        username = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.SignFlow-account input'))
        )
        username.send_keys(self.user)
        # Password.
        password = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.SignFlow-password input'))
        )
        password.send_keys(self.password)

        # Submit button.
        submit = self.wait.until(
            Ec.element_to_be_clickable((By.CSS_SELECTOR, '.Button.SignFlow-submitButton'))
        )
        submit.click()
        time.sleep(3)

        # If the login went through without a captcha, waiting for captcha
        # elements below would only time out.
        if self.browser.current_url == home_url:
            return self.get_cookies()

        # Exactly ``retry`` attempts (the previous counter stopped one short).
        for _ in range(self.retry):
            # 1. Background image of the captcha.
            bg_img = self.wait.until(
                Ec.presence_of_element_located((By.CSS_SELECTOR, '.yidun_bgimg .yidun_bg-img'))
            )
            # Puzzle-piece image of the captcha.
            front_img = self.wait.until(
                Ec.presence_of_element_located((By.CSS_SELECTOR, '.yidun_bgimg .yidun_jigsaw'))
            )

            # Distance of the gap as detected in the images.
            distance = self.sli.get_element_slide_distance(front_img, bg_img)
            print('滑动距离是', distance)

            # 2. No scale factor is applied (per the original note, Zhihu's
            # captcha is not scaled); subtract the offset in front of the slider.
            distance = distance - 4
            print('实际滑动距离是', distance)

            # Slider handle.
            element = self.browser.find_element(By.CSS_SELECTOR, '.yidun_slider')
            # Drag it.
            self.sli.slide_verification(self.browser, element, distance)

            time.sleep(4)
            # Press the submit button again if it is still there.
            try:
                submit = self.wait.until(
                    Ec.element_to_be_clickable((By.CSS_SELECTOR, '.Button.SignFlow-submitButton'))
                )
                submit.click()
                time.sleep(3)
            except WebDriverException:
                # Best effort: the button is gone (wait timed out) or not
                # clickable; the URL check below decides the outcome.
                pass

            end_url = self.browser.current_url
            print(end_url)

            if end_url == home_url:
                return self.get_cookies()
            time.sleep(3)

        return None

    def get_cookies(self):
        """Collect the cookies of the current browser session.

        :return: dict mapping cookie name to cookie value
        """
        cookies_dict = {
            cookie["name"]: cookie["value"] for cookie in self.browser.get_cookies()
        }
        print("cookie值为:", cookies_dict)
        return cookies_dict

    def __del__(self):
        # __init__ may have failed before ``browser`` was assigned.
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        # quit() ends the whole driver session; close() only closes the window.
        browser.quit()
        print('界面关闭')


if __name__ == "__main__":
    # Script entry point. The credentials are placeholders and must be
    # replaced with a real account; the last argument is the retry setting
    # handed to Login.
    zhihu_session = Login(user="********", password="********", retry=5)
    zhihu_session.login()

以上代码,只要我们配置好相关的运行环境及依赖库就可以直接运行,完成模拟登录并获取到cookie值。

©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容