Selenium 在爬取需要登录验证的网页时是个很好的工具;不过登录成功并拿到 cookie 之后,就可以直接携带 cookie 访问,无需每次都重新走一遍登录流程。
以下是一个cookie登录的案例;目标网站:https://aso100.com/
导入相关库
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
实现登录
# Log in through the sign-in form with a real browser and capture the session
# cookies into `cookie_list` for reuse further down.
# NOTE(review): credentials are hard-coded here; move them to configuration or
# environment variables before using this outside a demo.
userName = 'magic111'
passWord = '121314'
driver = webdriver.Chrome()
try:
    driver.get('https://aso100.com/account/signin')
    # Wait up to 20 seconds for the username field to appear; the wait returns
    # the located element, so it is reused instead of being looked up again.
    user_name = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="username"]'))
    )
    user_name.send_keys(userName)
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed from Selenium 4.
    pass_word = driver.find_element(By.XPATH, '//*[@id="password"]')
    pass_word.send_keys(passWord)
    submit = driver.find_element(By.XPATH, '//*[@id="submit"]')
    submit.click()
    # Wait up to 20 seconds for the search box, which is used here as the
    # signal that the post-login page has loaded.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="search-kw"]'))
    )
    # Collect the cookies of the logged-in session.
    cookie_list = driver.get_cookies()
    print(cookie_list)
finally:
    # Always end the browser session, even when a wait times out or an
    # element lookup fails; quit() also closes the window.
    driver.quit()
1、使用selenium设置cookie登录
# Start a fresh browser, inject the cookies captured earlier, and open a page
# that requires login to check whether the cookies are accepted.
driver = webdriver.Chrome()
try:
    # The browser has to visit the target domain once before cookies for that
    # domain can be added.
    driver.get('https://aso100.com')
    for item in cookie_list:
        # NOTE(review): WebDriver cookie dicts normally use the key 'expiry';
        # 'expires' is passed through unchanged here - confirm it is intended.
        driver.add_cookie({
            'domain': '.aso100.com',
            'name': item['name'],
            'value': item['value'],
            'path': '/',
            'expires': None
        })
    driver.get('https://aso100.com/account/setting/type/dataCenter')
    # Block until Enter is pressed so the result can be inspected in the
    # open browser window.
    input('是否有效')
finally:
    # Always end the browser session, even if adding a cookie or loading the
    # page raises; quit() also closes the window.
    driver.quit()
2、使用requests设置cookie登录
import requests
# Build a Cookie request header from the cookies captured by Selenium and use
# it to fetch a page that requires login with requests.
# Cookie pairs are separated by "; " as specified for the Cookie header.
cookies = "; ".join(
    "=".join((item["name"], item["value"])) for item in cookie_list
)
print(cookies)
session = requests.Session()
# The cookies are sent explicitly through the Cookie request header.
headers = {
    'Cookie': cookies
}
# A timeout keeps the script from hanging forever if the server never answers;
# 20 seconds matches the waits used for the browser steps.
response = session.get(
    url='https://aso100.com/account/setting/type/dataCenter',
    headers=headers,
    timeout=20,
)
html = response.content.decode()
print(html)