模拟登录常用的方式有两种:cookie 方式和 webdriver 方式。本篇记录 webdriver 方式的模拟登录,测试网站为豆瓣。
导包
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
关于webdriver的安装大家自行搜索一下即可
创建一个driver:
# Configure and launch a Chrome session driven by a local chromedriver binary.
option = webdriver.ChromeOptions()
# option.add_argument('headless')  # uncomment to run without a visible window
driver = webdriver.Chrome(
    executable_path='F:/ruanjian/python/chromedriver',
    # `options` replaces the deprecated `chrome_options` keyword.
    options=option,
)
# NOTE(review): recent Selenium 4 releases expect the driver path to be passed
# through a Service object instead of `executable_path` - confirm against the
# installed Selenium version.
发起模拟访问
# Load the Douban home page in the browser session.
url = 'https://www.douban.com/'
driver.get(url)
我们选择密码登录(用户名+密码)。登录表单嵌在 iframe 中,需要先切换到该 iframe,再点击“密码登录”:
# Enter the first <iframe> on the page before touching the login widget;
# elements inside a frame are not visible from the top-level document.
login_frame = driver.find_elements(By.TAG_NAME, "iframe")[0]
driver.switch_to.frame(login_frame)
# Click the second tab of the widget to select password login.
driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]').click()
找到登录框并进行输入:
# Fill in the account name and password, then submit the form.
# Each input is looked up once and reused for both clear() and send_keys().
username_input = driver.find_element(By.ID, "username")
username_input.clear()
username_input.send_keys('1891054xxxx')

password_input = driver.find_element(By.ID, "password")
password_input.clear()
password_input.send_keys('xxxx')

driver.find_element(By.CLASS_NAME, "btn-account").click()
或者使用下面带显式等待(WebDriverWait)的写法,效果相同:
# Same login flow, but each lookup is wrapped in WebDriverWait so it is
# retried for up to 5 seconds instead of failing on the first attempt.
password_tab = WebDriverWait(driver, 5).until(
    lambda d: d.find_element(By.XPATH, '//ul[@class="tab-start"]/li[2]'))
password_tab.click()

# implicitly_wait() does not pause the script; it sets how long later
# find_element calls keep retrying before they give up. Setting it once
# is enough, so the repeated call after the submit click is dropped.
driver.implicitly_wait(5)

username_input = WebDriverWait(driver, 5).until(
    lambda d: d.find_element(By.XPATH, '//input[@id="username"]'))
username_input.send_keys('189105xxxxx')

password_input = WebDriverWait(driver, 5).until(
    lambda d: d.find_element(By.XPATH, '//input[@id="password"]'))
password_input.send_keys('xxxxx')

# click() returns None, so its result is not bound to a name.
WebDriverWait(driver, 5).until(
    lambda d: d.find_element(By.CLASS_NAME, "btn-account")).click()
点击一下“读书”链接:
# Click the "Books" entry in the global navigation bar.
driver.find_element(
    By.XPATH, '//*[@id="db-global-nav"]/div/div[4]/ul/li[2]/a').click()
# Let later element lookups retry for up to 5 seconds; this sets a lookup
# timeout, it is not a fixed page-load delay.
driver.implicitly_wait(5)
“读书”页面会在新的标签页中打开,需要把 driver 切换到新窗口,否则后续操作的仍然是旧窗口中的页面:
# Make the second window handle the active one so that later calls
# operate on the newly opened tab rather than the original page.
new_tab_handle = driver.window_handles[1]
driver.switch_to.window(new_tab_handle)

# Print the URL the driver is now pointing at.
book_page_url = driver.current_url
print(book_page_url)
获取一下书名
此处也是使用xpath语法(find_elements_by_xpath),但是与普通xpath语法不同的是获取标签属性的时候不能直接‘/@属性’,需要使用get_attribute("属性")。
# Collect the <a> elements under each carousel cover and print their
# `title` attribute. XPath locators must resolve to elements, so the
# attribute is read afterwards with get_attribute().
books = driver.find_elements(
    By.XPATH, '//div[@class="carousel"]//div[@class="cover"]/a')
for book in books:
    print(book.get_attribute("title"))
效果截图
完整参考代码
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
import os

from selenium.webdriver.common.by import By

# Read the account from the environment instead of hard-coding it in the
# script; a missing variable fails fast with KeyError before Chrome starts.
username = os.environ['DOUBAN_USERNAME']
password = os.environ['DOUBAN_PASSWORD']

option = webdriver.ChromeOptions()
# option.add_argument('headless')  # uncomment to run without a visible window
driver = webdriver.Chrome(
    executable_path='F:/ruanjian/python/chromedriver',
    # `options` replaces the deprecated `chrome_options` keyword.
    options=option,
)
# NOTE(review): recent Selenium 4 releases expect the driver path to be passed
# through a Service object instead of `executable_path` - confirm against the
# installed Selenium version.

try:
    url = 'https://www.douban.com/'
    driver.get(url)

    # Enter the first <iframe> on the page, then click the second tab of
    # the login widget to select password login.
    driver.switch_to.frame(driver.find_elements(By.TAG_NAME, "iframe")[0])
    driver.find_element(
        By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]').click()

    # NOTE(review): this looks like a second click on the same tab, this
    # time with a wait - confirm whether the unguarded click above is
    # still needed.
    password_tab = WebDriverWait(driver, 5).until(
        lambda d: d.find_element(By.XPATH, '//ul[@class="tab-start"]/li[2]'))
    password_tab.click()

    # implicitly_wait() does not pause the script; it sets how long later
    # find_element calls keep retrying. One call covers the rest of the
    # session, including the lookups that follow the login.
    driver.implicitly_wait(5)

    username_input = WebDriverWait(driver, 5).until(
        lambda d: d.find_element(By.XPATH, '//input[@id="username"]'))
    username_input.send_keys(username)

    password_input = WebDriverWait(driver, 5).until(
        lambda d: d.find_element(By.XPATH, '//input[@id="password"]'))
    password_input.send_keys(password)

    # click() returns None, so its result is not bound to a name.
    WebDriverWait(driver, 5).until(
        lambda d: d.find_element(By.CLASS_NAME, "btn-account")).click()

    # After logging in, the page and its URL change. Click the "Books"
    # entry in the global navigation bar of the new page.
    driver.find_element(
        By.XPATH, '//*[@id="db-global-nav"]/div/div[4]/ul/li[2]/a').click()

    # Make the second window handle active so the following calls operate
    # on the newly opened tab, then print its URL.
    driver.switch_to.window(driver.window_handles[1])
    print(driver.current_url)

    # Collect the <a> elements under each carousel cover and print their
    # `title` attribute, i.e. the book name.
    books = driver.find_elements(
        By.XPATH, '//div[@class="carousel"]//div[@class="cover"]/a')
    for book in books:
        print(book.get_attribute("title"))
finally:
    # Always close the browser and stop the chromedriver process, even if
    # a lookup above raised.
    driver.quit()
总结:通过 selenium 模拟登录的方式较为简单,比使用 cookie 或者直接提交用户名+密码(user+passwd)请求的方式都要容易一些。