启动webdriver
try/except/else/finally的用法顺序(Python里没有catch关键字):用它捕获异常后,脚本可以在抛出error时继续运行。
支持多种选择器,推荐xpath或css选择器
从xpath里传递变量的方法:http://stackoverflow.com/questions/32874539/using-a-variable-in-xpath-in-python-selenium
不完善的爬cnki期刊数据的脚本:
主要作用是从xls文件中读取期刊名字搜索,点击后进入期刊具体信息页面
利用xpath寻找2017年所到刊数,返回写入到新的xls文件中去。
<pre>
# coding=utf-8
import xlrd
import xlwt
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def DragInfo(keyword):
    """Search for a journal by name and count its 2017 issue links.

    Uses the module-level ``driver``. The keyword is typed into the search
    box (element id ``txt_1_value1``) and submitted with Return; the first
    entry of the result list is clicked, the driver switches to whichever
    window is not the search window, and every ``<a>`` element whose ``id``
    contains ``"2017"`` is counted.

    Args:
        keyword: Journal name to type into the search box.

    Returns:
        The number of matching links, or ``None`` when any step fails
        (the failure is printed, not raised, so a batch run can continue).
    """
    elem = driver.find_element(By.ID, "txt_1_value1")
    # Record the search window before anything can fail, so the cleanup in
    # ``finally`` never depends on a name that is only bound inside ``try``.
    default_window = driver.current_window_handle
    elem.send_keys(keyword)
    elem.send_keys(Keys.RETURN)
    try:
        wait = WebDriverWait(driver, 20)
        # ``until`` returns the located element, so no second lookup is needed.
        result = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="searchResult"]/dl/dd[1]/div[2]/h1/a')))
        result.click()
        # Presumably the click opens the journal page in a new window;
        # move to any handle that is not the search window.
        for handle in driver.window_handles:
            if handle != default_window:
                driver.switch_to.window(handle)
        wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="2017_Year_Issue"]/dd')))
        result2017 = driver.find_elements(
            By.XPATH, '//a[contains(@id,"2017")]')
        return len(result2017)
    except Exception as e:
        # Best-effort: report the failure and let the caller carry on with
        # the next journal.
        print("%s : %s" % (type(e).__name__, e))
        return None
    finally:
        # Close the detail window on success AND on failure, but never the
        # search window itself, then return to the search box and reset it.
        if driver.current_window_handle != default_window:
            driver.close()
            driver.switch_to.window(default_window)
        elem.clear()
def SolveXls():
    """Look up every journal listed in ``search_list.xls`` and save the counts.

    Reads the first column of the first sheet of ``search_list.xls``, calls
    ``DragInfo`` for each value, and writes the name (column 0) and the
    returned count (column 1) to the sheet ``'A Test Sheet'`` of a new
    workbook saved as ``result_test.xls``. Each name and count is also
    printed as it is processed.
    """
    data = xlrd.open_workbook('search_list.xls')
    new_data = xlwt.Workbook(encoding='utf-8')
    new_table = new_data.add_sheet('A Test Sheet')
    table = data.sheets()[0]
    try:
        for i, menu in enumerate(table.col_values(0)):
            print(menu)
            count = DragInfo(menu)
            new_table.write(i, 0, menu)
            new_table.write(i, 1, count)
            print(count)
    finally:
        # Save whatever was collected even if a lookup raised part-way
        # through, so a long run does not lose its finished rows.
        new_data.save('result_test.xls')
if __name__ == '__main__':
    # ``driver`` is deliberately module-level: DragInfo reads it as a global.
    driver = webdriver.Chrome()
    try:
        driver.get("http://navi.cnki.net/")
        # Fails fast here if the search box is missing from the loaded page.
        elem = driver.find_element(By.ID, "txt_1_value1")
        SolveXls()
    finally:
        # Always shut the browser and its chromedriver process down.
        driver.quit()
</pre>