import json

import requests
import xlwt
from lxml import etree
# Module-level accumulator that the export loop at the bottom of the file
# iterates; each element is expected to be a sequence of cell values.
info_list = []

# A sample search URL (p=4). get_info() takes its own ``url`` argument, and
# the __main__ block rebinds this name while looping over its page URLs.
url = 'http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&c=301&pageSize=40&q=&wl=301&p=4'

# HTTP headers sent with every request made by get_info().
# NOTE(review): the User-Agent value ends with text that looks like it was
# copied from a browser dev-tools panel ("Query String Parameters view
# source ..."); it is kept verbatim here.
# NOTE(review): 'Cookie' and 'Authorization' are hard-coded session values /
# HTTP Basic credentials; consider loading them from configuration.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 Query String Parameters view source view URL encoded',
    'Cookie': '__utmt=1; __utma=212754963.192174688.1557973843.1559182595.1560477585.3; __utmb=212754963.5.10.1560477585; __utmc=212754963; __utmz=212754963.1557973843.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); PageState=%7B%22dictionary%22%3A%22uk%22%2C%22lookup%22%3A%22%22%2C%22advboxopen%22%3Afalse%2C%22hideOffensiveWords%22%3Afalse%2C%22showMore%22%3Afalse%2C%22c%22%3A%7B%22301%22%3Atrue%7D%2C%22pageSize%22%3A%2240%22%2C%22q%22%3A%22%22%7D',
    'Host': 'vocabulary.englishprofile.org',
    'Referer': 'http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&pageSize=40&q=&wl=301&p=3',
    'Authorization': 'Basic ZW5nbGlzaHByb2ZpbGU6dm9jYWJ1bGFyeQ==',
}
def _first_text(node, path):
    """Return the text of the first element matching *path* under *node*, or ''."""
    matches = node.xpath(path)
    if not matches or matches[0].text is None:
        return ''
    return matches[0].text


def get_info(url):
    """Fetch one search-result page and append its entries to ``info_list``.

    Every ``<li>`` under ``<div class="search-block">`` is inspected for the
    ``base``, ``pos`` and ``gw`` spans nested in its link. One
    ``[base, pos, gw]`` row per entry is appended to the module-level
    ``info_list``, matching the three header columns written by the export
    code.

    Args:
        url: Address of the results page to download.

    Returns:
        The list of rows extracted from this page (also appended to
        ``info_list``).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    res = requests.get(url, headers=headers, timeout=30)
    res.raise_for_status()
    html = etree.HTML(res.text)

    rows = []
    for info in html.xpath('//div[@class="search-block"]//li'):
        base = _first_text(info, 'a/span/span[@class="base"]')
        if not base:
            # Not a vocabulary entry (no headword span); nothing to record.
            continue
        # Missing pos/gw spans become empty cells instead of an IndexError.
        pos = _first_text(info, 'a/span/span[@class="pos"]')
        gw = _first_text(info, 'a/span/span[@class="gw"]')
        rows.append([base, pos, gw])

    # Mutate the shared list in place: plain assignment to ``info_list`` here
    # would only create a local variable and the collected rows would be lost.
    info_list.extend(rows)
    return rows
if __name__ == '__main__':
    # Script entry point: scrape the configured result pages and dump the
    # collected rows into a single-sheet workbook.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # Row 0 holds the column titles.
    header = ['vocabulary', 'pos', 'gw']
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # range(1, 2) covers page 1 only; widen the range to crawl more pages.
    urls = [
        'http://vocabulary.englishprofile.org/dictionary/search/uk/?c=301&c=301&c=301&pageSize=40&q=&wl=301&p={}'.format(page)
        for page in range(1, 2)
    ]
    for url in urls:
        get_info(url)

    # Data rows start at row 1, directly below the header.
    for row_idx, row in enumerate(info_list, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    # NOTE(review): xlwt writes the legacy .xls format regardless of the file
    # name, so Excel may reject this file because of its '.xlsx' extension.
    # Consider saving as '.xls' or switching to an .xlsx writer.
    book.save('E:/python/第五讲/test.xlsx')