Part 1: You have to be lazy
import requests
from bs4 import BeautifulSoup
import urllib.parse

ip = input("Enter a proxy IP (enter 127.0.0.1 if you do not need one): ")
proxies = {
    'https': ip,
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Connection': 'keep-alive',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}
content = input('Enter the search term: ')
initial_content = content
content_code = urllib.parse.quote(content)  # URL-encode the query so Chinese characters are handled correctly
var = 1
while var > 0:
    print('Enter 1 to exit')
    page = int(input('Enter a page number: ')) - 1
    pn = page * 10  # Baidu paginates with the pn parameter: 10 results per page
    url = 'https://www.baidu.com/s?wd=' + content_code + '&pn=' + str(pn)
    r = requests.get(url, headers=headers, proxies=proxies)
    soup = BeautifulSoup(r.text, features="html.parser")
    # Pagination: still needs improvement
    target = soup.find(id="page")
    ye_list = target.find_all('a')
    for y in ye_list:
        s = y.find_all(class_='pc')
        print(s)
    # Each organic result is a div.result whose h3 holds the title link
    url_list = soup.select('div.result > h3 > a')
    for i in url_list:
        print(i.getText())
        print(i.attrs.get("href"))
    var = page
else:
    print("Good bye!")
TODO: the pagination handling still needs improvement; one possible refinement is sketched below.
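
The pagination block in the loop currently just prints the raw tags it finds. A hedged sketch of one refinement: it reuses the same id="page" container and class "pc" elements the script already relies on (that Baidu markup is an assumption carried over from the original code and may change), guards against the response not containing a pager at all, and returns plain integers. The function name get_page_numbers is only illustrative.

def get_page_numbers(soup):
    """Return the page numbers listed in Baidu's pager as a sorted list of ints."""
    pager = soup.find(id="page")               # same container the script already queries
    if pager is None:                          # e.g. Baidu served a verification page instead of results
        return []
    numbers = []
    for span in pager.find_all(class_="pc"):   # the .pc elements hold the visible page numbers
        text = span.get_text(strip=True)
        if text.isdigit():
            numbers.append(int(text))
    return sorted(set(numbers))

Calling get_page_numbers(soup) inside the loop would replace the two nested print statements and give the script something like [1, 2, 3, ...] that it could use to validate the page number the user entered.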