requests在爬取网页时中文显示乱码
res.encoding #获取res的编码格式
res.apparent_encoding #获取网页正确的编码格式
源码:
import requests
from bs4 import BeautifulSoup
# Scrape one page of 51job search results and print every posting as a
# [job title, company, salary] row.
l = []        # combined rows, one [title, company, salary] list per posting
name = []     # job titles
com = []      # company names
salary = []   # salary strings
# NOTE: the query string must contain "&degreefrom=99". A previous copy of this
# URL had "&deg" collapsed into the "°" character, which corrupted the parameter.
url = 'https://search.51job.com/list/000000,000000,0000,00,9,99,%2520,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error page.
res = requests.get(url, timeout=10)
res.raise_for_status()
# The encoding requests picks by default garbles the Chinese text on this page;
# switch to the encoding detected from the response body before reading res.text.
res.encoding = res.apparent_encoding
soup = BeautifulSoup(res.text, 'html.parser')
divs1 = soup.select("p.t1")
divs2 = soup.select("span.t2")
divs3 = soup.select("span.t4")

for div in divs1:
    link = div.select_one("span>a")
    # Fall back to the cell's own text so the three lists stay row-aligned
    # even when a title cell has no link.
    name.append((link if link is not None else div).text.strip())

# The first "span.t2" / "span.t4" match is skipped below; presumably it is the
# column header of the result table -- verify against the page markup.
for div in divs2[1:]:
    link = div.select_one("a")
    com.append((link if link is not None else div).text.strip())

for div in divs3[1:]:
    salary.append(div.text.strip())

# zip() stops at the shortest list, so a mismatch in the number of matched
# cells cannot raise IndexError.
for title, company, pay in zip(name, com, salary):
    l.append([title, company, pay])

for row in l:
    print(row)