# Install (run this in a shell, not inside Python):
#   pip install bs4
# Import
from bs4 import BeautifulSoup
class Job:
    """Plain record holding the fields of one job posting.

    The values are kept as ordinary instance attributes, so ``vars(job)``
    (or ``job.__dict__``) yields a dict with the keys ``jobname``,
    ``salary`` and ``companyname``.
    """

    def __init__(self, jobname, salary, companyname):
        """Store the three fields exactly as given.

        No validation or conversion is performed on any argument.
        """
        self.jobname = jobname
        self.salary = salary
        self.companyname = companyname
import json
import urllib.request

from bs4 import BeautifulSoup

from Item import Job
# Search-results endpoint; the keyword is the URL-encoded form of "python开发".
url = 'http://www.chinahr.com/sou/?keyword=python%E5%BC%80%E5%8F%91'

# Fetch the page. The context manager closes the HTTP response even if
# reading or decoding raises.
with urllib.request.urlopen(url=url) as response:
    # Page body, decoded as UTF-8.
    content = response.read().decode('utf-8')

# Build the BeautifulSoup parse tree using the lxml parser.
soup = BeautifulSoup(content, 'lxml')

# Select the two kinds of <li> rows under .jobList. The loop below pairs
# them positionally: the i-th "l1" row with the i-th "l2" row.
# NOTE(review): assumes the page emits one "l2" row per "l1" row — confirm
# against the live markup.
tr_list1 = soup.select('.jobList > ul > li[class="l1"]')
tr_list2 = soup.select('.jobList > ul > li[class="l2"]')

jobs = []
# Walk both row lists in lockstep. zip() stops at the shorter list, so a
# missing "l2" row no longer raises IndexError part-way through.
for row1, row2 in zip(tr_list1, tr_list2):
    row1_spans = row1.find_all('span')
    # First <span> of the "l1" row is used as the job name.
    jobname = row1_spans[0].get_text()
    # Third <span>'s text is split on newlines; the second piece is used
    # as the company name.
    companyname = row1_spans[2].get_text().split('\n')[1]
    # Second <span> of the "l2" row is used as the salary.
    salary = row2.find_all('span')[1].get_text()
    job = Job(jobname, salary, companyname)
    # Store the object's attributes as a plain dict so it can be serialized.
    jobs.append(vars(job))

# Write the result as real JSON. str(jobs) would emit a Python repr with
# single quotes, which JSON parsers reject. ensure_ascii=False keeps the
# Chinese text readable instead of \uXXXX escapes.
with open('data_中华英才.json', 'w', encoding='utf-8') as fp:
    json.dump(jobs, fp, ensure_ascii=False, indent=2)