2021-05-30 周末作业

"""
Time:2021/5/30 10:22
Author:Second
"""
import csv
import requests
import json
import re


def get_net_data(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        The response text when the status code is 200. For any other status
        the code is printed and ``None`` is returned.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            server does not answer within ``timeout`` seconds.
    """
    # Browser-style User-Agent header sent with every request.
    headers = {
        'User-Agent': 'Mozilla / 5.0 (Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 88.0.4324.104 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    print(response.status_code)
    return None


def analysis_data(html: str):
    """Extract job postings from the source of a search-result page.

    The results are read from the JSON object assigned to
    ``__SEARCH_RESULT__`` inside a ``<script>`` tag, specifically from its
    ``engine_search_result`` list.

    Args:
        html: Page source. A falsy value (``None`` or ``''``, e.g. after a
            failed download) is accepted and yields no rows.

    Returns:
        A list with one row per posting, each row being
        ``[job_name, job_href, providesalary, company_name, workarea,
        company_href, issuedate, companytype, attribute, companyind]``.
        Fields missing from a posting are filled with ``'暂无'``. An empty
        list is returned when the page carries no embedded result object.

    Raises:
        json.JSONDecodeError: If the embedded object is not valid JSON.
    """
    placeholder = '暂无'
    if not html:
        return []
    match = re.search(r'__SEARCH_RESULT__\s*=\s*(\{.*\})\s*</script>', html)
    if match is None:
        # No embedded result object on this page: nothing to extract.
        return []
    data = json.loads(match.group(1))
    all_data = []
    for item in data.get("engine_search_result", []):
        attribute = item.get('attribute_text', placeholder)
        # Only join real sequences of tags; joining a plain string would
        # insert the separator between its individual characters.
        if not isinstance(attribute, str):
            attribute = ' | '.join(attribute)
        all_data.append([
            item.get('job_name', placeholder),
            item.get('job_href', placeholder),
            item.get('providesalary_text', placeholder),
            item.get('company_name', placeholder),
            item.get('workarea_text', placeholder),
            item.get('company_href', placeholder),
            item.get('issuedate', placeholder),
            item.get('companytype_text', placeholder),
            attribute,
            item.get('companyind_text', placeholder),
        ])
    print(all_data)
    return all_data


def save_data(data):
    """Append the given rows to the ``51job数据分析.csv`` output file.

    Args:
        data: Iterable of rows; each row is an iterable of cell values
            written as one CSV line.
    """
    csv_file = open('51job数据分析.csv', mode='a', newline='', encoding='utf-8_sig')
    with csv_file:
        csv.writer(csv_file).writerows(data)


# Start a fresh CSV containing only the header row; save_data() appends the
# rows of each page below it.
with open('51job数据分析.csv', 'w', newline='', encoding='utf-8_sig') as f:
    writer = csv.writer(f)
    writer.writerow(['招聘职位', '招聘详情', '待遇', '招聘单位', '单位地址', '单位详情', '上传时间', '单位类型', '招聘要求', '单位主营'])
# Walk result pages 1..2000; the page number is the last path component.
for x in range(1, 2001):
    html = get_net_data(f'https://search.51job.com/list/000000,000000,0000,00,9,99,%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90,2,{x}.html??')
    if html is None:
        # get_net_data returns None for a non-200 response; skip that page
        # instead of handing None to the parser and aborting the whole run.
        continue
    data = analysis_data(html)
    save_data(data)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容