【Python爬虫】Beautiful Soup爬号码段

#coding:utf-8
import requests,os,csv
from bs4 import BeautifulSoup

# Crawl configuration.
BASE_URL = 'http://www.51hao.cc'
PAGE_ENCODING = 'gb2312'   # encoding forced on every response before parsing
REQUEST_TIMEOUT = 10       # seconds; keeps one stalled connection from hanging the crawl
OUTPUT_DIR_NAME = "号段查询"
OUTPUT_FILE_NAME = 'haoduan.csv'


def _fetch_soup(page_url):
    """Download ``page_url`` and return it parsed by BeautifulSoup (lxml).

    The response text is decoded with ``PAGE_ENCODING`` before parsing.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.encoding = PAGE_ENCODING
    return BeautifulSoup(response.text, 'lxml')


def _iter_city_links(index_soup):
    """Yield ``(city, shi_ming, shi_url)`` for every link on the index page.

    Each ``div.fkt`` section is expected to hold a ``div.fkbj`` whose first
    link text is the section name (``city``) and a ``div.fklk`` whose links
    point at the sub-pages (``shi_ming`` / ``shi_url``). Sections or links
    missing any of those parts are skipped instead of raising.
    """
    for fkt in index_soup.find_all("div", class_="fkt"):
        fkbj = fkt.find("div", class_="fkbj")
        fklk = fkt.find("div", class_="fklk")
        if fkbj is None or fklk is None or fkbj.a is None:
            continue
        city = fkbj.a.text
        for shi in fklk.find_all("a"):
            shi_url = shi.get("href")
            if not shi_url:
                # An anchor without a target cannot be crawled.
                continue
            yield city, shi.text, shi_url


def _iter_rows(city, shi_ming, shi_url, page_soup):
    """Yield one CSV row (a list of six strings) per number link on a sub-page.

    The i-th ``div.num_bg`` header is paired with the i-th ``<ul>`` of the
    page; ``zip`` stops at the shorter list, so a page with fewer ``<ul>``
    elements than headers no longer raises ``IndexError``.
    """
    headers = page_soup.find_all("div", class_="num_bg")
    number_lists = page_soup.find_all("ul")
    for header, number_list in zip(headers, number_lists):
        span = header.find("span")
        haoduan = span.text if span is not None else ""
        haoduanul = header.text
        for link in number_list.find_all("a"):
            yield [city, shi_ming, shi_url, haoduan, haoduanul, link.text]


def main():
    """Crawl the index page and every linked sub-page, appending rows to a CSV.

    The CSV lives in a ``号段查询`` directory next to this script. Rows are
    appended (mode ``"a"``), so repeated runs accumulate data. Columns are:
    city, shi_ming, shi_url, haoduan, haoduanul, haoma.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, OUTPUT_DIR_NAME)
    # Create the directory once up front; it may already exist from a prior run.
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, OUTPUT_FILE_NAME)

    index_soup = _fetch_soup(BASE_URL)

    # newline="" is required by the csv module to avoid blank lines on Windows.
    with open(csv_path, "a", encoding="utf8", newline="") as fp:
        writer = csv.writer(fp)
        for city, shi_ming, shi_url in _iter_city_links(index_soup):
            try:
                page_soup = _fetch_soup(shi_url)
            except requests.RequestException as exc:
                # One unreachable sub-page should not abort the whole crawl.
                print("skipping {}: {}".format(shi_url, exc))
                continue
            # writerows takes an iterable of row sequences.
            writer.writerows(_iter_rows(city, shi_ming, shi_url, page_soup))


if __name__ == "__main__":
    main()
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容