使用 BeautifulSoup 解析 HTML，并用 xlwt 模块将结果保存为 Excel 文件。保存方式想到了两种：一种是将全国的地区和区号放在同一个 sheet 里，另一种是把每个地区单独放在一个 sheet 里。目前保存的 Excel 单元格全部使用默认样式；xlwt 模块支持为单元格设置样式、字体等，但这次只是简单获取数据，如果下次还有兴趣，再继续完善。
python
# -*- coding:utf-8 -*-
import xlwt, urllib
from bs4 import BeautifulSoup
# Export every region and its cities' dialling codes into ONE worksheet.
#
# Layout: row 0 is the header (num / name / code); each region contributes a
# title row (region name in column 0) followed by one row per city.

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2, as the script was originally written

w = xlwt.Workbook(encoding='utf-8')
sheet = w.add_sheet('sheet 1')
sheet.write(0, 0, 'num')
sheet.write(0, 1, 'name')
sheet.write(0, 2, 'code')

url = 'http://dianhua.mapbar.com/'
index = urlopen(url)
try:
    # BeautifulSoup consumes the response while constructing the tree, so the
    # connection can be released as soon as parsing is done.
    soup = BeautifulSoup(index, "html.parser")
finally:
    index.close()

result = soup.find_all('div', {'class': 'phonenum clr'})

n = 1  # next free row in the sheet (row 0 holds the header)
for each_div in result:
    title_tag = each_div.find('h2')
    # A block without a heading still gets its (empty) title row so that its
    # cities are not lost.
    city_title = title_tag.get_text().strip() if title_tag is not None else u''
    sheet.write(n, 0, city_title)
    n = n + 1

    for each_content in each_div.find_all('dl'):
        name_tag = each_content.find('dt')
        code_tag = each_content.find('dd')
        if name_tag is None or code_tag is None:
            # Malformed entry: skip it instead of aborting the whole export
            # with an AttributeError before anything has been saved.
            continue
        # NOTE(review): the "num" value is the row index minus one, so region
        # title rows also consume a number. Kept as originally written --
        # confirm whether a gap-free per-city counter was intended.
        sheet.write(n, 0, n - 1)
        sheet.write(n, 1, name_tag.get_text().strip())
        sheet.write(n, 2, code_tag.get_text().strip())
        n = n + 1

w.save('D:/img/mimi.xls')
预览结果
python
# -*- coding:utf-8 -*-
import xlwt, urllib
from bs4 import BeautifulSoup
# Export one worksheet PER REGION: each sheet has a num / name / code header
# row followed by that region's cities, numbered from 1.

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2, as the script was originally written

# Worksheet-name rules enforced by xlwt's add_sheet (they mirror Excel's own):
# non-empty, at most 31 characters, must not start with an apostrophe, must
# not contain any of the characters below, and must be unique ignoring case.
_INVALID_SHEET_CHARS = u'[]:\\?/*\x00'
_MAX_SHEET_NAME_LEN = 31


def _safe_sheet_name(title, used_names, position):
    """Return a worksheet name derived from *title* that add_sheet accepts.

    title      -- raw heading text scraped from the page.
    used_names -- set of lower-cased names already handed out; updated here.
    position   -- 1-based index of the region, used for the fallback name
                  when the heading is empty.
    """
    cleaned = u''.join(
        u'_' if ch in _INVALID_SHEET_CHARS else ch for ch in title.strip()
    )
    if cleaned.startswith(u"'"):
        cleaned = u'_' + cleaned[1:]
    if not cleaned:
        cleaned = u'sheet %d' % position
    cleaned = cleaned[:_MAX_SHEET_NAME_LEN]

    candidate = cleaned
    duplicate_no = 2
    while candidate.lower() in used_names:
        # Make room for the disambiguating suffix inside the length limit.
        suffix = u' (%d)' % duplicate_no
        candidate = cleaned[:_MAX_SHEET_NAME_LEN - len(suffix)] + suffix
        duplicate_no += 1
    used_names.add(candidate.lower())
    return candidate


w = xlwt.Workbook(encoding='utf-8')

url = 'http://dianhua.mapbar.com/'
index = urlopen(url)
try:
    # BeautifulSoup consumes the response while constructing the tree, so the
    # connection can be released as soon as parsing is done.
    soup = BeautifulSoup(index, "html.parser")
finally:
    index.close()

result = soup.find_all('div', {'class': 'phonenum clr'})

used_names = set()
for position, each_div in enumerate(result, 1):
    title_tag = each_div.find('h2')
    city_title = title_tag.get_text() if title_tag is not None else u''

    sheet = w.add_sheet(_safe_sheet_name(city_title, used_names, position))
    sheet.write(0, 0, 'num')
    sheet.write(0, 1, 'name')
    sheet.write(0, 2, 'code')

    n = 1  # next free row in this sheet; doubles as the running city number
    for each_content in each_div.find_all('dl'):
        name_tag = each_content.find('dt')
        code_tag = each_content.find('dd')
        if name_tag is None or code_tag is None:
            # Malformed entry: skip it instead of aborting the whole export
            # with an AttributeError before anything has been saved.
            continue
        sheet.write(n, 0, n)
        sheet.write(n, 1, name_tag.get_text().strip())
        sheet.write(n, 2, code_tag.get_text().strip())
        n = n + 1

w.save('D:/img/mini.xls')
预览结果