# coding=utf-8
# Reference links kept from the original notes (their purpose is not stated
# in this file):
#   https://www.cnblogs.com/woaic/p/6024975.html
#   http://da7a389f.ngrok.io
"""Fetch one notice page from ggzy.njzwfw.gov.cn and print its cleaned <body>.

The script downloads a single hard-coded URL, empties the first <div> inside
<body>, empties the parent tag of every text node matching two marker
phrases, and prints what remains of <body> to stdout.
"""
import sys
import json
import re

# Python 2 only: force the process-wide default encoding to UTF-8.
# `reload(sys)` is required there because `site` removes
# `sys.setdefaultencoding` at start-up.  Neither name is usable on Python 3,
# so the hack is skipped on that interpreter.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Third-party imports stay after the encoding hack, as in the original order.
import requests
from bs4 import BeautifulSoup


def _clear_parents_of_text(tree, pattern):
    """Empty the parent tag of every text node in *tree* matching *pattern*.

    :param tree: a BeautifulSoup object (or Tag) to search.
    :param pattern: compiled regular expression tested against text nodes.
    """
    for node in tree.find_all(text=pattern):
        parent = node.parent
        # An earlier clear() may already have detached this node (two matches
        # under the same parent), leaving it with no parent to empty.
        if parent is not None:
            parent.clear()


url = 'http://ggzy.njzwfw.gov.cn/njggzy/infodetail/?infoid=e46f641c-8651-46e2-b068-d27c689df435&categoryNum=001001001001'

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the notice.
wb_data = requests.get(url, timeout=30)
wb_data.raise_for_status()
soup = BeautifulSoup(wb_data.text, 'lxml')

# Empty the first <div> inside <body> (presumably the page header/navigation;
# verify against the live page).  Skipped when the page has no such element.
if soup.body is not None and soup.body.div is not None:
    soup.body.div.clear()

# u'访问次数' = "visit count"; u'南京市公共资源交易中心网' = "Nanjing Public
# Resources Trading Center website".  The tags holding these phrases are
# emptied so they do not appear in the printed output.
_clear_parents_of_text(soup, re.compile(u'访问次数'))
_clear_parents_of_text(soup, re.compile(u'南京市公共资源交易中心网'))

# Single-argument call form prints identically on Python 2 and Python 3.
print(soup.body)