天津二手房数据爬虫

#coding=UTF-8
import urllib2
from bs4 import BeautifulSoup
import sys
page=1
num=0
while page<100:
    pages = str(page)
    url = "http://tj.fangjia.com/ershoufang/--e-" + pages + "#pagelist"
    
    reload(sys)

    sys.setdefaultencoding('utf-8')
    html = urllib2.urlopen(url)
    soup = BeautifulSoup(html, "html.parser")
    house = soup.find("div", class_="house")

    for home in house.find_all("li", {"name": "__page_click_area"}):

        for title in home.find_all("span", class_="tit"):
            for text in title.stripped_strings:
                print text

        for address in home.find_all("span", class_="address"):
            print address.get_text()

        for attribute in home.find_all("span", class_="attribute"):
            print attribute.get_text()

        for price in home.find_all("span", class_="xq_aprice xq_esf_width"):
            print price.em.string + "万"
            print "--------------------"
            num=num+1
    page = page + 1
print "总计"+str(num)+"套房屋"

最后编辑于：2017.12.05 01:45:37

天津二手房数据爬虫

推荐阅读更多精彩内容