一、介绍
获取某个 API 的数据，并按定制的格式提取字段；
http://restapi.amap.com/v3/place/text?&keywords=&types=120201&city=hangzhou&output=json&offset=50&page=1&key=4254ccdb6f119ac71f046022bbe73bfc&extensions=all
比如这个链接，我想抓取 page 1 到 page 10 的数据；
提取返回的 JSON 里的 name、tel、address 等字段，按定制的格式保存。
二、代码
#!/usr/bin/python
#-*- coding: utf-8 -*-
#encoding=utf-8
import json
import requests
import os
import urllib2,urllib
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
def _textField(poi, key):
    """Return ``poi[key]`` when it is a string, otherwise an empty string.

    NOTE(review): the service appears to send a non-string placeholder for
    blank fields (the original code guarded ``tel`` this way) -- confirm
    against the API documentation.
    """
    value = poi.get(key)
    if isinstance(value, basestring):
        return value
    return ""


def wjxRead(urlStr):
    """Fetch one page of POI results and format them as plain text.

    Args:
        urlStr: Full request URL. The response body must be a JSON object
            containing a ``pois`` array of objects.

    Returns:
        The concatenation of one record per POI. Each record is a
        ``------>`` separator line followed by ``name : ``, ``address : ``
        and ``tel : `` lines. Returns an empty string when ``pois`` is empty.

    Raises:
        urllib2.URLError: If the request fails.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``pois`` key.
    """
    response = urllib2.urlopen(urlStr)
    try:
        payload = json.loads(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()

    records = []
    for poi in payload["pois"]:
        # Every field goes through _textField so that a non-string value
        # yields an empty field instead of a TypeError on concatenation.
        records.append(
            "------>\n"
            + "name : " + _textField(poi, "name") + "\n"
            + "address : " + _textField(poi, "address") + "\n"
            + "tel : " + _textField(poi, "tel") + "\n"
        )
    return "".join(records)
def saveFile(content):
    """Write ``content`` to ``wjx.txt`` in the current directory.

    Any existing file is overwritten.

    Args:
        content: Text or byte string to store. Text is encoded as UTF-8
            before writing; byte strings are written unchanged.

    Raises:
        IOError: If the file cannot be opened or written.
    """
    # Encode explicitly rather than relying on the interpreter's default
    # encoding, because the file is opened in binary mode.
    if not isinstance(content, bytes):
        content = content.encode("utf-8")
    # The with-block closes the handle even when write() raises.
    with open("wjx.txt", "wb") as outFile:
        outFile.write(content)
# Fetch result pages 1 through 10 and save them together in one text file.
# xrange excludes its upper bound, so 11 is required to include page 10.
# NOTE(review): the API key is embedded in the URL; consider loading it from
# configuration instead of committing it with the script.
pageTexts = []
for x in xrange(1, 11):
    urlStr = 'http://restapi.amap.com/v3/place/text?&keywords=&types=120201&city=hangzhou&output=json&offset=50&page=' + str(x) + '&key=4254ccdb6f119ac71f046022bbe73bfc&extensions=all'
    pageTexts.append(wjxRead(urlStr))
wjxContent = "".join(pageTexts)
saveFile(wjxContent)
三、总结
本脚本按页顺序逐个请求，没有考虑并发抓取。