Crawl map tile images at different resolutions (zoom levels)
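OpenStreetMap serves its tiles under the slippy-map scheme zoom/x/y.png: at zoom level z there are 2^z by 2^z tiles, so zoom 8 gives the 0..255 indices enumerated below. If only a specific region is needed rather than the whole map, the standard conversion from latitude/longitude to tile indices looks like this sketch (deg2num is an illustrative name, not part of the crawler):

import math

# Standard slippy-map conversion: latitude/longitude plus zoom -> (x, y) tile indices,
# which correspond to the zoom/x/y.png paths the crawler requests.
def deg2num(lat_deg, lon_deg, zoom):
    lat_rad = math.radians(lat_deg)
    n = 2.0 ** zoom
    xtile = int((lon_deg + 180.0) / 360.0 * n)
    ytile = int((1.0 - math.log(math.tan(lat_rad) + 1.0 / math.cos(lat_rad)) / math.pi) / 2.0 * n)
    return (xtile, ytile)

# Example: deg2num(31.23, 121.47, 8) returns the zoom-8 tile indices covering Shanghai.

The crawler below simply enumerates every tile at one zoom level instead.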
import urllib
import os

url_list = []
base_url = 'http://a.tile.openstreetmap.org/'

# Build the tile URLs, e.g. 8/0/0.png, 8/0/1.png ... 8/0/255.png ... 8/255/255.png.
# `first` is the zoom level used in the URL path; `second` controls the tile count
# (2^second rows and 2^second columns), so both are normally the same zoom level.
def create_url(first, second):
    for y in range(int(pow(2, second))):
        for z in range(int(pow(2, second))):
            url_list.append(str(first) + '/' + str(y) + '/' + str(z) + '.png')
            print str(first) + '/' + str(y) + '/' + str(z) + '.png'
    return url_list

# Create the local directory tree that mirrors the tile URLs.
def create_dirs(url_list, base_filepath):
    for x in url_list:
        x = x.split('/')
        file_path = base_filepath + str(x[0]) + '/' + str(x[1]) + '/'
        if not os.path.exists(file_path):
            print file_path
            os.makedirs(file_path)

# Download every tile into the matching local path (the directories must already exist).
def download_png(url_list, filepath):
    for x in url_list:
        url = base_url + x
        urllib.urlretrieve(url, filename=filepath + x)

url_list = create_url(8, 8)
# create_dirs(url_list, 'd:/test/')   # run this first to create the directory tree
download_png(url_list, 'd:/test/')
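As an illustrative alternative to urllib.urlretrieve (a sketch under the same path layout, not the author's approach), a single tile could also be fetched with the requests library, which allows a timeout and an HTTP status check:

import requests

# Hypothetical helper: fetch one tile with a timeout and only write the file
# when the server answers 200 OK. `tile` is a relative path such as '8/0/0.png'.
def fetch_tile(tile, filepath):
    url = base_url + tile
    r = requests.get(url, timeout=30)
    if r.status_code == 200:
        with open(filepath + tile, 'wb') as f:
            f.write(r.content)
    else:
        print 'skipped %s (HTTP %d)' % (tile, r.status_code)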
Usage
- First adjust the parameters: the local file path, the zoom level (resolution), and so on.
- Then comment out the download_png call and run create_dirs first to build the directory tree; once the directories exist, uncomment download_png and start downloading the tiles (a combined sketch follows this list).
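If you would rather not comment and uncomment calls, the two steps can also be chained in a single run; a minimal sketch using the functions above with the same example path and zoom level:

url_list = create_url(8, 8)
create_dirs(url_list, 'd:/test/')   # build the directory tree first
download_png(url_list, 'd:/test/')  # then download the tiles into it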
Crawl with multiple processes and handle the IOError raised by network failures
import urllib
import os
import time
import logging
import multiprocessing

logging.basicConfig(level=logging.WARNING,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename='myapp.log',
                    filemode='w')

url_list = []
filepath = 'd:/test/'
base_url = 'http://a.tile.openstreetmap.org/'

# Build the tile URLs for rows [start, end) at zoom level `rate`, so that each
# process can be given a disjoint band of rows.
def create_url(start, end, rate):
    for y in range(start, end):
        for z in range(int(pow(2, rate))):
            url_list.append(str(rate) + '/' + str(y) + '/' + str(z) + '.png')
            logging.warning(str(rate) + '/' + str(y) + '/' + str(z) + '.png')
    return url_list

# Create the local directory tree that mirrors the tile URLs.
def create_dirs(url_list, base_filepath):
    for x in url_list:
        x = x.split('/')
        file_path = base_filepath + str(x[0]) + '/' + str(x[1]) + '/'
        if not os.path.exists(file_path):
            logging.warning(file_path)
            os.makedirs(file_path)

# Download every tile; on a network IOError, log it, back off for three minutes
# and retry the same tile once.
def download_png(url_list, filepath):
    for x in url_list:
        try:
            url = base_url + x
            print url
            logging.warning(url)
            urllib.urlretrieve(url, filename=filepath + x)
        except IOError as serr:
            logging.error(serr)
            time.sleep(180)
            urllib.urlretrieve(url, filename=filepath + x)

# Each worker process builds its own URL list, creates the directories and then
# downloads its band of rows.
def worker(start, end, rate):
    url_list = create_url(start, end, rate)
    create_dirs(url_list, filepath)
    download_png(url_list, filepath)

if __name__ == "__main__":
    # Split the remaining rows of zoom level 10 (2^10 = 1024 rows per axis) into
    # disjoint bands, one process per band.
    p1 = multiprocessing.Process(target=worker, args=(630, 700, 10))
    p2 = multiprocessing.Process(target=worker, args=(700, 800, 10))
    p3 = multiprocessing.Process(target=worker, args=(800, 900, 10))
    p4 = multiprocessing.Process(target=worker, args=(900, 1000, 10))
    p5 = multiprocessing.Process(target=worker, args=(1000, 1024, 10))
    p1.start()
    p2.start()
    p3.start()
    p4.start()
    p5.start()
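The single retry after a fixed 180-second sleep can itself fail and abort the worker; as an illustrative refinement (an assumption, not part of the original script), a small retry loop with a doubling back-off could wrap urlretrieve:

import urllib
import time
import logging

# Hypothetical helper: retry one tile download a few times, doubling the wait
# after each network IOError instead of sleeping a fixed 180 seconds once.
def retrieve_with_retry(url, filename, retries=3, wait=30):
    for attempt in range(retries):
        try:
            urllib.urlretrieve(url, filename=filename)
            return True
        except IOError as err:
            logging.error('%s (attempt %d): %s' % (url, attempt + 1, err))
            time.sleep(wait)
            wait *= 2
    return False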