import codecs
import json
import osm2geojson
import geopandas
import xml.etree.ElementTreeas xee
from lxmlimport etree
import time
import requests
import geopandasas gpd
from shapely.geometryimport Point, LineString
import os
import pandasas pd
from tqdmimport tqdm
def osm_geojson(file_path, geofile):
    """Convert an OSM XML file to GeoJSON, save it, and load it with GeoPandas.

    Args:
        file_path: Path of the UTF-8 encoded OSM XML file to read.
        geofile: Path the GeoJSON document is written to.

    Returns:
        The GeoDataFrame obtained by reading ``geofile`` back from disk.
    """
    with codecs.open(file_path, 'r', encoding='utf-8') as data:
        xml = data.read()

    # filter_used_refs=False is forwarded to osm2geojson as-is; presumably it
    # keeps nodes that are referenced by ways/relations in the output --
    # confirm against the osm2geojson documentation.
    geojson = osm2geojson.xml2geojson(xml, filter_used_refs=False)

    with open(geofile, 'w', encoding='utf-8') as fpath:
        json.dump(geojson, fpath)

    # read_file expects a path or file-like object, not the in-memory dict,
    # so load the file that was just written.
    data2 = geopandas.read_file(geofile)
    print("123131")
    return data2
def osmProcess(file_path, output_path="out.osm"):
    """Remove every top-level ``<node>`` from an OSM XML file and save the rest.

    Each removed node's id and coordinates are collected before removal.

    NOTE: an earlier, now disabled, variant only removed nodes lying outside
    the file's ``<bounds>`` element and also dropped the matching ``<nd>``
    references from ways. The current behavior removes all nodes and leaves
    the ways untouched.

    Args:
        file_path: Path of the OSM XML file to parse.
        output_path: Where the stripped document is written. Defaults to
            ``"out.osm"`` in the current working directory.

    Returns:
        dict mapping each node id (as ``str``) to a dict with the keys
        ``"ID"``, ``"Lat"``, ``"Lon"`` and ``"geo"`` (a shapely ``Point``).

    Raises:
        TypeError: If a node lacks a ``lat`` or ``lon`` attribute.
        ValueError: If ``lat`` or ``lon`` is not a valid float.
    """
    domTree = xee.parse(file_path)
    root = domTree.getroot()

    IDlist = {}
    # findall() returns a list, so removing children from root while looping
    # over that list does not disturb the iteration.
    for node in root.findall("node"):
        ID = node.get("id")
        Lat = float(node.get("lat"))
        Lon = float(node.get("lon"))
        IDlist[str(ID)] = {
            "ID": ID,
            "Lat": Lat,
            "Lon": Lon,
            # shapely points are (x, y), i.e. (longitude, latitude)
            "geo": Point(Lon, Lat),
        }
        root.remove(node)

    domTree.write(output_path, encoding="utf8")
    return IDlist
def readosm(osm_path, region_name="上海市"):
    """Build point and line GeoDataFrames from an OSM JSON export.

    The file is expected to hold a JSON object with an ``"elements"`` list
    whose entries carry ``"type"``, ``"id"`` and, for nodes, ``"lon"``/``"lat"``
    or, for ways, ``"nodes"`` (presumably the Overpass API JSON layout --
    confirm against the data source).

    Args:
        osm_path: Path of the UTF-8 encoded JSON file.
        region_name: Label shown in the progress-bar descriptions.

    Returns:
        Tuple ``(points, ways)`` of GeoDataFrames in EPSG:4326. ``points`` has
        the columns ``id``, ``lng``, ``lat``, ``geometry``; ``ways`` has
        ``id`` and ``geometry``.

    Raises:
        KeyError: If a way references a node id that is not in the file.
    """
    # The content must be decoded as JSON; indexing the raw text with
    # ['elements'] raises TypeError.
    with open(osm_path, 'r', encoding='utf-8') as a:
        raw_osm_json = json.load(a)
    elements = raw_osm_json['elements']

    # Extract the point layer.
    points_contents = [
        (str(element['id']), element['lon'], element['lat'])
        for element in tqdm(elements, desc='[{}]抽取点数据'.format(region_name))
        if element['type'] == 'node'
    ]
    points = pd.DataFrame(points_contents, columns=['id', 'lng', 'lat'])
    # Built as a list so that an input without any node still yields a column.
    points['geometry'] = [
        Point(lng, lat) for lng, lat in zip(points['lng'], points['lat'])
    ]
    points = gpd.GeoDataFrame(points, crs='EPSG:4326')

    # Lookup table: node id -> point geometry.
    id2points = dict(zip(points['id'], points['geometry']))

    # Extract the line layer; a way needs at least two nodes to form a line.
    ways_contents = []
    for element in tqdm(elements, desc='[{}]抽取线数据'.format(region_name)):
        if element['type'] == 'way' and len(element['nodes']) >= 2:
            line = LineString([id2points[str(ref)] for ref in element['nodes']])
            ways_contents.append((str(element['id']), line))
    ways = gpd.GeoDataFrame(
        pd.DataFrame(ways_contents, columns=['id', 'geometry']),
        crs='EPSG:4326',
    )
    print("4333333333")
    return points, ways
def fast_iter(context, *args, **kwargs):
    """Stream elements from an lxml ``iterparse`` context into a text file.

    For every ``(event, elem)`` pair, one line is appended to
    ``data/result.txt``. The line contains ``name:text`` for each child of
    ``elem``, joined by ``", "``, where ``name`` is the child's ``name``
    attribute. After writing, the element and the already-processed part of
    the tree are released so memory use stays bounded on large files.

    Args:
        context: Iterator produced by ``etree.iterparse``.
        *args: Accepted for call compatibility; not used.
        **kwargs: Accepted for call compatibility; not used.
    """
    # UTF-8 is set explicitly so the output does not depend on the platform's
    # default encoding.
    with open('data/result.txt', 'a', encoding='utf-8') as f:
        for event, elem in context:
            # A missing "name" attribute or empty text becomes an empty string
            # instead of raising TypeError on concatenation.
            fields = [
                "{}:{}".format(e.get("name") or "", e.text or "")
                for e in elem
            ]
            f.write(", ".join(fields))
            f.write('\n')

            # Drop the element's own content.
            elem.clear()
            # Walk the element and all of its ancestors and delete every
            # preceding sibling, which has already been processed. The
            # selection must be ancestor-or-self: a child query on the
            # just-cleared element matches nothing and frees nothing.
            for ancestor in elem.xpath('ancestor-or-self::*'):
                while ancestor.getprevious() is not None:
                    del ancestor.getparent()[0]
    # Drop this function's reference to the parser context.
    del context
def process_element(elem):
    """
    Print the text of every ``add`` child of an element.

    :params elem: Element whose ``xpath`` method is queried with ``'add'``
    """
    # Collect the text of each matched child in document order.
    gene_list = [child.text for child in elem.xpath('add')]
    print('gene', gene_list)
if __name__ == '__main__':
    # Path of the file to process.
    osm_path = r"F:/data/sh.osm"
    infile = osm_path
    # Alternative entry points, currently disabled:
    # geo_path = r'F:/data/sh.geojson'
    # osm_geojson(osm_path, geo_path)
    # osmProcess(osm_path)

    timestamp_format = '%Y-%m-%d %H:%M:%S'
    print('start', time.strftime(timestamp_format, time.localtime()))
    start = time.time()

    # Parse the XML incrementally; a namespaced tag must include the
    # namespace, e.g. tag='{http://uniprot.org/uniprot}doc'.
    context = etree.iterparse(
        infile,
        events=('end',),
        encoding='UTF-8',
        tag='doc',
    )
    # Stream through the XML data.
    fast_iter(context, process_element)

    print('stop', time.strftime(timestamp_format, time.localtime()))
    print('time', time.time() - start)
#https://www.jianshu.com/p/a2c65dc8c87b