1.gxdc 20201221~20201225.csv 是共享单车轨迹数据
2.gxdc_dd.csv 共享单车订单数据
3.gxdc_tcd.csv 共享单车停车点位(电子围栏)数据
4.正则表达式 地址 :https://regexr.com/
# 读取共享单车轨迹数据
import re
import pandas as pd
df_list = []
for i,tmp_f in enumerate(fl):
if re.findall('(gxdc_gj)[\w]+(.csv)',tmp_f):
tmp_f = os.path.join(root_path,tmp_f)
tmp_df = pd.read_csv(tmp_f)
print(i,':','*****'*4)
print(tmp_df.info())
df_list.append(tmp_df)
df_gj = pd.concat(df_list)
print("df_gj",':','*****'*4)
print(df_gj.info())
#tmp_df = pd.read_csv(os.)
#print(i)
# 2:3096835, 3:2569327, 4:2504938 ,5:2584768 ,6:1165017
按照单车ID和时间进行排序
df_gj = df_gj.sort_values(['BICYCLE_ID','LOCATING_TIME'])
路线可视化
import folium
m = folium.Map(location=[24.482426, 118.157606], zoom_start=12)
my_PolyLine=folium.PolyLine(locations=df_gj[df_gj['BICYCLE_ID'] == '000152773681a23a7f2d9af8e8902703'][['LATITUDE', 'LONGITUDE']].values,weight=5)
m.add_child(my_PolyLine)
路线可视化.png
共享单车停车点位(电子围栏)数据
import numpy as np
def bike_fence_format(s):
s = s.replace('[', '').replace(']', '').split(',')
s = np.array(s).astype(float).reshape(5, -1)
return s
# 共享单车停车点位(电子围栏)数据
bike_fence = pd.read_csv('./dcic2021_data/gxdc_tcd.csv')
bike_fence['FENCE_LOC'] = bike_fence['FENCE_LOC'].apply(bike_fence_format)
for data in bike_fence['FENCE_LOC'].values[:5]:
print(data[0])
import folium
import numpy as np
m = folium.Map(location=[24.482426, 118.157606], zoom_start=12)
for data in bike_fence['FENCE_LOC'].values[:100]:
folium.Marker(location=list(data[0, ::-1])).add_to(m)
m
电子围栏.png
Baseline
import os, codecs
import pandas as pd
import numpy as np
%pylab inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')
from matplotlib import font_manager as fm, rcParams
import matplotlib.pyplot as plt
# 读数据
bike_order = pd.read_csv('./dcic2021_data/gxdc_dd.csv')
bike_order = bike_order.sort_values(['BICYCLE_ID', 'UPDATE_TIME'])
import geohash
bike_order['geohash'] = bike_order.apply(lambda x:
geohash.encode(x['LATITUDE'], x['LONGITUDE'], precision=9), axis=1)
from geopy.distance import geodesic
bike_fence['MIN_LATITUDE'] = bike_fence['FENCE_LOC'].apply(lambda x: np.min(x[:, 1]))
bike_fence['MAX_LATITUDE'] = bike_fence['FENCE_LOC'].apply(lambda x: np.max(x[:, 1]))
bike_fence['MIN_LONGITUDE'] = bike_fence['FENCE_LOC'].apply(lambda x: np.min(x[:, 0]))
bike_fence['MAX_LONGITUDE'] = bike_fence['FENCE_LOC'].apply(lambda x: np.max(x[:, 0]))
bike_fence['FENCE_AREA'] = bike_fence.apply(lambda x: geodesic(
(x['MIN_LATITUDE'], x['MIN_LONGITUDE']), (x['MAX_LATITUDE'], x['MAX_LONGITUDE'])
).meters, axis=1)
bike_fence['FENCE_CENTER'] = bike_fence['FENCE_LOC'].apply(
lambda x: np.mean(x[:-1, ::-1], 0)
)
import geohash
bike_order['geohash'] = bike_order.apply(
lambda x: geohash.encode(x['LATITUDE'], x['LONGITUDE'], precision=6),
axis=1)
bike_fence['geohash'] = bike_fence['FENCE_CENTER'].apply(
lambda x: geohash.encode(x[0], x[1], precision=6)
)
# bike_order
geohash.encode(24.521156, 118.140385, precision=6), \
geohash.encode(24.521156, 118.140325, precision=6)
bike_order['UPDATE_TIME'] = pd.to_datetime(bike_order['UPDATE_TIME'])
bike_order['DAY'] = bike_order['UPDATE_TIME'].dt.day.astype(object)
bike_order['DAY'] = bike_order['DAY'].apply(str)
bike_order['HOUR'] = bike_order['UPDATE_TIME'].dt.hour.astype(object)
bike_order['HOUR'] = bike_order['HOUR'].apply(str)
bike_order['HOUR'] = bike_order['HOUR'].str.pad(width=2,side='left',fillchar='0')
bike_order['DAY_HOUR'] = bike_order['DAY'] + bike_order['HOUR']
# 按照经纬度数据聚合
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 1],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY_HOUR'], aggfunc='count', fill_value=0
)
bike_outflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 0],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY_HOUR'], aggfunc='count', fill_value=0
)
bike_inflow.loc['wsk52r'].plot()
bike_outflow.loc['wsk52r'].plot()
plt.xticks(list(range(bike_inflow.shape[1])), bike_inflow.columns, rotation=40)
plt.legend(['Inflow', 'OutFlow'])
bike_inflow.loc['wsk596'].plot()
bike_outflow.loc['wsk596'].plot()
plt.xticks(list(range(bike_inflow.shape[1])), bike_inflow.columns, rotation=40)
plt.legend(['Inflow', 'OutFlow'])
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 1],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_outflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 0],
values='LOCK_STATUS', index=['geohash'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_remain = (bike_inflow - bike_outflow).fillna(0)
bike_remain[bike_remain < 0] = 0
bike_remain = bike_remain.sum(1)
bike_fence['DENSITY'] = bike_fence['geohash'].map(bike_remain).fillna(0)
按照最近邻经纬度
按照订单计算与停车点的距离计算潮汐点;
import hnswlib
import numpy as np
p = hnswlib.Index(space='l2', dim=2)
p.init_index(max_elements=300000, ef_construction=1000, M=32)
p.set_ef(1024)
p.set_num_threads(14)
p.add_items(np.stack(bike_fence['FENCE_CENTER'].values))
index, dist = p.knn_query(bike_order[['LATITUDE','LONGITUDE']].values[:], k=1)
bike_order['fence'] = bike_fence.iloc[index.flatten()]['FENCE_ID'].values
bike_inflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 1],
values='LOCK_STATUS', index=['fence'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_outflow = pd.pivot_table(bike_order[bike_order['LOCK_STATUS'] == 0],
values='LOCK_STATUS', index=['fence'],
columns=['DAY'], aggfunc='count', fill_value=0
)
bike_remain = (bike_inflow - bike_outflow).fillna(0)
bike_remain[bike_remain < 0] = 0
bike_remain = bike_remain.sum(1)
bike_density = bike_remain / bike_fence.set_index('FENCE_ID')['FENCE_AREA']
bike_density = bike_density.sort_values(ascending=False).reset_index()
bike_density = bike_density.fillna(0)
bike_density['label'] = '0'
bike_density.iloc[:100, -1] = '1'
bike_density['BELONG_AREA'] ='厦门'
bike_density = bike_density.drop(0, axis=1)
bike_density.columns = ['FENCE_ID', 'FENCE_TYPE', 'BELONG_AREA']
bike_density.to_csv('result.txt', index=None, sep='|')
打卡成绩.png