2019-12-29 14:30-17:11 时段的数据浓度需要乘以10
2019-12-24 21:17:24.6-21:43:00.7 数据为每秒1个数 可以舍去
技术点:pd.TimedeltaIndex
之前想把时间项re_time和数值项mic连起来
两者无法直接相加,写循环又慢得要死,苦恼了好久。
还想过把re_time和mic项都转成str拼接起来,再通过pd.to_datetime转回时间项。但这个方法有个大bug:有的mic取整后相当于1秒,这时秒位就得进1,而字符串拼接根本办不到。
百度后查到pd.TimedeltaIndex 还挺好用的 非常快就解决了
Pandas详解七之DatetimeIndex、PeriodIndex和TimedeltaIndex时间序列_yungeisme的博客-CSDN博客_datetimeindex timedelta
import numpy as np
import pandas as pd
import matplotlib as mpl
import gc
import os
import math
import glob
import datetime
from matplotlib import pyplot as plt
from datetime import datetime,timedelta
def transform(x):
    """Return ``x`` as a ``datetime.datetime`` with microseconds set to 0.

    ``x`` must provide ``to_pydatetime()`` (e.g. a ``pandas.Timestamp``).
    """
    return x.to_pydatetime().replace(microsecond=0)


# Input folder; every *.csv file directly inside it is collected below.
path = r'D:\分组浓度-16group'
file = glob.glob(os.path.join(path, "*.csv"))
#### 1.Get BC data.
# Read only the "DateTimewave_tmp" column of every CSV and stack the pieces.
db = [pd.read_csv(f, sep=',', usecols=["DateTimewave_tmp"]) for f in file]
index = pd.concat(db)

# Inspection of repeated raw timestamps; as a bare expression its result is
# discarded when this runs as a script.
index[index["DateTimewave_tmp"].duplicated()]

index["re_time"] = pd.to_datetime(index["DateTimewave_tmp"])

# Tenths-of-a-second digit of each timestamp (0-9), truncated, not rounded.
index["mic"] = (
    (index["re_time"].dt.microsecond / 100000).floordiv(1).astype(int)
)

# Strip the sub-second part from re_time.
index["re_time"] = index["re_time"].map(transform)

# Label the rows by the raw "DateTimewave_tmp" values so later code can slice
# by timestamp label.
index.index = index["DateTimewave_tmp"]
# ---- Rows up to and including the label below (presumably the part of the
# record with 0.2 s resolution - confirm against the raw data). ----
# .copy() makes this an independent frame, so the assignments below do not
# raise SettingWithCopyWarning or depend on view-vs-copy behaviour of .loc.
index_02s = index.loc[:"2019-12-24 21:14:48.515624960", :].copy()

# Move every odd tenths digit up to the next even one (1->2, 3->4, ..., 9->10)
# so each timestamp lands on a 0.2 s grid.
index_02s["mic"] = index_02s["mic"] + index_02s["mic"] % 2

# Rebuild the full timestamp as whole seconds + tenths. Adding a timedelta
# handles the carry when mic == 10 (a full second). pd.to_timedelta is used
# because the `unit` keyword of pd.TimedeltaIndex is deprecated in recent
# pandas releases.
index_02s["time"] = index_02s["re_time"] + pd.to_timedelta(
    index_02s["mic"].to_numpy() * 100000, unit="us"
)

# Walk the rows in order: whenever a timestamp equals the previous row's
# (possibly already shifted) timestamp, push it 0.2 s later. One pass may not
# remove every duplicate, so the check below may call for repeating this step.
# The loop works on a NumPy array instead of per-cell .iloc access (which is
# very slow) and starts at 1 so the first row is never compared with the last
# row through index -1.
times_02s = index_02s["time"].to_numpy(copy=True)
step_02s = np.timedelta64(200, "ms")
j = 0
for i in range(1, len(times_02s)):
    if times_02s[i - 1] == times_02s[i]:
        times_02s[i] += step_02s
        j += 1
index_02s["time"] = times_02s
print(j)  # number of rows that were shifted

# Check step (the 0.1 s part is checked the same way): export every row whose
# adjusted timestamp is still duplicated, for manual inspection.
index_02s[index_02s["time"].duplicated(keep=False)].to_csv(r"D:\分组浓度处理\index_02s_dup.csv")

#### Drop the helper columns before writing the result.
index_02s.drop(columns=["DateTimewave_tmp", "re_time", "mic"], inplace=True)
index_02s.to_csv(r"D:\分组浓度处理\index_02s_20191224_211448.csv")
# ---- Rows from the label below onwards (presumably the part of the record
# with 0.1 s resolution - confirm against the raw data). ----
# .copy() makes this an independent frame, so the assignments below do not
# raise SettingWithCopyWarning or depend on view-vs-copy behaviour of .loc.
index_01s = index.loc["2019-12-24 22:00:00.414062080":, :].copy()

# Rebuild the full timestamp as whole seconds + tenths. pd.to_timedelta is
# used because the `unit` keyword of pd.TimedeltaIndex is deprecated in recent
# pandas releases.
index_01s["time"] = index_01s["re_time"] + pd.to_timedelta(
    index_01s["mic"].to_numpy() * 100000, unit="us"
)

# Walk the rows in order: whenever a timestamp equals the previous row's
# (possibly already shifted) timestamp, push it 0.1 s later. One pass may not
# remove every duplicate, so this step may need to be checked and repeated.
# Original author's note: early in the record row i-1 should instead be moved
# 0.1 s earlier, later in the record 0.1 s later - only the "shift row i
# later" rule is implemented here.
# The loop works on a NumPy array instead of per-cell .iloc access (which is
# very slow) and starts at 1 so the first row is never compared with the last
# row through index -1.
times_01s = index_01s["time"].to_numpy(copy=True)
step_01s = np.timedelta64(100, "ms")
j = 0
for i in range(1, len(times_01s)):
    if times_01s[i - 1] == times_01s[i]:
        times_01s[i] += step_01s
        j += 1
index_01s["time"] = times_01s
print(j)  # number of rows that were shifted

# Drop the helper columns before writing the result.
index_01s.drop(columns=["DateTimewave_tmp", "re_time", "mic"], inplace=True)
index_01s.to_csv(r"D:\分组浓度处理\index_01s_.csv")