Isolation Forest Training and Result Visualization

1. Extract the average speed and number of passing vehicles on the input road, bucketed into 10-, 20-, or 30-minute intervals

def qushu(tablenames, filename1):
    """Query each day's table and dump the per-bucket aggregates to a CSV file."""
    result = []
    for table_name in tablenames:
        print(table_name)
        # Round ctime to the nearest 600-second (10-minute) bucket and compute,
        # per bucket: average speed, record count, and distinct-vehicle count.
        # start_road, end_road and cursor are globals set by the driver in step 4.
        sql_str = "SELECT AVG(\"VELOCITY\"), count(*), count(distinct \"TM_SERIAL\") AS COUNT_TM, " \
                  "TIMESTAMP WITH TIME ZONE 'epoch' + " \
                  "INTERVAL '1 second' * round(extract('epoch' from \"ctime\") / 600) * 600 as timestamp " \
                  "FROM public.\"{0}\" " \
                  "where \"osm_id_new\" >= {1} and \"osm_id_new\" <= {2} " \
                  "GROUP BY timestamp".format(table_name, start_road, end_road)
        print(sql_str)
        cursor.execute(sql_str)
        result.extend(cursor.fetchall())

    with open(filename1, 'a', newline='') as f:
        csv_writer = csv.writer(f)
        for row in result:
            csv_writer.writerow(row)
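
The 600 in the SQL above is the bucket width in seconds, so the query as written only covers the 10-minute case. A minimal sketch of how the same expression could be parameterized for the 20- and 30-minute runs (the build_sql helper and its bucket_seconds parameter are illustrative additions, not part of the original script):

def build_sql(table_name, start_road, end_road, bucket_seconds=600):
    # bucket_seconds: 600 -> 10 min, 1200 -> 20 min, 1800 -> 30 min.
    # round() snaps each ctime to the *nearest* bucket boundary; use floor()
    # instead if buckets should be half-open intervals.
    return (
        'SELECT AVG("VELOCITY"), count(*), count(distinct "TM_SERIAL") AS COUNT_TM, '
        "TIMESTAMP WITH TIME ZONE 'epoch' + INTERVAL '1 second' * "
        'round(extract(epoch from "ctime") / {0}) * {0} as timestamp '
        'FROM public."{1}" '
        'where "osm_id_new" >= {2} and "osm_id_new" <= {3} '
        'GROUP BY timestamp'
    ).format(bucket_seconds, table_name, start_road, end_road)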

2. Process the extracted rows: for each time bucket, attach the speed and vehicle count of the preceding and following buckets (qian = previous, hou = next in the variable names)

def parsedata(contents, filename2):
    """Attach each bucket's previous/next speed and vehicle count.

    When a neighbouring bucket is missing (a gap in the data), the current
    bucket's own values are used as the fallback.
    """
    result1 = []
    for line in contents:
        content = line.replace("\n", "").split(',')
        avg_velocity = content[0]
        count_tm = content[2]
        time = content[3].replace("+08:00", "")
        time = datetime.datetime.strptime(time, '%Y-%m-%d %H:%M:%S')
        hour = time.hour
        minute = time.minute
        # Index of the 10-minute bucket within the day (0..143).
        hour_minute = hour * 6 + minute // 10
        result1.append([content[3], time, avg_velocity, count_tm, hour, minute, hour_minute])

    changdu = len(result1)
    result2 = []
    for i, val in enumerate(result1):
        hour_minute = val[6]
        # A neighbour only counts if its bucket index is exactly one step away.
        has_prev = i > 0 and result1[i - 1][6] == hour_minute - 1
        has_next = i < changdu - 1 and result1[i + 1][6] == hour_minute + 1
        avg_velocity_qian = result1[i - 1][2] if has_prev else val[2]
        avg_velocity_hou = result1[i + 1][2] if has_next else val[2]
        count_tm_qian = result1[i - 1][3] if has_prev else val[3]
        count_tm_hou = result1[i + 1][3] if has_next else val[3]
        val.append(float(avg_velocity_qian))
        val.append(float(avg_velocity_hou))
        val.append(int(count_tm_qian))
        val.append(int(count_tm_hou))
        result2.append(val)

    with open(filename2, 'a', newline='') as f:
        csv_writer = csv.writer(f)
        title = ['time', 'time2', 'avg_velocity', 'count_tm', 'hour', 'minute', 'hour_minute',
                 'avg_velocity_qian', 'avg_velocity_hou', 'count_tm_qian', 'count_tm_hou']
        csv_writer.writerow(title)
        for row in result2:
            csv_writer.writerow(row)
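
As a quick sanity check of the fallback logic, here is a toy input (the numbers are made up): three buckets at 08:00, 08:10 and 08:30, so the 08:20 bucket is missing.

toy = [
    "40.0,12,10,2021-02-05 08:00:00+08:00",
    "35.0,15,13,2021-02-05 08:10:00+08:00",
    "20.0,30,25,2021-02-05 08:30:00+08:00",
]
parsedata(toy, 'toy_test.csv')
# Expected: the 08:10 row gets avg_velocity_qian=40.0 (from 08:00) but
# avg_velocity_hou=35.0 (its own value), because 08:20 is missing; the
# 08:30 row falls back to its own values on both sides.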

3. Isolation forest training

def score(filename2, filename3):
    dataset = pd.read_csv(filename2, engine='python')
    dataset = dataset.fillna(0)

    # 8-feature matrix (current bucket plus both neighbours) and
    # 4-feature matrix (current bucket only).
    X_col = dataset[['avg_velocity_qian', 'avg_velocity', 'avg_velocity_hou',
                     'count_tm_qian', 'count_tm', 'count_tm_hou', 'hour', 'hour_minute']].values
    X1_col = dataset[['avg_velocity', 'count_tm', 'hour', 'hour_minute']].values

    rs = np.random.RandomState(64)
    ifmodel = IsolationForest(n_estimators=500, verbose=2, n_jobs=2, max_samples=256,
                              random_state=rs, max_features=8, contamination='auto')
    ifmodel1 = IsolationForest(n_estimators=500, verbose=2, n_jobs=2, max_samples=256,
                               random_state=rs, max_features=4, contamination='auto')

    ifmodel.fit(X_col)
    ifmodel1.fit(X1_col)
    # decision_function returns values in roughly [-0.5, 0.5]; negative means
    # anomalous. abs(x - 0.5) maps it back to the paper-style anomaly score in
    # [0, 1], where values close to 1 indicate anomalies.
    Iso_anomaly_score = ifmodel.decision_function(X_col)
    Iso_anomaly_score_8 = abs(Iso_anomaly_score - 0.5)
    Iso_anomaly_score1 = ifmodel1.decision_function(X1_col)
    Iso_anomaly_score1_4 = abs(Iso_anomaly_score1 - 0.5)

    Iso_predict = ifmodel.predict(X_col)  # 1 = normal, -1 = anomaly
    ano_lable = np.column_stack((dataset['time'], dataset['time2'],
                                 dataset['avg_velocity_qian'], dataset['avg_velocity'],
                                 dataset['avg_velocity_hou'], dataset['count_tm_qian'],
                                 dataset['count_tm'], dataset['count_tm_hou'],
                                 dataset['hour'], dataset['hour_minute'],
                                 Iso_anomaly_score, Iso_anomaly_score_8,
                                 Iso_anomaly_score1, Iso_anomaly_score1_4, Iso_predict))
    df = pd.DataFrame(data=ano_lable,
                      columns=['time', 'time2', 'avg_velocity_qian', 'avg_velocity',
                               'avg_velocity_hou', 'count_tm_qian', 'count_tm', 'count_tm_hou',
                               'hour', 'hour_minute', 'Iso_anomaly_score', 'Iso_anomaly_score_8',
                               'Iso_anomaly_score1', 'Iso_anomaly_score1_4', 'Iso_predict'])
    df.to_csv(filename3)  # to_csv also writes the index as an extra first column
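
The abs(x - 0.5) conversion relies on how scikit-learn scales its scores: with contamination='auto', decision_function equals score_samples + 0.5, and score_samples is the negated anomaly score from the original isolation forest paper. A small self-contained check (the data is synthetic, for illustration only):

import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.RandomState(0).normal(size=(500, 4))  # synthetic data
m = IsolationForest(contamination='auto', random_state=0).fit(X)

paper_score = -m.score_samples(X)              # paper-style anomaly score in (0, 1)
converted = abs(m.decision_function(X) - 0.5)  # the conversion used above
assert np.allclose(paper_score, converted)     # identical by construction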

4. The complete driver code


import csv
import datetime

import numpy as np
import pandas as pd
import psycopg2
from sklearn.ensemble import IsolationForest

tablenames = []
with open("tablenames.csv", "r") as f:
    for line in f.readlines():
        tablenames.append(line.replace("\n", ""))

print(tablenames)

conn = psycopg2.connect(database="taxi", user="postgres", password="AdminVge100",
                        host="127.0.0.1", port="5432")
cursor = conn.cursor()

# Each line of secondary.csv holds a start and end osm_id_new for one road range.
with open('secondary.csv', 'r') as f:
    road_ranges = f.readlines()

for line in road_ranges:
    print(line)
    osm_id_new = line.replace("\n", "").split(",")
    start_road = osm_id_new[0]
    end_road = osm_id_new[1]
    filename1 = ".\\data\\10min_" + start_road + "-" + end_road + ".csv"
    filename2 = ".\\dataparse\\10min_" + start_road + "-" + end_road + "_test.csv"
    filename3 = ".\\result\\10min_" + start_road + "-" + end_road + "_result8.csv"
    try:
        qushu(tablenames, filename1)
        with open(filename1, 'r') as f:
            contents = f.readlines()
        parsedata(contents, filename2)
        score(filename2, filename3)
    except Exception as e:
        # Skip road ranges that fail (e.g. missing table or empty result)
        # instead of aborting the whole run.
        print(e)
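
One practical caveat: qushu and parsedata open their outputs in append mode, so rerunning the pipeline after a crash stacks a second copy of the rows (and a second header) onto the same CSVs. A minimal guard, placed at the top of the try block, assuming stale outputs can simply be regenerated (this cleanup is my addition, not part of the original script):

import os

# Remove leftover outputs from a previous run before regenerating them.
for path in (filename1, filename2, filename3):
    if os.path.exists(path):
        os.remove(path)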

5. Aggregate the anomaly results
Be careful to choose which anomaly score gets aggregated:
Score8: the 8-feature model
Score4: the 4-feature model
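
Because df.to_csv wrote the index as an extra first column, the hard-coded indices below (i[4], i[7], i[14], ...) are shifted by one relative to the header in step 3: column 12 is Iso_anomaly_score_8 and column 14 is Iso_anomaly_score1_4. Selecting by name with pandas avoids the off-by-one risk entirely (a sketch equivalent to the index-based reads):

import pandas as pd

df = pd.read_csv(filename, index_col=0)
# 'Iso_anomaly_score1_4' is the converted score of the 4-feature model;
# swap in 'Iso_anomaly_score_8' to aggregate the 8-feature model instead.
anomalies = df[df['Iso_anomaly_score1_4'] > 0.6]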

import csv
import os

result1 = []

file_dir = r"E:\李猛硕士毕设\实验部分\实验数据\2021-02-05\secondary\secondary\20min\result"
files = os.listdir(file_dir)

result = []
for file_name in files:
    filename = file_dir + "\\" + file_name
    # Per-bucket value lists keyed by hour_minute: dict1 holds speeds, dict2
    # vehicle counts. Sorting them later lets each observation be ranked
    # against the same time of day across the whole period.
    dict1 = {}
    dict2 = {}
    with open(filename, 'r') as f:
        next(f)  # skip the header row
        contents = f.readlines()
        if len(contents) < 1800:
            # Skip road ranges with too few records.
            continue
        for i in contents:
            try:
                i = i.replace("\n", "").split(',')
                # Column layout (index column first): 4 = avg_velocity,
                # 7 = count_tm, 9 = hour, 10 = hour_minute.
                avg_velocity = i[4]
                hour = int(i[9])
                hour_minute = i[10]
                count_tm = i[7]
                result.append(i)
                # Only daytime buckets (07:00-21:50) take part in the ranking.
                if 6 < hour < 22:
                    dict1.setdefault(hour_minute, []).append(float(avg_velocity))
                    dict2.setdefault(hour_minute, []).append(int(count_tm))
            except Exception:
                pass  # skip malformed rows

    for value in dict1.values():
        value.sort()
    for value in dict2.values():
        value.sort()

    with open(filename, 'r') as f:
        next(f)
        contents = f.readlines()
        for i in contents:
            try:
                i = i.replace("\n", "").split(',')
                avg_velocity = i[4]
                count_tm = i[7]
                hour_minute = i[10]
                hour = int(i[9])
                # Column 14 is Iso_anomaly_score1_4, the converted score of the
                # 4-feature model; use column 12 for the 8-feature model.
                Iso_anomaly_score1_4 = float(i[14])
                if 6 < hour < 22:
                    # Rank of this observation among all values seen at the
                    # same time of day (0 = smallest).
                    avg_velocity_sort = dict1[hour_minute].index(float(avg_velocity))
                    count_tm_sort = dict2[hour_minute].index(int(count_tm))
                    i.append(avg_velocity_sort)
                    i.append(count_tm_sort)
                    i.append(file_name)
                    if Iso_anomaly_score1_4 > 0.6:
                        result1.append(i)
            except Exception:
                pass

print(len(result))
with open('20min_secondary_4_异常汇总.csv', 'a', newline='') as f:
    csv_writer = csv.writer(f)
    title = ['id', 'time', 'time2', 'avg_velocity_qian', 'avg_velocity', 'avg_velocity_hou',
             'count_tm_qian', 'count_tm', 'count_tm_hou', 'hour', 'hour_minute',
             'Iso_anomaly_score', 'Iso_anomaly_score_8', 'Iso_anomaly_score1',
             'Iso_anomaly_score1_4', 'predict', 'avg_velocity_sort', 'count_tm_sort', 'file_name']
    csv_writer.writerow(title)
    for i in result1:
        csv_writer.writerow(i)
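
The raw .index() rank depends on how many days contribute values at that time slot, so ranks are not directly comparable across roads. A normalized percentile would make them comparable (the percentile_rank helper is my sketch, not in the original):

import bisect

def percentile_rank(sorted_values, x):
    # Fraction of same-time-of-day observations that are <= x.
    # sorted_values must already be sorted ascending.
    return bisect.bisect_right(sorted_values, x) / len(sorted_values)

# e.g. percentile_rank(dict1[hour_minute], float(avg_velocity))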

6. Label the anomalous samples and split them by date into separate CSVs

import csv
from datetime import datetime

# Each row of the manually labelled file carries, at the end, a start and end
# osm_id_new plus a label code; the first field is the timestamp.
result = []
with open('20min_4_label.csv', 'r') as f:
    next(f)
    contents = f.readlines()
    for i in contents:
        i = i.replace("\n", "").split(",")
        result.append(i)

# Label every road segment in the range [start, end].
road_label = []
for i in result:
    start = int(i[-2])
    end = int(i[-1])
    label = i[-3]
    time = i[0]
    # Decide from the label code whether traffic got slower or faster.
    if label in ('1', '2', '5', '6'):
        change = 1  # slower
    else:
        change = 2  # faster
    for j in range(start, end + 1):
        road_label.append([j, label, change, time])

# Group segment ids by date and change type: dict1[date][change] -> [osm_id_new, ...]
dict1 = {}
for i in road_label:
    osm_id_new = i[0]
    riqi = i[-1]
    change = i[-2]
    a = datetime.strptime(riqi, "%Y/%m/%d %H:%M").date()
    dict1.setdefault(a, {}).setdefault(change, []).append(osm_id_new)

# Write one CSV per date.
filepath = ".\\road_label\\"
for key1, value1 in dict1.items():
    key1 = key1.strftime('%Y%m%d')
    filename = filepath + key1 + '.csv'
    with open(filename, 'a', newline='') as m:
        csv_writer3 = csv.writer(m)
        csv_writer3.writerow(['osm_id_new', 'change'])
        for key2, value2 in value1.items():
            # Deduplicate segment ids; sort for a deterministic file order.
            for j in sorted(set(value2)):
                csv_writer3.writerow([j, key2])

# Also keep one flat file with every (segment, label, change, time) row.
with open("road_label.csv", "a", newline='') as t:
    csv_writer2 = csv.writer(t)
    csv_writer2.writerow(['osm_id_new', 'label', 'change', 'time'])
    for c in road_label:
        csv_writer2.writerow(c)
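
Finally, since the title promises visualization, here is a minimal sketch of how one result file could be plotted: average speed over time with the flagged buckets highlighted. The file path is illustrative, the 0.6 threshold mirrors the aggregation step, and matplotlib is my choice rather than anything prescribed by the original:

import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv(r".\result\10min_113000-113005_result8.csv", index_col=0)
df['time2'] = pd.to_datetime(df['time2'])
anomalies = df[df['Iso_anomaly_score1_4'] > 0.6]

plt.figure(figsize=(12, 4))
plt.plot(df['time2'], df['avg_velocity'], lw=0.8, label='avg velocity')
plt.scatter(anomalies['time2'], anomalies['avg_velocity'],
            color='red', s=12, label='anomaly (score > 0.6)')
plt.xlabel('time')
plt.ylabel('average velocity')
plt.legend()
plt.tight_layout()
plt.show()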