司马山哥关注
1.json模块介绍
json 模块提供了四个常用函数:dumps、dump(序列化)和 loads、load(反序列化);其中带 s 的处理字符串,不带 s 的处理文件对象。
详情参见https://www.cnblogs.com/tjuyuan/p/6795860.html
2 json文件读取
import json

# Read the raw JSON text inside a with-block so the file handle is closed
# as soon as the read finishes, even if an error is raised.
with open("user.json", encoding="utf-8") as user_file:
    user = user_file.read()

# Parse the JSON text into Python objects; the later steps index the result
# like a dict (userDict["task_data"]["call_info"]).
userDict = json.loads(user)
userDict 是一个字典(dict)对象,可以通过字典操作读取各字段的值。
3 创建数据框DataFrame,便于保存为csv文件
# Empty result table with one named column per statistic; rows are appended
# to it later and the table is finally written out as a CSV file.
CallCount = pd.DataFrame(
    columns=[
        'date_time',                # the cycle's "call_cycle" value
        'total_call_count',         # the cycle's "total_call_count" value
        'domestic_calls',           # number of '国内长途' (domestic long-distance) calls
        'local',                    # number of '本地通话' (local) calls
        'called',                   # number of '被叫' (incoming) calls
        'dialing',                  # number of '主叫' (outgoing) calls
        'max_time',                 # largest "call_time" in the cycle
        'max_time_number',          # other party's number for that call
        'frequently_number',        # other party's number that occurs most often
        'frequently_number_calls',  # how many calls involved that number
    ]
)
4 数据抽取并统计字段
from collections import Counter

# Walk every call cycle in the parsed JSON and compute its statistics.
for item in userDict["task_data"]["call_info"]:
    date_time = item["call_cycle"]
    total_call_count = item["total_call_count"]
    records = item["call_record"]

    # Frequency tables. Counter is a dict subclass, so CallLandType[key] and
    # CallLandType.get(key, 0) keep working; a key that never occurred reads
    # as 0 instead of raising KeyError.
    CallLandType = Counter(call["call_land_type"] for call in records)
    CallTypeName = Counter(call["call_type_name"] for call in records)
    CallOtherNumber = Counter(call["call_other_number"] for call in records)
    CallTime = [call["call_time"] for call in records]

    if records:
        # max() returns the first record holding the largest call_time, the
        # same record the original located via CallTime.index(max(CallTime)).
        # NOTE(review): assumes call_time values compare in duration order
        # (e.g. numbers, not free-form strings) - confirm against the data.
        longest_call = max(records, key=lambda call: call["call_time"])
        MaxTime = longest_call["call_time"]
        MaxTimeNumber = longest_call["call_other_number"]
        # Most frequent counterpart number; ties resolve to the number that
        # was seen first.
        FrequentlyNumber = max(CallOtherNumber, key=CallOtherNumber.get)
        FrequentlyNumberCalls = CallOtherNumber[FrequentlyNumber]
    else:
        # A cycle without any call record: max() on an empty sequence would
        # raise ValueError, so fall back to empty values.
        MaxTime = MaxTimeNumber = FrequentlyNumber = None
        FrequentlyNumberCalls = 0

    # The row-building statements shown under the next heading continue this
    # loop body: they run once per call cycle.
5 数据保存
# NOTE: everything up to and including the CallCount.loc assignment belongs
# to the body of the per-cycle loop above (one row per call cycle); only the
# final to_csv call runs once, after the loop has finished.

# A plain list keeps every value's own type; np.array on mixed values would
# coerce all of them to strings. .get(..., 0) records 0 for a category that
# did not occur in this cycle instead of raising KeyError.
result = [
    date_time,
    total_call_count,
    CallLandType.get('国内长途', 0),
    CallLandType.get('本地通话', 0),
    CallTypeName.get('被叫', 0),
    CallTypeName.get('主叫', 0),
    MaxTime,
    MaxTimeNumber,
    FrequentlyNumber,
    FrequentlyNumberCalls,
]
# Append at the next free integer label. len(CallCount) replaces the separate
# `count` variable, which was incremented but never initialised.
CallCount.loc[len(CallCount)] = result

# Write the finished table once, without the index column.
CallCount.to_csv("callcount.csv", index=False, sep=',')
注释:CSV2JSON
#-*- coding:utf-8 -*-
import os
import os.path
import csv
rootdir = "/Users/ying/Documents"  # folder that stores the csv files


def csv_rows_as_dicts(csv_path, encoding="utf-8"):
    """Return the data rows of a CSV file as dicts keyed by the header row.

    Args:
        csv_path: Path of the CSV file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list with one dict per row after the header, mapping each header
        cell to the cell in the same position. Cells beyond the header's
        length are dropped, and a row shorter than the header simply lacks
        the trailing keys. An empty file yields an empty list.
    """
    # newline="" lets the csv module handle line endings itself, as the csv
    # documentation requires for files opened in text mode.
    with open(csv_path, newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader, None)
        if header is None:  # empty file: no header, hence no rows
            return []
        return [dict(zip(header, row)) for row in reader]


# Process every csv file found anywhere below rootdir.
for parent, dirnames, filenames in os.walk(rootdir):
    for filename in filenames:
        # Test the file name itself, so a directory whose name merely
        # contains ".csv" does not pull in unrelated files.
        if not filename.endswith(".csv"):
            continue
        abs_path = os.path.join(parent, filename)
        print(abs_path)
        for json_row in csv_rows_as_dicts(abs_path):
            print(json_row)
注:将 rootdir 改为存放 csv 文件的文件夹路径,即可方便地把其中每个 csv 文件的各行数据转成 Python 字典(即 JSON 对象的形式)。
</article>