本文主要利用 pandas、matplotlib、numpy 等工具包，提取出现次数最多的前 10 个时区，并绘制水平条形图：
import json
import pandas as pd
import matplotlib.pylab as plt
from pandas import DataFrame, Series
import numpy as np
# Load line-delimited JSON records, count how often each time zone ('tz')
# occurs, then plot and save a horizontal bar chart of the 10 most frequent.

# Raw string: the Windows backslashes must never be read as escape sequences.
# The plain literal only worked because "\P", "\d", "\e" ... are not valid
# escapes, and it triggers an invalid-escape warning on current Python.
path = r"D:\Python\datalearning\利用Python进行数据分析\example.txt"

# One JSON document per line. The context manager closes the file handle
# deterministically. The encoding is pinned because JSON text is UTF-8
# (RFC 8259) while the platform default may differ (e.g. GBK on Windows).
# Blank lines are skipped so a trailing newline cannot break json.loads.
with open(path, encoding="utf-8") as source:
    records = [json.loads(line) for line in source if line.strip()]
frame = DataFrame(records)

# Handle missing or unknown values: records without a 'tz' key become
# "Missing", records whose 'tz' is an empty string become "Unknown".
clean_tz = frame['tz'].fillna("Missing")
clean_tz.loc[clean_tz == ''] = 'Unknown'

# value_counts() sorts by descending frequency, so the first 10 rows are
# the 10 most common time zones.
tz_counts = clean_tz.value_counts()
top_tz_axes = tz_counts.iloc[:10].plot(kind='barh', rot=0)  # horizontal bar chart
print(top_tz_axes)  # echoes the Axes repr, as the original script did

# Save before show(): the figure may no longer be available for saving once
# the interactive window has been closed.
plt.savefig('./time_zones.png', bbox_inches='tight')  # write the image to the current directory
plt.show()