绘制各类目对比柱状图
import pymongo
import charts
# Connect to the MongoDB server on localhost:27017.
client = pymongo.MongoClient('localhost',27017)
# Database named 'local' on that server.
local = client['local']
# Collection 'log_y'; the functions below read their documents from it.
_58_infos = local['log_y']
def data_gen(type):
    """Yield one chart series per distinct value of ``cates[2]``.

    Every document in ``_58_infos`` is read, documents are tallied by the
    third element of their ``cates`` list, and one dict is yielded per
    distinct value.

    Args:
        type: Value stored under the ``'type'`` key of every yielded series.

    Yields:
        dict: ``{'name': <cates[2] value>, 'data': [<count>], 'type': type}``.
    """
    tally = {}
    for doc in _58_infos.find():
        key = doc['cates'][2]
        tally[key] = tally.get(key, 0) + 1

    for name, count in tally.items():
        # 'data' is a one-element list: a single bar per series.
        yield {'name': name, 'data': [count], 'type': type}
# Materialise the generator into the list of column series, then plot it
# inline with the chart title set.
series = list(data_gen('column'))
charts.plot(series, show='inline', options={'title': {'text': '发帖量'}})
屏幕快照 2016-06-02 下午9.09.28.png
绘制发帖量折线图
import pymongo
import charts
from datetime import timedelta, date
import time
# Connect to the MongoDB server on localhost:27017 and select local.log_y.
client = pymongo.MongoClient('localhost', 27017)
local = client['local']
_58_infos = local['log_y']

# Preview: print the third 'cates' element of the first 10 documents.
for doc in _58_infos.find().limit(10):
    print(doc['cates'][2])
def gen_days(date1, date2, fmt='%Y.%m.%d'):
    """Yield every calendar day from ``date1`` to ``date2`` inclusive.

    Args:
        date1: First day, as a string in ``fmt``.
        date2: Last day, as a string in ``fmt``.
        fmt: ``strptime``/``strftime`` format used both to parse the
            arguments and to render the yielded days. Defaults to the
            ``'YYYY.MM.DD'`` layout used by the original implementation.

    Yields:
        str: Each day formatted with ``fmt``, in ascending order. Nothing is
        yielded when ``date1`` is later than ``date2``.

    Raises:
        ValueError: If either argument does not match ``fmt`` (raised on the
            first iteration, because this is a generator).
    """
    start = date(*time.strptime(date1, fmt)[:3])
    end = date(*time.strptime(date2, fmt)[:3])
    # Offsets from the start day avoid stepping one day past ``end``, which
    # would overflow when ``end`` is the largest representable date.
    for offset in range((end - start).days + 1):
        yield (start + timedelta(days=offset)).strftime(fmt)
def data_gen(date1, date2, cates, type='line'):
    """Yield one series per requested category with per-day document counts.

    Documents are read from ``_58_infos`` where ``pub_date`` lies between the
    first and last generated day and ``cates`` contains any requested value.
    Each document is counted under its ``cates[2]`` value on its ``pub_date``.

    Args:
        date1: First day, in the string format accepted by ``gen_days``.
        date2: Last day, in the same format.
        cates: List of category names to produce series for.
        type: Value stored under the ``'type'`` key of every yielded series.

    Yields:
        dict: ``{'name': <category>, 'data': [<count per day>], 'type': type}``
        with ``'data'`` aligned to the days from ``gen_days(date1, date2)``.
        When the day range is empty, every series has empty ``'data'`` and
        the collection is not queried.
    """
    days = list(gen_days(date1, date2))
    # One zero-filled row per requested category, one column per day.
    stats = {cate: [0] * len(days) for cate in cates}

    if days:
        # Resolve day -> column once instead of scanning ``days`` per document.
        day_index = {day: pos for pos, day in enumerate(days)}
        query = {
            'pub_date': {'$gte': days[0], '$lte': days[-1]},
            'cates': {'$in': cates},
        }
        for info in _58_infos.find(query):
            doc_cates = info['cates']
            cate = doc_cates[2] if len(doc_cates) > 2 else None
            pos = day_index.get(info['pub_date'])
            # The ``$in`` filter matches any element of ``cates``, so the
            # third element may not be a requested category; likewise a
            # ``pub_date`` inside the string range may not be one of the
            # generated day strings. Such documents are skipped, not fatal.
            if pos is None or cate not in stats:
                continue
            stats[cate][pos] += 1

    for name, counts in stats.items():
        yield {'name': name, 'data': counts, 'type': type}
# Quick check: print the series generated for a single requested value.
print(list(data_gen('2015.12.24', '2016.01.05', ['朝阳'])))

# Chart configuration; the x axis lists every day in the plotted range.
options = {
    'chart': {'zoomType': 'xy'},
    'title': {'text': '发帖量统计'},
    'subtitle': {'text': '可视化统计图表'},
    'xAxis': {'categories': list(gen_days('2015.12.24', '2016.01.05'))},
    'yAxis': {'title': {'text': '数量'}},
}

# One line series per category over the same date range as the x axis.
series = list(data_gen(
    '2015.12.24',
    '2016.01.05',
    ['北京二手家电', '北京二手台式机/配件', '北京二手笔记本']
))
charts.plot(series, show='inline', options=options)
屏幕快照 2016-06-02 下午9.16.00.png
- 学习了datetime库的一些知识。
- 使用pymongo.find方法过滤自己需要的数据。
绘制热销商品的分布饼图
import pymongo
import charts
# Connect to the MongoDB server on localhost:27017.
client = pymongo.MongoClient('localhost', 27017)
# Database named 'local' on that server.
local = client['local']
# Collection 'log_y'; the aggregation helpers below run against it.
_58_infos = local['log_y']
def data_gen_1(date, time):
    """Yield ``[area, count]`` pairs for documents matching a date and time.

    Runs an aggregation on ``_58_infos``: keep documents whose ``pub_date``
    equals ``date`` and whose ``time`` equals ``time``, group them by the
    first element of ``area``, count each group, sort by count descending
    and keep at most 100 groups.

    Args:
        date: Value the ``pub_date`` field must equal.
        time: Value the ``time`` field must equal.

    Yields:
        list: ``[<first element of the group's area slice>, <count>]``.
    """
    match_stage = {'$match': {'$and': [{'pub_date': date}, {'time': time}]}}
    group_stage = {
        '$group': {'_id': {'$slice': ['$area', 1]}, 'counts': {'$sum': 1}}
    }
    pipeline = [
        match_stage,
        group_stage,
        {'$sort': {'counts': -1}},
        {'$limit': 100},
    ]
    for row in _58_infos.aggregate(pipeline):
        # ``_id`` is the one-element slice of ``area``; unwrap it.
        yield [row['_id'][0], row['counts']]
def data_gen_2(date, time):
    """Yield ``[area, price total]`` pairs for documents matching a date and time.

    Same aggregation shape as a per-area count, except that each group
    accumulates the sum of the ``price`` field: keep documents whose
    ``pub_date`` equals ``date`` and whose ``time`` equals ``time``, group by
    the first element of ``area``, sort by the sum descending and keep at
    most 100 groups.

    Args:
        date: Value the ``pub_date`` field must equal.
        time: Value the ``time`` field must equal.

    Yields:
        list: ``[<first element of the group's area slice>, <sum of price>]``.
    """
    match_stage = {'$match': {'$and': [{'pub_date': date}, {'time': time}]}}
    group_stage = {
        '$group': {
            '_id': {'$slice': ['$area', 1]},
            'counts': {'$sum': '$price'},
        }
    }
    pipeline = [
        match_stage,
        group_stage,
        {'$sort': {'counts': -1}},
        {'$limit': 100},
    ]
    for row in _58_infos.aggregate(pipeline):
        # ``_id`` is the one-element slice of ``area``; unwrap it.
        yield [row['_id'][0], row['counts']]
# Chart configuration for the pie chart.
options = {
    'chart': {'zoomType': 'xy'},
    'title': {'text': '发帖量统计'},
    'subtitle': {'text': '2016.01.10二手物品在随后7天内,交易时长为1天的类目分布占比'},
}

# Single pie series fed by the per-area counts for pub_date '2016.01.10'
# and time == 1.
pie_points = list(data_gen_1('2016.01.10', 1))
series = [{'type': 'pie', 'name': '交易数', 'data': pie_points}]

print(series)
charts.plot(series, options=options, show='inline')
屏幕快照 2016-06-02 下午9.18.57.png
屏幕快照 2016-06-02 下午9.22.12.png
- 学习mongo的高级特性pipeline(聚合管道)方式处理数据,加快数据处理速度
分析二手商品行情
import pymongo
import charts
# Connect to the MongoDB server on localhost:27017.
client = pymongo.MongoClient('localhost', 27017)
# Database named 'local' on that server.
local = client['local']
# Collection 'log_y'; the aggregation helpers below run against it.
_58_infos = local['log_y']
def data_gen(area):
    """Yield the three most frequent ``cates[2]`` values for one area.

    Runs an aggregation on ``_58_infos``: keep documents whose ``area``
    matches ``area``, group by the one-element slice of ``cates`` starting at
    index 2, count each group, sort by count descending and keep 3 groups.

    Args:
        area: Area value to filter on.

    Yields:
        dict: ``{'name': <cates[2] value>, 'data': <count>}``, highest count
        first.
    """
    pipeline = [
        {'$match': {'area': {'$in': [area]}}},
        {'$group': {'_id': {'$slice': ['$cates', 2, 1]}, 'counts': {'$sum': 1}}},
        {'$sort': {'counts': -1}},
        {'$limit': 3},
    ]
    for row in _58_infos.aggregate(pipeline):
        # ``_id`` is the one-element slice of ``cates``; unwrap it.
        yield {'name': row['_id'][0], 'data': row['counts']}
def area_gen():
    """Yield every area with its document count, most frequent first.

    Groups all documents in ``_58_infos`` by the first element of ``area``,
    counts each group and sorts by count descending.

    Yields:
        dict: ``{'name': <first area element>, 'data': <count>}``.
    """
    group_stage = {
        '$group': {'_id': {'$slice': ['$area', 1]}, 'counts': {'$sum': 1}}
    }
    sort_stage = {'$sort': {'counts': -1}}
    for row in _58_infos.aggregate([group_stage, sort_stage]):
        # ``_id`` is the one-element slice of ``area``; unwrap it.
        yield {'name': row['_id'][0], 'data': row['counts']}
# Area whose top categories are charted.
area = '朝阳'
data = list(data_gen(area))

# Split the aggregated rows into bar heights and matching x-axis labels.
counts = [row['data'] for row in data]
labels = [row['name'] for row in data]

series = [{'name': '发帖量', 'data': counts, 'type': 'column'}]
options = {
    'chart': {'type': 'column'},
    'title': {'text': '{}发帖最多的Top3类目'.format(area)},
    'subtitle': {'text': '可视化统计图表'},
    'xAxis': {'categories': labels},
    'yAxis': {'title': {'text': '发帖量'}},
}

print('{} -> {}'.format(series, options))
charts.plot(series, show='inline', options=options)
屏幕快照 2016-06-02 下午9.23.50.png
import pymongo
import charts
# Connect to the MongoDB server on localhost:27017 and select walden.log_y.
client = pymongo.MongoClient('localhost', 27017)
walden = client['walden']
_ganji_infos = walden['log_y']

# Rewrite 'price' as an int in the stored documents; a falsy price is
# stored as -1.
# NOTE(review): only the first 10 documents are converted because of
# limit(10) -- confirm whether the whole collection was meant to be updated.
# NOTE(review): int() raises ValueError on a non-numeric price string --
# confirm the data never contains one.
for doc in _ganji_infos.find().limit(10):
    price = int(doc['price']) if doc['price'] else -1
    _ganji_infos.update_one({'_id': doc['_id']}, {'$set': {'price': price}})

# Preview the first 10 documents: fourth 'cate' element, 'newer', price.
for doc in _ganji_infos.find().limit(10):
    print('{} {} {}'.format(doc['cate'][3], doc['newer'], doc['price']))
def data_gen(cate):
    """Yield the average price per ``newer`` value for one category.

    Runs an aggregation on ``_ganji_infos``: keep documents whose ``cate``
    matches ``cate`` and whose ``price`` is greater than 0, group by the
    ``newer`` field, average ``price`` per group and sort by that average
    descending. Each aggregated row is printed before it is yielded.

    Args:
        cate: Category value to filter on.

    Yields:
        dict: ``{'name': <newer value>, 'data': <average price>}``.
    """
    match_stage = {
        '$match': {'$and': [{'cate': {'$in': [cate]}}, {'price': {'$gt': 0}}]}
    }
    group_stage = {'$group': {'_id': '$newer', 'counts': {'$avg': '$price'}}}
    sort_stage = {'$sort': {'counts': -1}}
    for row in _ganji_infos.aggregate([match_stage, group_stage, sort_stage]):
        print(row)
        yield {'name': row['_id'], 'data': row['counts']}
def cate_gen():
    """Yield the average price per ``cate[2]`` value, highest average first.

    Runs an aggregation on ``_ganji_infos``: keep documents whose ``price``
    is greater than 0, group by the one-element slice of ``cate`` starting at
    index 2, average ``price`` per group and sort by that average descending.

    Yields:
        dict: ``{'name': <cate[2] value>, 'data': <average price>}``.
    """
    match_stage = {'$match': {'price': {'$gt': 0}}}
    group_stage = {
        '$group': {
            '_id': {'$slice': ['$cate', 2, 1]},
            'counts': {'$avg': '$price'},
        }
    }
    sort_stage = {'$sort': {'counts': -1}}
    for row in _ganji_infos.aggregate([match_stage, group_stage, sort_stage]):
        # ``_id`` is the one-element slice of ``cate``; unwrap it.
        yield {'name': row['_id'][0], 'data': row['counts']}
# Print the average price of every category.
print(list(cate_gen()))

# Category whose price-by-condition breakdown is charted.
cate = '北京二手家具'
data = list(data_gen(cate))
print(data)

# Split the aggregated rows into values and matching x-axis labels.
averages = [row['data'] for row in data]
labels = [row['name'] for row in data]

series = [{'name': '平均价格', 'data': averages}]
options = {
    'chart': {'zoomType': 'xy'},
    'title': {'text': '{}成色对应的平均价格'.format(cate)},
    'subtitle': {'text': '可视化统计图表'},
    'xAxis': {'categories': labels},
    'yAxis': {'title': {'text': '平均价格'}},
}

print('{} -> {}'.format(series, options))
charts.plot(series, show='inline', options=options)
屏幕快照 2016-06-02 下午9.24.38.png