- 前两篇我们分别爬取了电影数据,也将爬取到的数据存到了数据库;
- 接下来我们要对现有的数据进行分析,以获得一些有效信息;
- 我这里只是进行了简单的可视化分析,运用Echarts插件生成各种图表;
python连接mysql数据库查询电影信息,并生成json数据,存储到本地文件里,以供前端js读取生成可视化图表:
- 查询电影类型数量并返回json数据,其后写入文件里面
# Genre labels matched against the movie table's `type` column by the
# report functions; the list order is the order used in the exported JSON.
typeNameList = [
    '剧情', '喜剧', '动作', '爱情', '科幻', '悬疑', '惊悚',
    '恐怖', '犯罪', '同性', '音乐', '歌舞', '传记', '历史',
    '战争', '西部', '奇幻', '冒险', '灾难', '武侠', '情色',
]
def getMovieTypeJson():
    """Count the movies matching each genre in ``typeNameList``.

    One ``count`` query per genre is sent through ``getJsonData``.

    Returns:
        dict: ``'typeNameList'`` holds the genre names and ``'typeNumList'``
        the matching counts as ints, in the same order.
    """
    countQuery = r"select count(type) from movie where type like '%{}%'"

    def countFor(genre):
        # The count is recovered from the textual form of the query result
        # by stripping the surrounding parentheses and the trailing comma.
        rawResult = getJsonData(countQuery.format(genre))
        return int(str(rawResult).strip(r'(').strip(r',)'))

    return {
        'typeNameList': typeNameList,
        'typeNumList': [countFor(genre) for genre in typeNameList],
    }
def writeTypeJsonFile(path):
    """Write the result of ``getMovieTypeJson()`` to ``path`` as JSON.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(path, 'w') as target:
        payload = getMovieTypeJson()
        json.dump(payload, target)
# Run the export: write the genre counts to a local file for the front-end page to read.
writeTypeJsonFile(r'C:\Users\Administrator\Desktop\books\movieType.txt')
- 对应前端页面:
<!DOCTYPE html>
<html style="height: 100%">
<head>
    <meta charset="utf-8">
</head>
<body style="height: 100%; margin: 0">
<div id="container" style="height: 100%"></div>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/echarts.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-gl/echarts-gl.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-stat/ecStat.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/dataTool.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/china.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/world.js"></script>
<script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=ZUONbpqGBsYGXNIYHicvbAbM"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/bmap.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/simplex.js"></script>
<script type="text/javascript" src="C:\Users\Administrator\Desktop\books\jquery.min.js"></script>
<script type="text/javascript">
    // Horizontal bar chart: number of movies per genre, read from the
    // JSON file written by the Python export script.
    var dom = document.getElementById("container");
    var myChart = echarts.init(dom);
    var app = {};
    var option = null;
    app.title = '豆瓣top250电影类型统计 - 条形图';

    // Build the complete chart option for the given genre names and counts.
    function buildOption(typeNameList, typeNumList) {
        return {
            title: {
                text: '豆瓣top250电影类型统计',
                subtext: '数据来自豆瓣'
            },
            tooltip: {
                trigger: 'axis',
                axisPointer: {
                    type: 'shadow'
                }
            },
            legend: {
                // Only name series that actually exist in `series` below.
                data: ['电影类型数量']
            },
            grid: {
                left: '3%',
                right: '4%',
                bottom: '3%',
                containLabel: true
            },
            xAxis: {
                type: 'value',
                boundaryGap: [0, 0.01]
            },
            yAxis: {
                type: 'category',
                data: typeNameList
            },
            series: [
                {
                    name: '电影类型数量',
                    type: 'bar',
                    data: typeNumList
                }
            ]
        };
    }

    // Draw the empty chart frame immediately, then fill it in when the data
    // arrives. Rendering from the callback removes the need to switch jQuery
    // into synchronous mode, which blocks the page while the file loads.
    option = buildOption([], []);
    myChart.setOption(option, true);

    $.getJSON("../Desktop/books/movieType.txt", function (data) {
        option = buildOption(data.typeNameList || [], data.typeNumList || []);
        myChart.setOption(option, true);
    });
</script>
</body>
</html>
-
生成图表结果:
按照type --> age --> country --> score --> movieLength --> title的顺序逐层嵌套循环,生成树形结构的json数据:
def getMovieTreeJson():
    """Build the movie tree and return it as JSON text.

    The tree nests type -> age -> country -> score -> movieLength -> title.
    Every inner node is ``{"name": ..., "children": [...]}`` and every leaf
    is ``{"name": title, "value": note}``; the root is ``{"types": [...]}``
    with one node per entry of ``typeNameList``.

    The tree is assembled from dicts and lists and serialized once with
    ``json.dumps``. A level without rows therefore becomes
    ``"children": []``, and quotes or backslashes in titles and notes are
    escaped, instead of producing malformed JSON.

    Returns:
        str: the JSON text of the whole tree.
    """
    import json  # local import so the block does not rely on file-level imports

    def _node(name, children):
        # One inner tree node: a display name plus its list of child nodes.
        return {'name': name, 'children': children}

    # NOTE(review): values are interpolated straight into the SQL text. They
    # come from typeNameList and from earlier query results; parameterized
    # queries would need a getJsonData that accepts parameters - confirm.
    typeNodes = []
    for typeName in typeNameList:
        sql = r"select distinct age from movie where type like '%{}%' order by age desc".format(typeName)
        ageNodes = []
        for age in getPureList(getJsonData(sql)):
            sql = r"select distinct country from movie where age = '{}' and type like '%{}%'".format(age, typeName)
            countryNodes = []
            seenCountries = set()
            for country in getPureList(getJsonData(sql)):
                # Only the first space-separated token of the country field
                # is used, and each such token is expanded once per age.
                countryName = country.split(" ")[0]
                if countryName in seenCountries:
                    continue
                seenCountries.add(countryName)
                sql = r"select distinct score from movie where age = '{}' and type like '%{}%' and country like '{}%' " \
                      r"order by score desc".format(age, typeName, countryName)
                scoreNodes = []
                for score in getPureList(getJsonData(sql)):
                    sql = r"select distinct movieLength from movie where age = '{}' and type like '%{}%' and country like '{}%' " \
                          r"and score = '{}' order by score desc".format(age, typeName, countryName, score)
                    lengthNodes = []
                    for movieLength in getPureList(getJsonData(sql)):
                        sql = r"select title, note from movie where age = '{}' and type like '%{}%' and country like '{}%' " \
                              r"and score = '{}' and movieLength = '{}' order by score desc".format(
                                  age, typeName, countryName, score, movieLength)
                        leaves = [
                            {'name': '{}'.format(title), 'value': '{}'.format(note)}
                            for title, note in getJsonData(sql)
                        ]
                        lengthNodes.append(_node('时长{}'.format(movieLength), leaves))
                    scoreNodes.append(_node('分数{}'.format(score), lengthNodes))
                countryNodes.append(_node(countryName, scoreNodes))
            ageNodes.append(_node('{}'.format(age), countryNodes))
        typeNodes.append(_node(typeName, ageNodes))

    # ensure_ascii=False keeps the Chinese text as-is in the returned string.
    return json.dumps({'types': typeNodes}, ensure_ascii=False)
def writeTreeJsonFile(path):
    """Write the result of ``getMovieTreeJson()`` to ``path`` via ``json.dump``.

    ``getMovieTreeJson()`` returns a string, so ``json.dump`` stores it as a
    single JSON string literal (quoted and escaped). The file is opened in
    text write mode, so any existing content is replaced.
    """
    with open(path, 'w') as target:
        treeText = getMovieTreeJson()
        json.dump(treeText, target)
# Run the export: write the movie tree to a local file for the tree-chart page to read.
writeTreeJsonFile(r'C:\Users\Administrator\Desktop\books\movieTreeJson.txt')
- 对应html页面
<!DOCTYPE html>
<html style="height: 100%">
<head>
    <meta charset="utf-8">
</head>
<body style="height: 100%; margin: 0">
<div id="container-0" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-1" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-2" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-3" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-4" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-5" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-6" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-7" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-8" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-9" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-10" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-11" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-12" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-13" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-14" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-15" style=" height: 200%; margin-bottom:100px;"></div>
<div id="container-16" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-17" style=" height: 300%; margin-bottom:100px;"></div>
<div id="container-18" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-19" style=" height: 300%;margin-bottom:100px;"></div>
<div id="container-20" style=" height: 300%;margin-bottom:100px;"></div>
<!-- <div id="container1" style="height: 100%"></div> -->
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/echarts.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-gl/echarts-gl.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-stat/ecStat.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/dataTool.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/china.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/world.js"></script>
<script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=ZUONbpqGBsYGXNIYHicvbAbM"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/bmap.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/simplex.js"></script>
<script type="text/javascript" src="C:\Users\Administrator\Desktop\books\jquery.min.js"></script>
<script type="text/javascript">
    // One radial tree chart per genre: container-0 .. container-20.
    var CHART_COUNT = 21;
    var dataEnd = {};

    // The export may hold the tree JSON wrapped in a JSON string literal
    // (i.e. encoded twice). Decode until an object is reached, at most twice,
    // instead of stripping quotes and backslashes by hand.
    function decodeTree(raw) {
        var value = raw;
        for (var pass = 0; pass < 2 && typeof value === "string"; pass++) {
            value = JSON.parse(value);
        }
        return value;
    }

    // Render the genre at position `index` of dataEnd.types into the
    // element whose id is `name`. Does nothing if either is missing.
    function initEcharts(name, index) {
        var container = document.getElementById(name);
        var genreNode = dataEnd.types && dataEnd.types[index];
        if (!container || !genreNode) {
            return;
        }
        var chart = echarts.init(container);
        var option = {
            tooltip: {
                trigger: 'item',
                triggerOn: 'mousemove'
            },
            series: [
                {
                    type: 'tree',
                    data: [genreNode],
                    top: '18%',
                    bottom: '14%',
                    layout: 'radial',
                    symbol: 'emptyCircle',
                    symbolSize: 7,
                    initialTreeDepth: 3,
                    animationDurationUpdate: 750
                }
            ]
        };
        chart.setOption(option, true);
    }

    // http://echarts.baidu.com/examples/data/asset/data/flare.json ../Desktop/books/movieTreeJson.txt
    // Charts are created from the callback, so the request does not have to
    // be made synchronous.
    $.get('../Desktop/books/movieTreeJson.txt', function (data) {
        dataEnd = decodeTree(data) || {};
        for (var i = 0; i < CHART_COUNT; i++) {
            initEcharts("container-" + i, i);
        }
    });
</script>
</body>
</html>
-
图表结果涵盖21种电影类型(每种类型一张图),这里只贴出其中一张示例
查询年代得分:
def getAgeScoreJson():
    """Collect the average score per genre for every age in the movie table.

    Returns:
        dict: in insertion order,
            ``'ages'``     -- ``'Growth'`` followed by each age as a string,
            ``'ageNames'`` -- an empty list,
            ``'<age>'``    -- one list per age holding, for each genre of
                              ``typeNameList``, the average score rounded to
                              an int (0 when the query yields ``None``),
            ``'names'``    -- ``typeNameList``.
    """
    report = {'ages': ['Growth'], 'ageNames': []}
    avgQuery = r"select avg(score) from movie where age = '{}' and type like '%{}%'"

    distinctAges = getPureList(getJsonData(r'select DISTINCT age from movie ORDER BY age desc'))
    for age in distinctAges:
        perGenre = []
        for genre in typeNameList:
            rows = getPureList(getJsonData(avgQuery.format(age, genre)))
            # The average is recovered from the textual form of the result
            # list by stripping the brackets and quotes around it.
            text = str(rows).strip("['").strip("']")
            perGenre.append(round(float(0 if text == 'None' else text)))
        report[str(age)] = perGenre
        report['ages'].append(str(age))

    report['names'] = typeNameList
    return report
def writeAgeScoreJsonFile(path):
    """Write the result of ``getAgeScoreJson()`` to ``path`` as JSON.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(path, 'w') as target:
        report = getAgeScoreJson()
        json.dump(report, target)
# Run the export: write the per-age average scores to a local file for the bar-chart page to read.
writeAgeScoreJsonFile(r'C:\Users\Administrator\Desktop\books\movieAgeScoreJson.txt')
- 前端页面:
<!DOCTYPE html>
<html style="height: 100%">
<head>
    <meta charset="utf-8">
</head>
<body style="height: 150%; margin: 0">
<div id="container" style="height: 100%"></div>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/echarts.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-gl/echarts-gl.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts-stat/ecStat.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/dataTool.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/china.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/map/js/world.js"></script>
<script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=ZUONbpqGBsYGXNIYHicvbAbM"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/echarts/extension/bmap.min.js"></script>
<script type="text/javascript" src="http://echarts.baidu.com/gallery/vendors/simplex.js"></script>
<script type="text/javascript" src="C:\Users\Administrator\Desktop\books\jquery.min.js"></script>
<script type="text/javascript">
    // Grouped bar chart: one series per age, one category per genre,
    // showing the average score exported by the Python script.
    var dom = document.getElementById("container");
    var myChart = echarts.init(dom);
    var app = {};
    var option = null;
    myChart.showLoading();
    $.get('../Desktop/books/movieAgeScoreJson.txt', function (result) {
        // Depending on how the file is served, jQuery may hand over raw text
        // or an already parsed object; only parse when it is still text.
        if (typeof result === 'string') {
            result = JSON.parse(result);
        }
        // result.ages[0] is the 'Growth' placeholder, so the real ages start
        // at index 1.
        var ageNames = result.ages.slice(1);
        var series = [];
        for (var i = 0; i < ageNames.length; i++) {
            series.push({
                name: ageNames[i],
                type: 'bar',
                data: result[ageNames[i]]
            });
        }
        myChart.hideLoading();
        option = {
            tooltip: {
                trigger: 'axis',
                axisPointer: {
                    type: 'shadow',
                    label: {
                        show: true
                    }
                }
            },
            toolbox: {
                show: true,
                feature: {
                    mark: {show: true},
                    dataView: {show: true, readOnly: false},
                    magicType: {show: true, type: ['line', 'bar']},
                    restore: {show: true},
                    saveAsImage: {show: true}
                }
            },
            calculable: true,
            legend: {
                // Only name series that actually exist in `series`.
                data: ageNames,
                itemGap: 5
            },
            grid: {
                top: '12%',
                left: '1%',
                right: '10%',
                containLabel: true
            },
            xAxis: [
                {
                    type: 'category',
                    data: result.names
                }
            ],
            yAxis: [
                {
                    type: 'value',
                    name: 'average score',
                    axisLabel: {
                        formatter: function (a) {
                            return a;
                        }
                    }
                }
            ],
            dataZoom: [
                {
                    show: true,
                    start: 94,
                    end: 100
                },
                {
                    type: 'inside',
                    start: 94,
                    end: 100
                },
                {
                    show: true,
                    yAxisIndex: 0,
                    filterMode: 'empty',
                    width: 30,
                    height: '80%',
                    showDataShadow: false,
                    left: '93%'
                }
            ],
            series: series
        };
        myChart.setOption(option);
    });
</script>
</body>
</html>
-
生成图表结果:
tips:
- 其实还可以生成词云图、折线图等各种其他形式图表;
- 本文只对电影表进行了分析,并没有对演员表、评论表、获奖表分析;
- 以后有时间再扩展;