利用Spark进行网站日志的分析与处理。
源数据及相关文档 链接: https://pan.baidu.com/s/19sbUpJsYfqypTIXmuI9DwQ 提取码: vwjk
代码共分为6个包 分别为spark, service, controller, model, dao, utils.
spark包:(CleanData)
CleanData.scala


service包:(CityService, FlowService, UrlService)
CityService.scala

FlowService.scala

UrlService.scala

controller包:(CityDaoController, FlowDaoController, UrlDaoController)
CityDaoController.scala


FlowDaoController.scala


UrlDaoController.scala


model包:(City, Flow, Url)
City.scala
case class City (day:String, cmsId:Long, city:String, times:Long, times_rank:Long)
Flow.scala
case class Flow (day:String, cmsId:Long, flowTotal:Long)
Url.scala
case class Url(day:String, cmsId:Long, times:Long)
dao包:(CityDao, FlowDao, UrlDao)
CityDao.scala

FlowDao.scala

UrlDao.scala

utils包:(MysqlUtils, AllFieldsUtils, GetTimeUtils, MinuteUtils)
MysqlUtils.scala

AllFieldsUtils.scala


GetTimeUtils.scala

MinuteUtils.scala

Mysql建表
表格类型
code
video
ceping
learn
article
u
qadetail
comment
建数据库
-- Create the SparkProject schema with utf8 as its default character set
-- and utf8_general_ci as its default collation.
-- The terminating semicolon lets this statement be executed in the same
-- script as the CREATE TABLE statements that follow it.
CREATE DATABASE `SparkProject` CHARACTER SET utf8 COLLATE utf8_general_ci;
建表
-- Table `code`: columns mirror the Url case class fields (day, cmsId, times).
-- The composite primary key allows at most one row per (day, cmsId) pair.
CREATE TABLE code (
    day   VARCHAR(20) NOT NULL,
    cmsId BIGINT(10)  NOT NULL,
    times BIGINT(10)  NOT NULL,
    PRIMARY KEY (day, cmsId)
) ENGINE = MyISAM DEFAULT CHARSET = utf8;
建表
-- Table `code_flow`: columns mirror the Flow case class fields
-- (day, cmsId, flowTotal).
-- NOTE(review): no primary or unique key is declared, so repeated
-- (day, cmsId) rows are accepted -- confirm this is intended.
CREATE TABLE code_flow (
    day       VARCHAR(20) NOT NULL,
    cmsId     BIGINT(10)  NOT NULL,
    flowTotal BIGINT(10)  NOT NULL
) ENGINE = MyISAM DEFAULT CHARSET = utf8;
建表
-- Table `code_city`: columns mirror the City case class fields
-- (day, cmsId, city, times, times_rank).
-- NOTE(review): no primary or unique key is declared, so repeated
-- (day, cmsId, city) rows are accepted -- confirm this is intended.
CREATE TABLE code_city (
    day        VARCHAR(20) NOT NULL,
    cmsId      BIGINT(10)  NOT NULL,
    city       VARCHAR(20) NOT NULL,
    times      BIGINT(10)  NOT NULL,
    times_rank BIGINT(10)  NOT NULL
) ENGINE = MyISAM DEFAULT CHARSET = utf8;
Jupyter画图
