利用Spark进行网站日志的分析与处理。
源数据及相关文档链接: https://pan.baidu.com/s/19sbUpJsYfqypTIXmuI9DwQ 提取码: vwjk
代码共分为6个包,分别为 spark、service、controller、model、dao、utils。
spark包:(CleanData)
CleanData.scala
service包:(CityService, FlowService, UrlService)
CityService.scala
FlowService.scala
UrlService.scala
controller包:(CityDaoController, FlowDaoController, UrlDaoController)
CityDaoController.scala
FlowDaoController.scala
UrlDaoController.scala
model包:(City, Flow, Url)
City.scala
case class City (day:String, cmsId:Long, city:String, times:Long, times_rank:Long)
Flow.scala
case class Flow (day:String, cmsId:Long, flowTotal:Long)
Url.scala
case class Url(day:String, cmsId:Long, times:Long)
dao包:(CityDao, FlowDao, UrlDao)
CityDao.scala
FlowDao.scala
UrlDao.scala
utils包:(MysqlUtils, AllFieldsUtils, GetTimeUtils, MinuteUtils)
MysqlUtils.scala
AllFieldsUtils.scala
GetTimeUtils.scala
MinuteUtils.scala
Mysql建表
表格类型
code
video
ceping
learn
article
u
qadetail
comment
建数据库
-- Create the project database with the utf8 character set and the
-- utf8_general_ci collation.
-- IF NOT EXISTS keeps the statement safe to re-run, and the trailing
-- semicolon terminates it so it can be executed in the same session or
-- script as the table definitions that follow.
CREATE DATABASE IF NOT EXISTS `SparkProject` CHARACTER SET utf8 COLLATE utf8_general_ci;
建表
-- Table `code`: its columns (day, cmsId, times) match the fields of the
-- `Url` case class listed in the model package above.
-- The composite primary key allows at most one row per (day, cmsId) pair.
CREATE TABLE code (
    day   VARCHAR(20) NOT NULL,
    cmsId BIGINT(10)  NOT NULL,
    times BIGINT(10)  NOT NULL,
    PRIMARY KEY (day, cmsId)
) ENGINE = MyISAM DEFAULT CHARSET = utf8;
建表
-- Table `code_flow`: its columns (day, cmsId, flowTotal) match the fields of
-- the `Flow` case class listed in the model package above.
-- NOTE(review): no primary key or index is declared here, unlike table
-- `code` -- confirm whether (day, cmsId) is expected to be unique.
CREATE TABLE code_flow (
    day       VARCHAR(20) NOT NULL,
    cmsId     BIGINT(10)  NOT NULL,
    flowTotal BIGINT(10)  NOT NULL
) ENGINE = MyISAM DEFAULT CHARSET = utf8;
建表
-- Table `code_city`: its columns (day, cmsId, city, times, times_rank) match
-- the fields of the `City` case class listed in the model package above.
-- NOTE(review): no primary key or index is declared here, unlike table
-- `code` -- confirm whether (day, cmsId, city) is expected to be unique.
CREATE TABLE code_city (
    day        VARCHAR(20) NOT NULL,
    cmsId      BIGINT(10)  NOT NULL,
    city       VARCHAR(20) NOT NULL,
    times      BIGINT(10)  NOT NULL,
    times_rank BIGINT(10)  NOT NULL
) ENGINE = MyISAM DEFAULT CHARSET = utf8;