概述
DWS完成后,接下来就是进行ADS建模。统计各个一级品类下,品盘月单次复购率和多次复购率。
ODDA建模
1. 建表
-- 进入数据库
use mall;
-- 创建品牌复购率表
drop table ads_sale_tm_category1_stat_mn;
create table ads_sale_tm_category1_stat_mn
(
tm_id string comment '品牌id ' ,
category1_id string comment '1级品类id ',
category1_name string comment '1级品类名称 ',
buycount bigint comment '购买人数',
buy_twice_last bigint comment '两次以上购买人数',
buy_twice_last_ratio decimal(10,2) comment '单次复购率',
buy_3times_last bigint comment '三次以上购买人数',
buy_3times_last_ratio decimal(10,2) comment '多次复购率' ,
stat_mn string comment '统计月份',
stat_date string comment '统计日期'
) COMMENT '复购率统计'
row format delimited fields terminated by '\t'
location '/warehouse/mall/ads/ads_sale_tm_category1_stat_mn/'
;
执行建表语句:
hive -f /home/warehouse/sql/ads_sale_ddl.sql
2. 脚本
#!/bin/bash
# 定义变量方便修改
APP=mall
hive=hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n $1 ] ;then
log_date=$1
else
log_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table "$APP".ads_sale_tm_category1_stat_mn
select
mn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name,
sum(if(mn.order_count>=1,1,0)) buycount,
sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio,
sum(if(mn.order_count>3,1,0)) buy3timeLast ,
sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio ,
date_format('$log_date' ,'yyyy-MM') stat_mn,
'$log_date' stat_date
from
(
select od.sku_tm_id,
od.sku_category1_id,
od.sku_category1_name,
user_id ,
sum(order_count) order_count
from "$APP".dws_sale_detail_daycount od
where date_format(dt,'yyyy-MM')<=date_format('$log_date' ,'yyyy-MM')
group by od.sku_tm_id, od.sku_category1_id, od.sku_category1_name, user_id
) mn
group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
执行脚本
./ads_sale.sh 2021-03-24
3. 结果
hive> select * from ads_sale_tm_category1_stat_mn limit 2;
OK
NULL NULL NULL 152 141 0.93 116 0.84 2021-03 2021-03-24
Time taken: 0.955 seconds, Fetched: 1 row(s)