概述
DWD完成后,接下来就是进行DWS建模。将具有相同分析主题的DWD层数据,聚合层宽表模型,便于数据分析和计算。
ODDA建模
从订单表、用户表和支付流水表,用户行为表
从订单表、订单详情表、商品表、用户表和支付流水表,用户购买商品明细表
1. 建表
-- 进入数据库
use mall;
-- 创建用户行为宽表
drop table if exists dws_user_action;
create external table dws_user_action
(
user_id string comment '用户 id',
order_count bigint comment '下单次数 ',
order_amount decimal(16,2) comment '下单金额 ',
payment_count bigint comment '支付次数',
payment_amount decimal(16,2) comment '支付金额 '
) COMMENT '每日用户行为宽表'
PARTITIONED BY ( `dt` string)
stored as parquet
location '/warehouse/mall/dws/dws_user_action/'
tblproperties ("parquet.compression"="snappy");
-- 创建用户购买商品明细表
drop table if exists dws_sale_detail_daycount;
create external table dws_sale_detail_daycount
( user_id string comment '用户 id',
sku_id string comment '商品 Id',
user_gender string comment '用户性别',
user_age string comment '用户年龄',
user_level string comment '用户等级',
order_price decimal(10,2) comment '订单价格',
sku_name string comment '商品名称',
sku_tm_id string comment '品牌id',
sku_category3_id string comment '商品三级品类id',
sku_category2_id string comment '商品二级品类id',
sku_category1_id string comment '商品一级品类id',
sku_category3_name string comment '商品三级品类名称',
sku_category2_name string comment '商品二级品类名称',
sku_category1_name string comment '商品一级品类名称',
spu_id string comment '商品 spu',
sku_num int comment '购买个数',
order_count string comment '当日下单单数',
order_amount string comment '当日下单金额'
) COMMENT '用户购买商品明细表'
PARTITIONED BY ( `dt` string)
stored as parquet
location '/warehouse/mall/dws/dws_user_sale_detail_daycount/'
tblproperties ("parquet.compression"="snappy");
执行建表语句:
hive -f /home/warehouse/sql/dws_ddl.sql
2. 脚本
#!/bin/bash
# 定义变量方便修改
APP=mall
hive=hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n $1 ] ;then
log_date=$1
else
log_date=`date -d "-1 day" +%F`
fi
# 用户行为宽表
function user_actions()
{
# 定义变量
APP=$1
hive=$2
log_date=$3
sql="
with
tmp_order as
(
select
user_id,
sum(oc.total_amount) order_amount,
count(*) order_count
from "$APP".dwd_order_info oc
where date_format(oc.create_time,'yyyy-MM-dd')='$log_date'
group by user_id
),
tmp_payment as
(
select
user_id,
sum(pi.total_amount) payment_amount,
count(*) payment_count
from "$APP".dwd_payment_info pi
where date_format(pi.payment_time,'yyyy-MM-dd')='$log_date'
group by user_id
)
insert overwrite table "$APP".dws_user_action partition(dt='$log_date')
select
user_actions.user_id,
sum(user_actions.order_count),
sum(user_actions.order_amount),
sum(user_actions.payment_count),
sum(user_actions.payment_amount)
from
(
select
user_id,
order_count,
order_amount ,
0 payment_count ,
0 payment_amount
from tmp_order
union all
select
user_id,
0,
0,
payment_count,
payment_amount
from tmp_payment
) user_actions
group by user_id;
"
$hive -e "$sql"
}
function user_sales()
{
# 定义变量
APP=$1
hive=$2
log_date=$3
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_detail as
(
select
user_id,
sku_id,
sum(sku_num) sku_num ,
count(*) order_count ,
sum(od.order_price*sku_num) order_amount
from "$APP".dwd_order_detail od
where od.dt='$log_date' and user_id is not null
group by user_id, sku_id
)
insert overwrite table "$APP".dws_sale_detail_daycount partition(dt='$log_date')
select
tmp_detail.user_id,
tmp_detail.sku_id,
u.gender,
months_between('$log_date', u.birthday)/12 age,
u.user_level,
price,
sku_name,
tm_id,
category3_id ,
category2_id ,
category1_id ,
category3_name ,
category2_name ,
category1_name ,
spu_id,
tmp_detail.sku_num,
tmp_detail.order_count,
tmp_detail.order_amount
from tmp_detail
left join "$APP".dwd_user_info u
on u.id=tmp_detail.user_id and u.dt='$log_date'
left join "$APP".dwd_sku_info s on tmp_detail.sku_id =s.id and s.dt='$log_date';
"
$hive -e "$sql"
}
user_actions $APP $hive $log_date
3. 结果
hive> select * from dws_user_action limit 2;
OK
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
1 1 402 0 0 2021-03-24
10 1 205 1 205 2021-03-24
Time taken: 0.843 seconds, Fetched: 2 row(s)
hive> select * from dws_sale_detail_daycount limit 2;
OK
1 135 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2 1 8958.0 2021-03-24
1 141 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 571.0 2021-03-24
Time taken: 0.119 seconds, Fetched: 2 row(s)