1、CUPED介绍
CUPED 利用实验前的协变量 X 降低实验指标 Y 的方差:Y_cuped = Y - θ·(X - E[X]),其中 θ = Cov(Y, X) / Var(X)。
(参考:https://blog.csdn.net/qq_38412868/article/details/128926536)

2、Hive SQL代码参考
-- CUPED-adjusted metric per sample:
--   cuped_x1 = x1_td - x1_theta * (x1_30d_bef - x1_mean)
with stats_info as (
    -- Per-sample inputs: the pre-experiment covariate and the in-experiment metric.
    -- NOTE: `table` is a placeholder for the real source table; replace it before running.
    select
        sample_id       -- sample id
        ,version_id     -- experiment group
        ,1 as join_key  -- constant key used to attach the single-row theta_data to every sample
        ,x1_30d_bef     -- cumulative value over the 30 days before the sample entered the experiment
        ,x1_td          -- cumulative value of the sample during the experiment
    from table
)
,theta_data as (
    -- Single row of global statistics computed over all samples in stats_info.
    select
        1 as join_key
        -- theta = cov(Y, X) / var(X). var_samp is paired with covar_samp so both use the
        -- n-1 denominator (Hive's variance() is the population variance).
        -- Falls back to 0 when the ratio is NULL (e.g. Hive returns NULL for division by
        -- a zero variance), which leaves the metric unadjusted.
        ,nvl(covar_samp(x1_td, x1_30d_bef) / var_samp(x1_30d_bef), 0) as x1_theta
        -- Mean pre-experiment value of the control group ('xxx' is the placeholder for the
        -- control version_id); a NULL pre-experiment value counts as 0 in this mean.
        ,avg(case when version_id = 'xxx' then coalesce(x1_30d_bef, 0) end) as x1_mean
    from stats_info
)
select
    aa.sample_id
    ,aa.version_id
    -- NOTE(review): a sample whose x1_td or x1_30d_bef is NULL gets a NULL cuped_x1,
    -- while x1_mean above treats NULL as 0 -- confirm which NULL handling is intended.
    ,aa.x1_td - bb.x1_theta * aa.x1_30d_bef + bb.x1_theta * bb.x1_mean as cuped_x1
from stats_info aa
left join theta_data bb
    on aa.join_key = bb.join_key