有关开窗函数的基本语法参照 参考地址
1.COUNT
包括类似的SUM、AVG、MIN、MAX,都是用于实现分组内的统计
需求案例:一个目的地,用户可能通过三种路径到达,一天可以到达多次,统计出只通过A路径到达目标的人数、次数
用户 | 路径 | 目标 |
---|---|---|
uid1 | A | Target1 |
uid1 | B | Target1 |
uid2 | A | Target1 |
uid2 | A | Target2 |
uid3 | A | Target2 |
uid3 | B | Target2 |
uid3 | A | Target2 |
--建表
create table log (uid string,path string,target string);
insert into log values('uid1','A','Target1');
insert into log values('uid1','B','Target1');
insert into log values('uid2','A','Target1');
insert into log values('uid2','A','Target2');
insert into log values('uid3','A','Target2');
insert into log values('uid3','B','Target2');
insert into log values('uid3','A','Target2');
--统计单用户的目标到达次数
with push as (
select uid,path,target,count(*) as v1
from log
group by uid,path,target
),
--使用窗口函数统计单个目标到达的路径个数
stat as (
select uid,path,target,v1,count(path) over(partition by uid,target) as v2
from push
)
--筛选+汇总
select count(distinct uid),sum(v1)
from stat
where v2 = 1 and path= 'A'