全排序优化:
把所有具有相同的行最终都在一个reducer分区中,
再在一个reducer中排序。
cluster by column=(distribute by column+sort by colum)
select t3.*
from (select t2.*
from (select dt,
account_id,
sum(recharge_money) as total_money, --3.由2. ,分组求和
row_number() over(partition by dt order by sum(recharge_money) desc) rank --4.由2. ,分组 排序
from (select dt, account_id, recharge_money
from dw.dw_app
where dt >= '2016-09-01'
and dt <= '2016-09-18'
and app_id = '16099'
and msgtype = 'role.recharge'
cluster by dt,account_id -- 1. 安装dt,分区并排序;cluster by col = (distribute by col + sort by col)
) t
group by dt, account_id -- 2. 按col 分组
) t2
where t2.rank <= 10 --5.过滤,取前十
) t3
order by t3.dt asc, rank asc limit 300; --6.排序,取300.