SQL进阶笔记(Analytical Function)

Analytical Function

-- Show every sale next to three averages of sales_amount:
-- over all rows, over rows with the same sales_date, and over rows in the same month.
SELECT
    sales_date,
    order_id,
    product_id,
    sales_amount,
    -- Empty OVER (): a single window covering every row returned from sales.
    AVG(sales_amount) OVER () AS avg_sale_amnt,
    -- One window per distinct sales_date value.
    AVG(sales_amount) OVER (PARTITION BY sales_date) AS avg_by_date,
    -- TRUNC(date, 'MM') maps each date to the first day of its month,
    -- so rows from the same calendar month share a window.
    AVG(sales_amount) OVER (PARTITION BY TRUNC(sales_date, 'MM')) AS avg_by_month
FROM sales;


-- Running total of sales_amount, accumulated row by row in sales_date order.
-- With only ORDER BY in the window, the frame defaults to
-- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, which treats rows with the
-- same sales_date as peers and gives all of them the same total. The explicit
-- ROWS frame plus tie-breaker columns makes the total advance one row at a time.
-- NOTE(review): assumes (order_id, product_id) distinguishes rows within one
-- sales_date - confirm against the table's key.
SELECT
    sales_date,
    order_id,
    product_id,
    sales_amount,
    SUM(sales_amount) OVER (
        ORDER BY sales_date, order_id, product_id
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cum_sum
FROM sales
-- Return rows in the same order the running total was accumulated.
ORDER BY sales_date, order_id, product_id;

注意:要删除表中的重复记录(内容完全相同的行),方式如下:

-- Remove duplicate rows from t3 by rebuilding the table from its distinct rows.
-- NOTE(review): presumably indexes, constraints and grants on t3 are not carried
-- over by CREATE TABLE ... AS SELECT and must be recreated - confirm before use.

-- Step 1: copy one instance of every distinct row into a scratch table.
CREATE TABLE tmp_t3 AS
SELECT DISTINCT * FROM t3;

-- Step 2: drop the original table that still contains the duplicates.
DROP TABLE t3;

-- Step 3: give the de-duplicated copy the original name.
-- This must name the same scratch table that step 1 created (tmp_t3).
ALTER TABLE tmp_t3 RENAME TO t3;


-- Total sales per calendar month, earliest month first.
SELECT
    -- TRUNC(date, 'MON') maps each date to the first day of its month.
    TRUNC(sales_date, 'MON') AS sales_month,
    SUM(sales_amount) AS sales_amount
FROM sales
GROUP BY TRUNC(sales_date, 'MON')
-- Sort by the first select-list column, referenced by name rather than position.
ORDER BY sales_month;


RATIO_TO_REPORT(expr) 括号中的表达式是分子,OVER() 中的 PARTITION BY 决定分母(该分区内 expr 的合计);OVER() 为空时,分母就是全部行的合计,得到的就是占整体的比例。

-- Monthly sales total and each month's share of all sales,
-- expressed as a percentage rounded to 2 decimal places.
SELECT
    TRUNC(sales_date, 'MON') AS sales_month,
    SUM(sales_amount) AS sales_amount,
    -- RATIO_TO_REPORT runs after GROUP BY; with an empty OVER () the denominator
    -- is the sum of the monthly totals across every month in the result.
    ROUND(RATIO_TO_REPORT(SUM(sales_amount)) OVER () * 100, 2) AS ratio
FROM sales
GROUP BY TRUNC(sales_date, 'MON')
ORDER BY sales_month;

-- Each employee's salary as a fraction (0..1) of two totals:
--   pct1l: the salary total over all rows of emp
--   pct2 : the salary total of the employee's own department
SELECT
    empno,
    ename,
    hiredate,
    sal,
    deptno,
    RATIO_TO_REPORT(sal) OVER () AS pct1l,
    RATIO_TO_REPORT(sal) OVER (PARTITION BY deptno) AS pct2
FROM emp;

 PCT1L是每个SAL占所有记录SAL合计的比例(0到1之间的小数,不是百分数)。比如EMPNO=7782的这行,2450/29025=0.084409991

 PCT2是每个SAL占自己所在部门SAL合计的比例,还拿EMPNO=7782的这行,2450/8750=0.28

RATIO_TO_REPORT的OVER()中不支持ORDER BY(也不支持窗口子句),只能使用PARTITION BY。


RANK

-- Rank salespeople by their sales total within each month (rank 1 = highest total).
-- RANK() gives tied totals the same rank and leaves a gap after them.
-- NOTE(review): grouping by first_name merges salespeople who share a first name -
-- confirm whether salesperson_id should be part of the grouping.
SELECT
    TRUNC(s.sales_date, 'MON') AS sales_month,
    sp.first_name,
    SUM(s.sales_amount) AS sales_amount,
    -- Evaluated after GROUP BY, so it ranks the per-month, per-person totals.
    RANK() OVER (
        PARTITION BY TRUNC(s.sales_date, 'MON')
        ORDER BY SUM(s.sales_amount) DESC
    ) AS rank1
FROM sales s
INNER JOIN salesperson sp
    ON s.salesperson_id = sp.salesperson_id
GROUP BY
    TRUNC(s.sales_date, 'MON'),
    sp.first_name
-- Month first, then rank, so each month's ranking reads top-down.
ORDER BY sales_month, rank1;



TOP N ANALYSIS

-- Top-N analysis: the 3 best-ranked salespeople of each month by sales total.
-- RANK() keeps ties, so a month can return more than 3 rows when totals are equal.
SELECT
    sales_month,
    first_name,
    sales_amount,
    salesperson_rank_top
FROM (
    -- Per-month, per-salesperson totals with their rank inside the month.
    SELECT
        TRUNC(s.sales_date, 'MON') AS sales_month,
        sp.first_name,
        -- The displayed total uses the same measure that drives the ranking,
        -- so the rank always agrees with the amount shown.
        SUM(s.sales_amount) AS sales_amount,
        RANK() OVER (
            PARTITION BY TRUNC(s.sales_date, 'MON')
            ORDER BY SUM(s.sales_amount) DESC
        ) AS salesperson_rank_top
    FROM sales s
    INNER JOIN salesperson sp
        ON s.salesperson_id = sp.salesperson_id
    GROUP BY
        TRUNC(s.sales_date, 'MON'),
        sp.first_name
)
-- A window function cannot be filtered in the WHERE clause of the query that
-- computes it, hence the inline view above.
WHERE salesperson_rank_top <= 3
ORDER BY sales_month, salesperson_rank_top;


NTILE

-- Split salespeople into 3 bands by total sales; band 1 holds the highest totals.
-- When the row count is not divisible by 3, NTILE puts the extra rows in the
-- lower-numbered bands.
SELECT
    sp.first_name,
    SUM(s.sales_amount) AS sales_amount,
    -- Evaluated after GROUP BY, so it buckets the per-person totals.
    NTILE(3) OVER (ORDER BY SUM(s.sales_amount) DESC) AS band
FROM sales s
INNER JOIN salesperson sp
    ON s.salesperson_id = sp.salesperson_id
GROUP BY sp.first_name
ORDER BY band;

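注:分析函数是在GROUP BY之后计算的,所以over()中的partition by只能使用GROUP BY中出现的表达式(或聚合结果),例如上面RANK的例子就是按月份分区。本例只按FIRST_NAME分组,如果再按它分区,每个分区只有一行,分桶就没有意义,因此这里不使用partition by。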


LAG AND LEAD 

lag与lead函数是跟偏移量相关的两个分析函数,通过这两个函数可以在一次查询中取出同一字段的前N行的数据(lag)和后N行的数据(lead)作为独立的列,从而更方便地进行数据过滤和比较。这种操作可以代替表的自联接,并且LAG和LEAD有更高的效率。

over()表示lag()与lead()操作的数据都在over()的范围内,其中可以使用partition by语句(用于分组)和order by语句(用于排序)。partition by a order by b表示先以a字段进行分组,再在每个分组内以b字段进行排序,然后在该范围内取前后行的数据。

例如:lead(field, num, defaultvalue):field是需要取值的字段,num是向后偏移的行数(缺省为1),defaultvalue是偏移超出范围、没有对应行时返回的默认值(缺省为NULL)。

-- Monthly sales total next to the totals of the preceding and following months.
-- "Preceding"/"following" mean the adjacent rows of the grouped result, so a
-- month with no sales rows is skipped rather than shown as a gap.
SELECT
    TRUNC(sales_date, 'MON') AS sales_month,
    SUM(sales_amount) AS sales_amount,
    -- Offset 1 with no default value: NULL on the first month.
    LAG(SUM(sales_amount), 1) OVER (ORDER BY TRUNC(sales_date, 'MON')) AS previous_month,
    -- Offset 1 with no default value: NULL on the last month.
    LEAD(SUM(sales_amount), 1) OVER (ORDER BY TRUNC(sales_date, 'MON')) AS next_month
FROM sales
GROUP BY TRUNC(sales_date, 'MON')
ORDER BY sales_month;


SALES GROWTH ACROSS TIME

-- Month-over-month sales growth in percent, rounded to 2 decimal places.
-- growth_perc is NULL for the first month (no previous row) and for any month
-- whose previous total is 0 (growth is undefined there).
SELECT
    sales_month,
    sales_amount,
    previous_month,
    -- NULLIF turns a zero previous total into NULL, so the division yields NULL
    -- instead of raising a divide-by-zero error.
    ROUND((sales_amount - previous_month) / NULLIF(previous_month, 0) * 100, 2) AS growth_perc
FROM (
    -- Monthly totals paired with the total of the preceding row (month).
    SELECT
        TRUNC(sales_date, 'MON') AS sales_month,
        SUM(sales_amount) AS sales_amount,
        LAG(SUM(sales_amount), 1) OVER (ORDER BY TRUNC(sales_date, 'MON')) AS previous_month
    FROM sales
    GROUP BY TRUNC(sales_date, 'MON')
)
ORDER BY sales_month;

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容