场景
医疗服务系统中有一个表-指标明细表(metrics_detail_source_table),求:根据此表,统计每家医疗机构每个月的门诊人次增减率。
计算思路
门诊人次增减率:(本期门诊人次-上期门诊人次)/上期门诊人次×100%,即环比
数据来源表
-
如下图metrics_detail_source_table
-- 创建数据来源表
create table metrics_detail_source_table
(
yljgdm varchar(255),
calculate_time varchar(255),
dimension_list varchar(255),
dimension_id varchar(255),
dimension_value varchar(255),
metrics_id varchar(255),
metrics_value varchar(255),
task_id varchar(255)
);
-- 准备数据
insert into metrics_detail_source_table
values('420001234','2022-09-11','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','13985','1'),
('420001234','2022-09-11','yljgdm,ghrq','ghrq','2022-09-11','MZJZ_02','13985','1'),
('420001234','2022-09-11','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','13985','2'),
('420001234','2022-09-11','yljgdm,ghrq','ghrq','2022-09-11','MZJZ_02','13985','2'),
('420001234','2022-09-18','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','2682','1'),
('420001234','2022-09-18','yljgdm,ghrq','ghrq','2022-09-18','MZJZ_02','2682','1'),
('420001234','2022-10-02','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','29001','1'),
('420001234','2022-10-02','yljgdm,ghrq','ghrq','2022-10-02','MZJZ_02','29001','1'),
('420001234','2022-10-02','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','29001','2'),
('420001234','2022-10-02','yljgdm,ghrq','ghrq','2022-10-02','MZJZ_02','29001','2'),
('420001234','2022-11-16','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','7810','1'),
('420001234','2022-11-16','yljgdm,ghrq','ghrq','2022-11-16','MZJZ_02','7810','1'),
('420001234','2022-11-21','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','1291','1'),
('420001234','2022-11-21','yljgdm,ghrq','ghrq','2022-11-21','MZJZ_02','1291','1'),
('420001234','2022-12-25','yljgdm,ghrq','yljgdm','420001234','MZJZ_02','91142','1'),
('420001234','2022-12-25','yljgdm,ghrq','ghrq','2022-12-25','MZJZ_02','91142','1'),
('419021921','2021-01-28','yljgdm,ghrq','yljgdm','419021921','MZJZ_02','2781','1'),
('419021921','2021-01-28','yljgdm,ghrq','ghrq','2021-01-28','MZJZ_02','2781','1'),
('419021921','2022-03-23','yljgdm,ghrq','yljgdm','419021921','MZJZ_02','7810','1'),
('419021921','2022-03-23','yljgdm,ghrq','ghrq','2022-03-23','MZJZ_02','7810','1'),
('419021921','2022-05-07','yljgdm,ghrq','yljgdm','419021921','MZJZ_02','2610','1'),
('419021921','2022-05-07','yljgdm,ghrq','ghrq','2022-05-07','MZJZ_02','2610','1'),
('420001234','2022-09-11','yljgdm,ghrq','yljgdm','420001234','CW_04','128','1'),
('420001234','2022-09-11','yljgdm,ghrq','ghrq','2022-09-11','CW_04','128','1');
-- 数据介绍
metrics_id为MZJZ_02的指标代表线下普通门诊人次数,即每家医疗机构每一天的门诊人数
-- 字段解释
yljgdm:医疗机构代码,代表此家医疗机构的唯一id
calculate_time:计算时间,代表指标结果计算的时间
dimension_list:维度列表,代表此指标是按照哪些维度去计算的
dimension_id:维度id,代表此指标计算的单个维度名称,指标明细表的格式是按照维度列表
拆分成每个维度的多行结果来保存的,本次列举的每家医院的计算维度为yljgdm和ghrq(即挂
号日期),本来只需要显示一行结果即可,但是业务需要拆分成2行,即yljgdm维度显示一行结
果值、ghrq维度显示一行结果值,两次的结果值一样
dimension_value:代表维度id的具体值
metrics_id:指标id,代表此指标的唯一id
metrics_vlue:指标值,代表此指标的计算结果
task_id:任务id,代表此次的指标结果是第几次被计算的
目标结果表
-- 目标结果表
create table target_table(
yljgdm varchar(255),
calculate_time varchar(255),
dimension_list varchar(255),
dimension_id varchar(255),
dimension_value varchar(255),
metrics_id varchar(255),
mzrc_rate varchar(255)
);
-- 目标结果表写入字段介绍
yljgdm:医疗机构代码
calculate_time:计算的门诊月份
dimension_list:'yljgdm,mzyf'
dimension_id:两行,即'yljgdm'、'mzyf'
dimension_value:对应dimension_id,写入具体的医疗机构代码或门诊月份
metrics_id:此次门诊人次增减率指标的id,'FHZB_06'
mzrc_rate:门诊人次增减率的结果保存字段
实现过程
- 如何算出门诊人次增减率?
-- tips:用到的函数 --
dense_rank() over()
substr()
case when then else end
group by
sum()
lag() over()
concat()
round()
- 具体实现过程
insert into target_table
select t1.yljgdm,
t1.yearmonth ,
'yljgdm,mzyf',
t1.dimension_id,
t1.dimension_value,
'FHZB_06',
concat(
round(
(t1.metrics_value-lag(t1.metrics_value,2) over(partition by t1.yljgdm order by t1.yearmonth))
/
lag(t1.metrics_value,2) over(partition by t1.yljgdm order by t1.yearmonth)*100
,2)
,'%')
from
(
select yljgdm,
yearmonth,
dimension_id,
dimension_value,
sum(metrics_value) as metrics_value
from
(
select yljgdm,
substr(calculate_time,1,7) as yearmonth,
case dimension_id when 'ghrq' then 'mzyf' else dimension_id end as dimension_id,
case dimension_value when calculate_time then substr(dimension_value,1,7) else dimension_value end as dimension_value,
metrics_value,
dense_rank() over(partition by yljgdm,calculate_time order by task_id desc) as rn
from metrics_detail_source_table where metrics_id = 'MZJZ_02'
) rn
where rn = 1
group by yljgdm,yearmonth,dimension_id,dimension_value
) t1;
- 结果展示
select * from target_table

target_table
- 知识点
1、lag(要返回的值,向下移动几行,如果为null默认返回的值) over(partition by order by )
示例:
select t1.yljgdm,
t1.yearmonth,
t1.dimension_id,
t1.dimension_value,
t1.metrics_value as this_month_metrics_value,
lag(t1.metrics_value,2) over(partition by t1.yljgdm order by t1.yearmonth) as last_month_metrics_value,
concat(
round(
(t1.metrics_value-lag(t1.metrics_value,2) over(partition by t1.yljgdm order by t1.yearmonth))
/
lag(t1.metrics_value,2) over(partition by t1.yljgdm order by t1.yearmonth)*100
,2)
,'%') as mzrc_rate
from
(
select yljgdm,
yearmonth,
dimension_id,
dimension_value,
sum(metrics_value) as metrics_value
from
(
select yljgdm,
substr(calculate_time,1,7) as yearmonth,
case dimension_id when 'ghrq' then 'mzyf' else dimension_id end as dimension_id,
case dimension_value when calculate_time then substr(dimension_value,1,7) else dimension_value end as dimension_value,
metrics_value,
dense_rank() over(partition by yljgdm,calculate_time order by task_id desc) as rn
from metrics_detail_source_table where metrics_id = 'MZJZ_02'
) rn
where rn = 1
group by yljgdm,yearmonth,dimension_id,dimension_value
) t1;

sql运行结果
2、row_number、rank、dense_rank的区别?
简单来说
row_number :1234,没有重复排名
rank:1224,有重复排名,会跳过重复排名
dense_rank:1223,有重复排名,不跳过重复排名
