找出全部夺得3连贯的队伍
+----+-----+
| team | year |
+----+-----+
活塞,1990
公⽜牛,1991
公⽜牛,1992
公⽜牛,1993
⽕火箭,1994
⽕火箭,1995
公⽜牛,1996
公⽜牛,1997
公⽜牛,1998
⻢马刺刺,1999
湖⼈人,2000
湖⼈人,2001
湖⼈人,2002
⻢马刺刺,2003
活塞,2004
⻢马刺刺,2005
热⽕火,2006
⻢马刺刺,2007
凯尔特⼈人,2008
湖⼈人,2009
湖⼈人,2010
+----+-----+
create table job1(
team string,
year int
)row format delimited fields terminated by ',';
*【方法一】思路:
- 上一行等于第一行的year
select *
from(
select
concat(year, '-', lag(year,2) over()) years,
team n1,
lag(team,1) over() n2,
lag(team,2) over() n3
from job1
) t1
where n1=n2 and n2=n3;
*【方法二】思路:
- 3次是可变,连续,team是相同,年份应该是递增的,需要先对年费进行排序
- 排序分组大致模型为
row_number() over(partition by num order by id)
- 完成上步,两个相减之后值是一样的,而且如果不连续的话相减值也不一样
select team
from (
select team,
(row_number() over(order by year)-row_number() over(partition by team order by year)) rank_
from job1
) tmp
group by rank_,team
having count(rank_)>=3;
找出每个id在一天之内所有的波峰与波⾕谷值
+----+-----+
| id | time | price |
+----+-----+-----+
sh66688,9:35,29.48
sh66688,9:40,28.72
sh66688,9:45,27.74
sh66688,9:50,26.75
sh66688,9:55,27.13
sh66688,10:00,26.30
sh66688,10:05,27.09
sh66688,10:10,26.46
sh66688,10:15,26.11
sh66688,10:20,26.88
sh66688,10:25,27.49
sh66688,10:30,26.70
sh66688,10:35,27.57
sh66688,10:40,28.26
sh66688,10:45,28.03
sh66688,10:50,27.36
sh66688,10:55,26.48
sh66688,11:00,27.41
sh66688,11:05,26.70
sh66688,11:10,27.35
sh66688,11:15,27.35
sh66688,11:20,26.63
sh66688,11:25,26.35
sh66688,11:30,26.81
sh66688,13:00,29.45
sh66688,13:05,29.41
sh66688,13:10,29.10
sh66688,13:15,28.24
sh66688,13:20,28.20
sh66688,13:25,28.59
sh66688,13:30,29.49
sh66688,13:35,30.45
sh66688,13:40,30.31
sh66688,13:45,30.17
sh66688,13:50,30.55
sh66688,13:55,30.75
sh66688,14:00,30.03
sh66688,14:05,29.61
sh66688,14:10,29.96
sh66688,14:15,30.79
sh66688,14:20,29.82
sh66688,14:25,30.09
sh66688,14:30,29.61
sh66688,14:35,29.88
sh66688,14:40,30.36
sh66688,14:45,30.88
sh66688,14:50,30.73
sh66688,14:55,30.76
sh88888,9:35,67.23
sh88888,9:40,66.56
sh88888,9:45,66.73
sh88888,9:50,67.43
sh88888,9:55,67.49
sh88888,10:00,68.34
sh88888,10:05,68.13
sh88888,10:10,67.35
sh88888,10:15,68.13
+----+-----+-----+
create table job2(
id string,
time string,
price double
)row format delimited fields terminated by ',';
思路:
- 按id分组,排名
(
select id, price, time, nvl(null, '波峰') feature
from (select id, price, time, dense_rank() over (partition
by id order by price desc) rank
from job2) tmp1
where rank = 1
)
union all
(
select id, price, time, nvl(null, '波谷') feature
from (select id, price, time, dense_rank() over (partition
by id order by price) rank
from job2) tmp2
where rank = 1
);
计算在线时长
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:02 hhttps://www.xxx.com/jobs/9590606.html?show=IEEE1FIJ3106A1H062HA
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:23 hhttps://www.xxx.com/jobs/998375.html?show=EC1JGEC8G3HJC82JIHCD
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:09 hhttps://www.xxx.com/jobs/8205098.html?show=G75J62JE63JE3678G98F
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:12 hhttps://www.xxx.com/jobs/2280203.html?show=1957CGIA1702C1J9F0GH
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:31 hhttps://www.xxx.com/jobs/5921958.html?show=BJ9CJJ6F0GH0CDGGHCCB
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:34 hhttps://www.xxx.com/jobs/2569616.html?show=G5472AH6G1I61CGF9HGC
--------
create table job3(
id string,
dt string,
browseid string
) row format delimited fields terminated by '\t';
每个id浏览时长、步⻓
select id,sum(step) sum_time,max(rank) sum_step
from (select id, dt,browseid,row_number() over (partition by id order by dt) rank,
(unix_timestamp(dt, 'yyyy/MM/dd HH:mm') -
unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), 'yyyy/MM/dd HH:mm'))/60 as step from job3) tmp1
group by id;
如果两次浏览之间的间隔超过30分钟,认为是两个不不同的浏览时间;再求每个id浏览时⻓、步⻓
firstTypeselect id,(max(unix_timestamp(dt, 'yyyy/MM/dd HH:mm'))
- min(unix_timestamp(dt, 'yyyy/MM/dd HH:mm')))/60 as period,count(id) step
from (select id, dt,browseid,rank,minuxBefore,type,
sum(type) over (partition by id order by dt rows
between unbounded preceding and current row) as firstType
from(select id, dt,browseid,rank,minuxBefore,type
from (select id, dt,browseid,
row_number() over (partition by id order by dt) rank,
(unix_timestamp(dt, 'yyyy/MM/dd HH:mm') - unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), 'yyyy/MM/dd HH:mm'))/60 minuxBefore,
case when (unix_timestamp(dt, 'yyyy/MM/dd HH:mm')
- unix_timestamp(nvl(lag(dt) over (partition by id order by dt),dt), 'yyyy/MM/dd HH:mm'))/60 >=30 then 1
else 0
end type
from job3
)t4)t5)t6 group by id,