select
dep_city,
arr_city,
count(distinct order_no) as order_num
from
tmp.big_traffic_data_three_new
group by dep_city,arr_city
对这条sql的理解,有几条航线结果肯定就是几条数据,如果一个order_no在同一个航线中出现了两次,那肯定会被去重的,但如果她是在多条航线中各出现了一次那肯定会被保存多次的。
hive> select * from tmp.0703testxhh;
OK
zs bj
zs sh
zs sh
Time taken: 0.137 seconds, Fetched: 3 row(s)
hive> select count(distinct name) over(partition by diqu) from tmp.0703testxhh;
OK
1
1
1
Time taken: 19.37 seconds, Fetched: 3 row(s)
hive> insert into tmp.0703testxhh values('zs','sh');
OK
Time taken: 12.382 seconds
hive> select count(distinct name) over(partition by diqu) from tmp.0703testxhh;
OK
1
1
1
1
Time taken: 22.6 seconds, Fetched: 4 row(s)
hive> select count(name) over(partition by diqu) from tmp.0703testxhh;
OK
1
3
3
3
Time taken: 18.37 seconds, Fetched: 4 row(s)
hive> select * from tmp.0703testxhh;
OK
zs bj
zs sh
zs sh
zs sh
Time taken: 0.174 seconds, Fetched: 4 row(s)
hive> select count(1) from tmp.0703testxhh group by name;
OK
4
Time taken: 20.017 seconds, Fetched: 1 row(s)
hive> select name,count(1) over(partition by diqu) from tmp.0703testxhh;
OK
zs 1
zs 3
zs 3
zs 3
Time taken: 20.51 seconds, Fetched: 4 row(s)
hive>
hive> select * from tmp.0703testxhh;
OK
zs bj
zs sh
zs sh
zs sh
ls hn
ww sh
Time taken: 0.152 seconds, Fetched: 6 row(s)
hive> select name,diqu,count(distinct name) over (partition by diqu) from tmp.0703testxhh;
OK
zs bj 1
ls hn 1
ww sh 2
zs sh 2
zs sh 2
zs sh 2
Time taken: 20.424 seconds, Fetched: 6 row(s)
hive>