测试数据
[root@master hive]# cat tmp_data/phy_opt_course.data
zhangsan 17181515 class101 badminton
lisi 17151716 class101 football
wangwu 17181717 class102 swimming
mike 17181718 class101 swimming
kate 17181719 class102 football
[root@master hive]# cat -A tmp_data/phy_opt_course.data
zhangsan^I17181515^Iclass101^Ibadminton$
lisi^I17151716^Iclass101^Ifootball$
wangwu^I17181717^Iclass102^Iswimming$
mike^I17181718^Iclass101^Iswimming$
kate^I17181719^Iclass102^Ifootball$
建表语句
[root@master hive]# cat /opt/phy_opt_course.hive
-- Elective-course selections, one row per student.
-- Columns: stname = student name, stID = student ID, class = class label,
-- opt_cour = the elective course the student picked.
-- Fields are tab-delimited to match the tab-separated input data file.
-- IF NOT EXISTS keeps this script re-runnable: a second run is a no-op
-- rather than a table-already-exists failure.
create table if not exists phy_opt_course (
    stname   string,
    stID     int,
    class    string,
    opt_cour string
)
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as textfile;
[root@master hive]# cat /opt/phy_opt_count.hive
-- Per-course totals, one row per elective course.
-- Columns: opt_course = course name, course_count = number of students
-- counted for that course.
-- IF NOT EXISTS keeps this script re-runnable: a second run is a no-op
-- rather than a table-already-exists failure.
create table if not exists phy_opt_count (
    opt_course   string,
    course_count int
)
row format delimited
    fields terminated by '\t'
    lines terminated by '\n'
stored as textfile;
加载数据
hive> load data local inpath './tmp_data/phy_opt_course.data' into table phy_opt_course;
Loading data to table default.phy_opt_course
OK
Time taken: 2.41 seconds
按选修课程分组统计选课人数，并将结果写入 phy_opt_count 表
hive> insert overwrite table phy_opt_count
> select c.opt_cour, count(c.stID)
> from phy_opt_course c
> group by c.opt_cour;
hive> select * from phy_opt_count;
OK
badminton 1
football 2
swimming 2
Time taken: 0.168 seconds, Fetched: 3 row(s)