Hive2

Hive的部署
解压缩、重命名、设置环境变量
在目录$HIVE_HOME/conf/下,执行命令mv hive-default.xml.template hive-site.xml重命名
hive.metastore.warehouse.dir: Hive数据存储目录
hive.exec.scratchdir:Hive数据临时文件目录
在目录$HIVE_HOME/conf/下,执行命令mv hive-env.sh.template hive-env.sh重命名
修改hadoop的配置文件hadoop-env.sh,修改内容如下:
Export HADOOP_CLASSPATH=.:$CLASSPATH:$HADOOP_CLASSPATH:$HADOOP_HOME/bin
在目录$HIVE_HOME/bin下面,修改文件hive-config.sh(可跳过)
hive-site.xml

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>

查看hive版本:
mysql> select * from VERSION;

查看有哪些表:
mysql> select * from TBLS \G;

查看表对应的hdfs目录的metedata:
mysql> select * from SDS \G;

查看某个表的partitions:
mysql> select * from PARTITIONS where TBL_ID=1 \G;

查看某个表的列:
mysql> select * from COLUMNS_V2;

查看某个表的partition:
mysql> select * from PARTITION_KEYS;

连接bigdata 元数据库

从/usr/local/hive/conf/hive-site.xml找ConnectionUserName和ConnectionPassword(hive/hive2017)

跳到bigdata003

mysql -u hive -phive2017 -D hive

Hive与HBase对比
共同点
1.HBase与Hive都架构在Hadoop之上,都是用hdfs作为底层存储。
2.职能上都能对外提供表形式的数据查询等服务。

区别
1.Hive是建立在Hadoop之上为了减少MapReduce jobs编写工作的批处理系统,HBase是为了支持弥补Hadoop对实时操作的缺陷的项目 。
2.Hive本身不存储和计算数据,它完全依赖于HDFS和MapReduce,Hive中的表是纯逻辑表,是对hdfs文件的一种记录方式。
3.Hbase是物理表,不是逻辑表,提供一个超大的内存hash表,搜索引擎通过它来存储索引,方便查询操作。

create table city(
province_code INT,
province_name string,
city_code INT,
city_name string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';

load data local inpath '/home/bigdata/hive/city.txt' into table city; //将数据从‘’中取出来,放到city表中。

alter table city rename to city_new;

select * from city_new limit 10; //查询十条数据

select count(*) from city_new; //查询一共多少条数据

select * from city_new order by city_code desc limit 10 ; //最大值

select province_name,count(*) as cnt from city_new group by province_name order by cnt desc limit 10;
//聚合函数在后面跟着order by的话,必须将聚合函数,设置别名。

select count(distinct province_name) from city_new ; //查询省份的个数

select
count(*)
from
(
select
province_name
from
city_new
group by
province_name
)a; //查询省份的个数

//查询只有一个市的省份
1.首先,查出来所有省份及其市的个数
2.查询市的个数为1的省份

select province_name,count(*) as su from city_new group by province_name having su = 1;

select province_name,su from (
select province_name,count(*) as su from city_new group by province_name

)a
where su = 1;

外表与内表
//创建一个外表,将'/user/cll/city/'这个路径下的文件,倒进来
create EXTERNAl table city_ex(
province_code int,
province_name String,
city_code int,
city_name String

)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'
location '/user/cll/city/';

//创建一个内表,将'/user/cll/city/'这个路径下的文件,倒进来
//'/user/cll/city/'这个路径是将city.txt文件put到服务器上的
create table city_in(
province_code int,
province_name String,
city_code int,
city_name String

)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'
location '/user/cll/city/';

show create table city_ex;
show create table city_in;
//将外表删除以后,倒进来的文件的路径,去服务器上查询,仍然存在;hadoop fs -ls /user/cll/city 文件下依然有文件
//将内表删除以后,倒进来的文件的路径,去服务器上查询,文件也不存在。

分区:
CREATE TABLE user(
uid INT,
city_code INT,
model string,
access string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';

load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;

创建分区表:
create table user_daily(
uid int,
city_code int,
model String,
access String

)
partitioned by (p_date String);

//从user表中查出来数据,然后将其插入到表user_daily 2017.09.01这一天里面去
//静态分区
insert OVERWRITE table user_daily partition (p_date = '2017-09-01')
select * from user;

//插进来以后,可以展示查询知道,有几个分区
show partitions user_daily;

//动态分区(不指定日期)
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE user_daily PARTITION (p_date)
SELECT *,'2017-09-02' FROM user
UNION ALL
SELECT *,'2017-09-03' FROM user
;

ALTER TABLE user_daily PARTITION (p_date='2017-01-01') RENAME TO PARTITION (p_date='20170101');

insert OVERWRITE table user_daily partition (p_date = '2017-01-01')
select * from user;

alter table user_daily drop partition (p_date='20170101');

select * from user_daily where p_date='2017-09-02';

select p_date,count(*) from user_daily group by p_date;

作业1:

a)统计WIFI环境下用户最多的5个城市
select city_code, count(uid) as cun from user where access='WIFI' group by city_code order by cun desc limit 5; //在前面加一个explain,可以查看执行过程
b)统计用户数>=3的access和city_code组合

select city_code,access,count(uid) as cun from user group by city_code,access having cun>=3;
c)机型中带有ne(不区分大小写),不同access及对应用户数
selectaccess,count(uid) from user where lower(model) like '%n%e%' group by access order by access desc;

select sum(if(access='2G',1,0))/count(*) from user ; //求占比(WIFI人数与总数的比值)

//计算尾数为[0-3]、[4-7]、[8-9]的人数
select
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end as interval,
count(*) as cun
from user
group by
case
when uid % 10 in (0,1,2,3) then '0-3'
when uid % 10 in (4,5,6,7) then '4-7'
else '8-9'
end;

//集合
select collect_set(access) from user; //去重
select collect_list(access) from user; //不去重

//join

select user.uid, user.city_code, city_new.city_name
from
(select * from user where uid <= 100) user
right join
(select * from city_new where province_code <= 30) city_new
on (user.city_code = city_new.city_code)
limit 20;

select user.uid, user.city_code, city_new.city_name
from
(select * from user where uid <= 100) user
inner join
(select * from city_new where province_code <= 30) city_new
on (user.city_code = city_new.city_code)
limit 20;

select user.uid, user.city_code, city_new.city_name
from
(select * from user where uid <= 100) user
full join
(select * from city_new where province_code <= 30) city_new
on (user.city_code = city_new.city_code)
limit 20;

窗口函数:(a\、b)
a\分组topN:(窗口函数)
1、分组 access
2、排序 序号
3、top1 序号=1
select access,city_code,uid
from
(
select uid,access,city_code,
row_number() over (partition by access order by
city_code desc) as row_num
from user
)a
where row_num = 1;

b\累积和
select p_date,
sum(cun) over (order by p_date asc rows between unbounded preceding and current row)
from
(
select p_date,count(*) as cun
from user_daily
where p_date between '2017-09-01' and '2017-09-30'
group by p_date
)a
;

create table phone (
model String,
os String,
brand String,
resolution String,
launch_date String
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'
location '/user/cll/phone/';

select * from phone limit 10;
set hive.cli.print.header=true;

select user.uid, user.city_code, city.province_name
from user left join city
on (user.city_code = city.city_code)
limit 20;

select user.model,user.access, user.city_code, phone.launch_date
from user left join phone
on (user.model = phone.model)
limit 20;

create table user_info(
uid int,
access String,
city_code int,
province_name String,
model String,
launch_date String
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'

insert into //如果插入错了,在来一遍的话,需要覆盖式插入(insert overwrite) table user_info
select user.uid, user.access,user.city_code, city.province_name,user.model,phone.launch_date
from user
left join city on (user.city_code = city.city_code)
left join phone on (user.model = phone.model)
;

select * from phone;

select * from user_info limit 10;

作业2
create table phone (
model string,
os string,
brand string,
resolution string,
launch_data string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
load data local inpath '/home/bigdata/tanqi/hive/phone.txt' into table phone;
set hive.cli.print.header=true;

create table user_info (
uid int,
access string,
city_code string,
provice_name string,
model string,
launch_data string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n'
;
加载数据
以user为主表,分别左连接phone和city表,生成新表user_info
a)user和city连接时,利用city_code相等;user和phone连接时,利用model相等
b)user_info表包含字段:uid, access, city_code, province_name, model,launch_date

insert into table user_info
select user.uid, user.access, user.city_code, city.provice_name, user.model,phone.launch_data
from
user
left join
city
on (user.city_code = city.city_code)
left join
phone
on (user.model = phone.model);

a)统计不同省份下uid最大时的:province_name,uid,model
select provice_name,uid,model from
(
select provice_name,uid,model,
row_number() over (partition by provice_name order by uid desc) as row_num
from user_info
)a
where row_num=1;

b)按launch_date统计每年对应的累计总用户数。结果类似:
select launch_data,
sum(cnt) over (order by launch_data asc rows between unbounded preceding and current row)
from
(
select launch_data,count(*) as cnt from user_info
where launch_data between '2002' and '2016'
group by launch_data
)a
;

©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 213,864评论 6 494
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,175评论 3 387
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 159,401评论 0 349
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,170评论 1 286
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,276评论 6 385
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,364评论 1 292
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,401评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,179评论 0 269
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,604评论 1 306
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 36,902评论 2 328
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,070评论 1 341
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,751评论 4 337
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,380评论 3 319
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,077评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,312评论 1 267
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 46,924评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 43,957评论 2 351

推荐阅读更多精彩内容