Hive CLI启动
hive --service cli
常规查询
show create table student;
desc student;
desc formatted student;
drop table if exists student;
创建表语法
总体的语法格式为
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)]
INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format] [LOCATION hdfs_path]
例如创建student表
create table student (id int comment "学生id", name string comment "姓名", sex string comment "性别", age int comment "年龄") row format delimited fields terminated by ",";
# format delimited fields: 使用自带的 SerDe 序列化
# erminated by ",": 用于指定字段分隔符
# 建表时同样可以指定 IF NOT EXISTS
创建完成后加载文件:
- 关键字 overwrite 是覆盖原表里的数据,不写则不会覆盖
- 关键字 local 是加载文件的来源为本地文件,不写则表示来源于 hdfs
load data local inpath '/Users/yepeng/opt/datas/student.txt' overwrite into table student;
Hive表分区
创建分区表
create table student_partition (id int comment "学生id", name string comment "姓名", age int comment "年龄") partitioned by (sex string comment "性别(分区字段)") row format delimited fields terminated by ",";
加载文件到指定的分区,Hive是通过 HDFS 上的文件夹来确定分区的,分区列在load时的文件中是不需要存在的
load data local inpath '/Users/yepeng/opt/datas/student_male.txt' overwrite into table student_partition partition ( sex = 'male' );
load data local inpath '/Users/yepeng/opt/datas/student_female.txt' overwrite into table student_partition partition ( sex = 'female' );
值得注意的是,测试发现,中文分区是不合法的(不知是否操作有误?)
load data local inpath '/Users/yepeng/opt/datas/student_male.txt' overwrite into table student_partition partition ( sex = '男性'); # 这是不合法的
删除分区
alter table student_partition drop partition (sex = 'female');