复杂数据类型:array、map、struct
1.数组array,里边不能装不同类型的数据
[hadoop@hadoop001 data]$ more hive_array.txt
zhangsan beijing,shanghai,tianjin,hangzhou
lisi changchun,chengdu,wuhan,beijing
hive> create table hive_array(name string, work_locations array<string>)
> row format delimited fields terminated by '\t'
> collection items terminated by ',';
OK
Time taken: 2.805 seconds
hive> desc formatted hive_array;
OK
# col_name data_type comment
name string
work_locations array<string>
# Detailed Table Information
Database: default
Owner: hadoop
CreateTime: Sun Jul 29 16:13:14 CST 2018
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://192.168.137.141:9000/user/hive/warehouse/hive_array
Table Type: MANAGED_TABLE
Table Parameters:
transient_lastDdlTime 1532851994
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
colelction.delim ,
field.delim \t
serialization.format \t
Time taken: 1.022 seconds, Fetched: 29 row(s)
hive> load data local inpath '/home/hadoop/data/hive_array.txt'
> overwrite into table hive_array;
Loading data to table default.hive_array
Table default.hive_array stats: [numFiles=1, numRows=0, totalSize=81, rawDataSize=0]
OK
Time taken: 3.108 seconds
hive> select * from hive_array;
OK
zhangsan ["beijing","shanghai","tianjin","hangzhou"]
lisi ["changchun","chengdu","wuhan","beijing"]
Time taken: 1.196 seconds, Fetched: 2 row(s)
hive> select name, size(work_locations) from hive_array;
OK
zhangsan 4
lisi 4
Time taken: 0.453 seconds, Fetched: 2 row(s)
hive> select name, work_locations[0] from hive_array;
OK
zhangsan beijing
lisi changchun
Time taken: 0.145 seconds, Fetched: 2 row(s)
hive> select * from hive_array where array_contains(work_locations, "tianjin");
OK
zhangsan ["beijing","shanghai","tianjin","hangzhou"]
Time taken: 0.198 seconds, Fetched: 1 row(s)
2.映射map,键值对集合,如 {'a':1,'b':2};本例数据中各键值对之间用'#'分隔,键和值之间用':'分隔
[hadoop@hadoop001 data]$ more hive_map.txt
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
hive> create table hive_map(id int,name string, family map<string,string>,age int)
> row format delimited fields terminated by ','
> collection items terminated by '#'
> map keys terminated by ':';
OK
Time taken: 0.176 seconds
hive> desc formatted hive_map;
OK
# col_name data_type comment
id int
name string
family map<string,string>
age int
# Detailed Table Information
Database: default
Owner: hadoop
CreateTime: Sun Jul 29 17:08:48 CST 2018
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://192.168.137.141:9000/user/hive/warehouse/hive_map
Table Type: MANAGED_TABLE
Table Parameters:
transient_lastDdlTime 1532855328
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
colelction.delim #
field.delim ,
mapkey.delim :
serialization.format ,
Time taken: 0.177 seconds, Fetched: 32 row(s)
hive> load data local inpath '/home/hadoop/data/hive_map.txt'
> overwrite into table hive_map;
Loading data to table default.hive_map
Table default.hive_map stats: [numFiles=1, numRows=0, totalSize=224, rawDataSize=0]
OK
Time taken: 0.48 seconds
hive> select * from hive_map;
OK
1 zhangsan {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} 28
2 lisi {"father":"mayun","mother":"huangyi","brother":"guanyu"} 22
3 wangwu {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} 29
4 mayun {"father":"mayongzhen","mother":"angelababy"} 26
Time taken: 0.113 seconds, Fetched: 4 row(s)
hive> select id,name,family['father'] as father, family['sister'] from hive_map;
OK
1 zhangsan xiaoming NULL
2 lisi mayun NULL
3 wangwu wangjianlin jingtian
4 mayun mayongzhen NULL
Time taken: 0.138 seconds, Fetched: 4 row(s)
hive> select id,name,map_keys(family) from hive_map;
OK
1 zhangsan ["father","mother","brother"]
2 lisi ["father","mother","brother"]
3 wangwu ["father","mother","sister"]
4 mayun ["father","mother"]
Time taken: 0.106 seconds, Fetched: 4 row(s)
hive> select id,name,map_values(family) from hive_map;
OK
1 zhangsan ["xiaoming","xiaohuang","xiaoxu"]
2 lisi ["mayun","huangyi","guanyu"]
3 wangwu ["wangjianlin","ruhua","jingtian"]
4 mayun ["mayongzhen","angelababy"]
Time taken: 0.106 seconds, Fetched: 4 row(s)
hive> select id,name,size(family) from hive_map;
OK
1 zhangsan 3
2 lisi 3
3 wangwu 3
4 mayun 2
Time taken: 0.182 seconds, Fetched: 4 row(s)
hive> select id,name,family['brother'] from hive_map where array_contains(map_keys(family),'brother');
OK
1 zhangsan xiaoxu
2 lisi guanyu
Time taken: 0.118 seconds, Fetched: 2 row(s)
3.struct结构体
//原始数据
[hadoop@hadoop001 data]$ cat hive_struct.txt
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70
//建表并导入数据
hive> create table hive_struct(ip string,userinfo struct<name:string,age:int>)
> row format delimited fields terminated by '#'
> collection items terminated by ':';
OK
Time taken: 0.136 seconds
hive> load data local inpath '/home/hadoop/data/hive_struct.txt'
> overwrite into table hive_struct;
Loading data to table default.hive_struct
Table default.hive_struct stats: [numFiles=1, numRows=0, totalSize=91, rawDataSize=0]
OK
Time taken: 0.573 seconds
hive> select * from hive_struct;
OK
192.168.1.1 {"name":"zhangsan","age":40}
192.168.1.2 {"name":"lisi","age":50}
192.168.1.3 {"name":"wangwu","age":60}
192.168.1.4 {"name":"zhaoliu","age":70}
Time taken: 0.123 seconds, Fetched: 4 row(s)
//取值
hive> select ip,userinfo.name,userinfo.age from hive_struct;
OK
192.168.1.1 zhangsan 40
192.168.1.2 lisi 50
192.168.1.3 wangwu 60
192.168.1.4 zhaoliu 70
Time taken: 0.111 seconds, Fetched: 4 row(s)