1.parquet
CREATE TABLE parquet_test (
id int,
str string,
mp MAP<STRING,STRING>,
lst ARRAY<STRING>,
strct STRUCT<A:STRING,B:STRING>)
PARTITIONED BY (part string)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS
INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat';
CREATE TABLE parquet_test (
id int,
str string,
mp MAP<STRING,STRING>,
lst ARRAY<STRING>,
strct STRUCT<A:STRING,B:STRING>)
PARTITIONED BY (part string)
STORED AS PARQUET;
2.lzo
hive -e "CREATE EXTERNAL TABLE IF NOT EXISTS hive_table_name (column_1 datatype_1......column_N datatype_N)
PARTITIONED BY (partition_col_1 datatype_1 ....col_P datatype_P)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS INPUTFORMAT \"com.hadoop.mapred.DeprecatedLzoTextInputFormat\"
OUTPUTFORMAT \"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\";"
3.orc
create table Addresses (
name string,
street string,
city string,
state string,
zip int
) stored as orc tblproperties ("orc.compress"="NONE");
CREATE EXTERNAL TABLE foo (
columnA string,
columnB string
) PARTITIONED BY (dt string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY "\t"
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'xxxxxxxxxxxx';
TBLPROPERTIES ('orc.compress'='SNAPPY');
CREATE EXTERNAL TABLE `table_name`(
`column1` string,
`column2` string,
`column3` string)
PARTITIONED BY (
`proc_date` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://hdfscluster/...'
TBLPROPERTIES ( 'orc.compress'='snappy');
set hive.default.fileformat=Orc;
SET hive.exec.compress.output=true;
SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;
4.avro
CREATE TABLE kst
PARTITIONED BY (ds string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.url'='http://schema_provider/kst.avsc');
CREATE TABLE as_avro
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED as INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.url'='file:///path/to/the/schema/test_serializer.avsc');
CREATE TABLE embedded
COMMENT "just drop the schema right into the HQL"
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.literal'='{
"namespace": "com.howdy",
"name": "some_schema",
"type": "record",
"fields": [ { "name":"string1","type":"string"}]
}');
set hiveconf:schema;
DROP TABLE example;
CREATE TABLE example
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.literal'='${hiveconf:schema}');
hive --hiveconf schema="${SCHEMA}" -f your_script_file.sql
5.建表语句案例
CREATE TABLE test_serializer(string1 STRING,
int1 INT,
tinyint1 TINYINT,
smallint1 SMALLINT,
bigint1 BIGINT,
boolean1 BOOLEAN,
float1 FLOAT,
double1 DOUBLE,
list1 ARRAY<STRING>,
map1 MAP<STRING,INT>,
struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
union1 uniontype<FLOAT, BOOLEAN, STRING>,
enum1 STRING,
nullableint INT,
bytes1 BINARY,
fixed1 BINARY)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
STORED AS TEXTFILE;