2022-03-02 表数据加载

-- 1、insert into 添加分区表数据(测试用,实际开发不用)
-- 复制表结构
create table score3 like score;

-- table可省略
insert into table score3 partition (month = '202001')
values ('001', '002', 100);

insert into score3 partition (month = '202002')
values ('001', '003', 100);

select *
from score3;

-- 2、通过查询方式添加(常用)!!!!!!!!!!!!!!
create table score4 like score;

-- 重点,remember
insert overwrite table score4 partition (month = '201912')
-- 字段个数、顺序要一一对应
select sid, cid, sscore
from score;

select *
from score4;

-- 3、通过load方式加载(常用)!!!!!!!!!!!!!
create table score5 like score;

-- 重点,remember
-- 有local:从linux本地加载,这种加载做的是复制
load data local inpath '/export/data/score.txt' -- linux目录
    overwrite into table score5 partition (month = '194702');

-- 无local:从hdfs加载,这种加载做的是剪切
load data inpath '/myhive/data/score.txt' -- hdfs目录
    overwrite into table score5 partition (month = '190001');

select *
from score5;

select count(*)
from score5;

-- 4、如果先有数据后创建表,则可以通过location方法加载数据
create external table covid3
(
    date_value date,
    county     string,
    state      string,
    fips       string,
    cases      int,
    deaths     int
)
    row format delimited fields terminated by ','
    location '/hive/data/covid';

select count(*)
from covid3;

select sum(cases)
from covid3;

select *
from covid3;

-- 5、通过hadoop fs -put/-mv 将数据文件(数据文件格式要和表结构保持一致)上传到表目录文件夹即可自动生成表数据


-- 6、多插入模式加载(熟悉)!!!!!!!!!!!!!!
create table score6 like score;

load data local inpath '/export/data/score.txt'
    overwrite into table score6 partition (month = '171819');

select *
from score6;

-- 创建表字段对应score6表前两个字段
create table score_first
(
    sid string,
    cid string
)
    partitioned by (month string)
    row format delimited fields terminated by '\t';

-- 创建表字段对应score6表后两个字段
create table score_second
(
    cid    string,
    sscore int
)
    partitioned by (month string)
    row format delimited fields terminated by '\t';

from score6
-- 将score6表的前两个字段查询出来插入到score_first
insert
overwrite
table
score_first
partition
(
month = '199106'
)
select sid,
       cid
-- 将score6表的后两个字段查询出来插入到score_second
insert
overwrite
table
score_second
partition
(
month = '199106'
)
select cid, sscore;

select *
from score_first;

select *
from score_second;
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容