Impala简单教程

进入impala shell
impala-shell -i localhost --quiet
查看impala版本
[localhost:21000] > select version();
查看数据库
[localhost:21000] > show databases;
查看当前的数据库
[localhost:21000] > select current_database();
列出当前数据库的表
[localhost:21000] > show tables;
列出某个数据库的表
[localhost:21000] > show tables in dwt;
模糊查询某个数据库的表
[localhost:21000] > show tables in dwt like 'vcc*';
进入某个数据库
[localhost:21000] > use dwt;
显示一张表的结构
[localhost:21000] > describe vcc_b_dept;
或者
[localhost:21000] > desc vcc_b_dept;
显示一张表的行数
[localhost:21000] > select count(*) from vcc_b_dept;
显示某一列不同值的个数
[localhost:21000] > select count(distinct dept_no) from vcc_b_dept;
显示dept_no为空值(NULL)的行数
[localhost:21000] > select count(*) from vcc_b_dept where dept_no is null;
显示某一列10个不同的值
[localhost:21000] > select distinct dept_no from vcc_b_dept limit 10;
创建数据库
[localhost:21000] > create database test_impala;
显示数据库
[localhost:21000] > show databases;
模糊查询数据库
[localhost:21000] > show databases like 'test*';
创建一张表
[bigdata1:21000] > create table t1 (x int);
将表移动到另一个数据库中
[bigdata1:21000] > alter table t1 rename to test_impala1.t1;
添加数据到表里
[bigdata1:21000] > insert into t1 values(1), (2), (3), (4);
执行聚合函数(最小值、最大值、求和、平均值)
[bigdata1:21000] > select min(x), max(x), sum(x), avg(x) from t1;
join表
[bigdata1:21000] > select word from t1 join t2 on t1.x = t2.id;
删除表
[bigdata1:21000] > drop table if exists tab1;
根据HDFS上已有的数据目录创建外部表
create external table tab1(id int, col_1 boolean,col_2 double,col_3 timestamp) row format delimited fields terminated by ',' location '/user/impala/sample_data/tab1';
创建表
create table tab3(id int,col_1 boolean,col_2 double,month int,day int)row format delimited fields terminated by ',';
执行sql脚本
impala-shell -i localhost -f customer_setup.sql
执行一条命令
impala-shell -i impala-host -q 'select count(*) from customer_address'
汇总查询
SELECT tab2.col_1, MAX(tab2.col_2), MIN(tab2.col_2) FROM tab2 JOIN tab1 USING (id) GROUP BY col_1 ORDER BY 1 LIMIT 5;
子查询
select tab2.* from tab2, (select tab1.col_1, max(tab2.col_2) as max_col2 from tab2, tab1 where tab2.id = tab1.id group by col_1) subquery1 where subquery1.max_col2 = tab2.col_2;
insert查询(用查询结果覆盖写入表)
insert overwrite table tab3 select id, col_1, col_2, month(col_3), dayofmonth(col_3) from tab1 where year(col_3) = 2012;
创建分区表
create table logs (field1 string, field2 string, field3 string) partitioned by (year string, month string, day string, host string);
加载数据到分区表
insert into logs partition(year="2018", month="08", day="28", host="host1") values("foo1", "foo2", "foo3");
创建外表
create external table external_logs (field1 string, field2 string, field3 string) partitioned by(year string, month string, day string, host string) location '/user/hive/warehouse/test_impala1.db/logs';
添加分区
alter table external_logs add partition(year="2018", month="08", day="28", host="host1");

Impala简单教程

Impala简单教程

相关阅读更多精彩内容

友情链接更多精彩内容