hive的語法命令介紹

1.hive的基本語法：

create database mydb #創建數據庫
show databases            #查看所有的庫
use mydb                      #切換數(shù)據(jù)庫
create table t_user(id int ,name string,age int)  #創(chuàng)建表
create table t_user(id int ,name string,age int) row format delimited fields terminated by '分隔符'  #指定分隔符的建表語句
insert into table t_user values(值1,值2,值3)     #插入數據
select * from t_table       #查詢語句
load data inpath 'HDFS path' into table t_name  #在hdfs中導(dǎo)入數(shù)據(jù)
load data local inpath 'linux path' into table t_name #導(dǎo)入Linux數(shù)據(jù)到hive

2.hive的DDL操作：

（1）對(duì)hive庫的操作：

建庫

創(chuàng)新互聯(lián)-專業(yè)網(wǎng)站定制、快速模板網(wǎng)站建設(shè)、高性價(jià)比嘉蔭網(wǎng)站開發(fā)、企業(yè)建站全套包干低至880元,成熟完善的模板庫,直接使用。一站式嘉蔭網(wǎng)站制作公司更省心,省錢,快速模板網(wǎng)站建設(shè)找我們，業(yè)務(wù)覆蓋嘉蔭地區(qū)。費(fèi)用合理售后完善，十年實(shí)體公司更值得信賴。

create database if not exists myhive   #如果不存在則創(chuàng)建該數(shù)據(jù)庫
create database if not exists myhive2 location 'hdfs path'  #指定該庫的位置

查看庫：

show databases;                            #查看hive中所有的數(shù)據(jù)庫
desc database dbname ;              #顯示數據庫的詳細信息
select current_database();             #查看正在使用的數(shù)據(jù)庫
show create database db_name ;  #查看建庫語句

刪除庫：

drop database db_name restrict;
drop database if exists dbname;
#注意：默認(rèn)情況下，hive不允許刪除包含表的庫，有兩種辦法：
1. 手動(dòng)刪除所有的表，然后在刪除庫
2. 使用cascade 關(guān)鍵字：drop database myhive cascade ;

（2）對(duì)hive表的操作：

建表：
語法分析：

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name 
[(col_name data_type [COMMENT col_comment], ...)] 
[COMMENT table_comment] 
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
[EXTERNAL] TABLE                # 表示創(chuàng)建的是內(nèi)部表還是外部表
[IF NOT EXISTS] table_name  # 防止報(bào)錯(cuò)
[(col_name data_type [COMMENT col_comment], ...)]  #表的字段
[COMMENT table_comment]   #表的描述信息
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]  #指定分區(qū)表
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]   #指定分桶，排序規(guī)則，以及分桶個(gè)數(shù)
[ROW FORMAT row_format]   #指定分隔符
fields terminated by ''  #指定列分隔符
lines terminated by ''  #指定行分隔符
[STORED AS file_format]   #指定數(shù)據(jù)存儲(chǔ)格式
[LOCATION hdfs_path]      #指定數(shù)據(jù)存儲(chǔ)目錄 （在創(chuàng)建外部表時(shí)使用）

建表舉例：

#內(nèi)部表
create table if not exists student(id int ,name string) row format delimited fields terminated by ','      

#外部表
create external table if not exists student (id int ,name string) row format delimited fields terminated by ',' location '/hive/data/';
#分區(qū)表
create table  if not exists student (id int ,name string) partitioned by (age int comment 'partitioned comment') row format delimited fields terminated by ',' ;   #分區字段的字段名稱，不能是表中的任意一個字段
#創(chuàng)建分桶表
create table if not exists  student (id int ,name string,age int ) clustered by (age) sorted by (age desc) into 10 buckets row format delimited fields terminated by ',' ;   #分桶字段一定要是表中的屬性字段
#like 方式
create table student like t_student ;   #復(fù)制一個(gè)表結(jié)構(gòu)，分區(qū)表和分桶表也同樣可以復(fù)制（分區(qū)表只能復(fù)制在創(chuàng)建表的時(shí)候的信息，之后添加的信息不能復(fù)制）
#CTAS
create table student as select * from t_student #創(chuàng)建表并復(fù)制

修改表屬性

alter table old_name rename to new_name ;   #修改表名
alter table t_name set tblproperties ('property_name'='property_val')  #修改表的屬性
alter table t_name set serdeproperties('field.delim'='-');   #修改列的分隔符
alter table t_name add columns (f_name type) #增加一個字段
alter table t_name drop   #hive本身不支持
alter table t_name replace columns(id int ,name string )  #替換所有的列
alter table t_name change old_field_name new_field_name type [first|after field] #修改字段的名稱、類型以及位置
#接下來是對(duì)分區(qū)表操作：
alter table t_name add partition(分區字段='value')  #添加一個分區
alter table t_name add partition(分區字段='value') partition (分區字段='value')  #添加多個分區
alter table t_name drop partition(分區字段='value')  #刪除分區
alter table t_name partition(分區字段='value') set location 'hdfs path' #修改分區路徑
alter table t_name partition(分區字段='value') enable no_drop ; #防止分區被刪除
alter table t_name partition(分區字段='value') enable offline #防止分區被查詢

刪除表：

drop table if exists t_name ;   #刪除表
注意：
1. 內(nèi)部表刪除：元數(shù)據(jù)和數(shù)據(jù)都刪
2. 外部表刪除：只刪除元數據，數據保留
3. 分區(qū)表（內(nèi)部表）：所有的分區(qū)都會(huì)被刪除，元數(shù)據(jù)和數(shù)據(jù)都刪
4. 分桶表的刪除和普通表的刪除沒有任何區(qū)別
truncate table t_name ;  #清空表的內(nèi)容

對(duì)表的常見操作：

show tables ;  #查看庫下的所有表
show partitions t_name;  # 查看表的分區(qū)
show partitions 表名 partition(分區字段='value')  #查看某個分區
desc t_name ;   #查看表的詳細(xì)信息
desc extended 表名  #查看表的詳細(xì)信息   
desc formatted 表名  #查看表的詳細(xì)信息

3.hive的DML操作：

（1）數(shù)據(jù)的裝載：

load data local inpath 'linux path' into table t_name ;   #本地導(dǎo)入
load data inpath 'hdfs path' into table t_name    #從hdfs中導入
#注意：如果是內(nèi)部表的話，在hdfs導(dǎo)入，那么原本的數(shù)據(jù)會(huì)被移動(dòng)到相應(yīng)的表的目錄下
load data local inpath 'linux path' overwrite into table 表名;  #覆蓋導入

（2）數(shù)據(jù)的插入：

注意：

insert into //表示追加操作
insert overwrite   //表示覆蓋插入操作

insert into table t_name(fields1,fields2,fields3) values(value1,value2,value3)  #插入一條數(shù)據(jù)
insert into table t_name select * from tt_name;  #利用查詢，將結(jié)果導(dǎo)入表中
#分區(qū)表的多重插入
insert into table student_ptn partition(department='SC') select id ,name,age ,sex from student where dept='SC';
insert into table student_ptn partition(department='AC') select id ,name,age ,sex from student where dept='AC';
insert into table student_ptn partition(department='ES') select id ,name,age ,sex from student where dept='ES';
上面的方法是使用單個(gè)sql去查詢表，但是這里每執(zhí)行一個(gè)sql就需要對(duì)student表中的所有數(shù)據(jù)進(jìn)行掃描，效率太低！
轉(zhuǎn)換：
from student 
insert into table student_ptn partition(department='SC') select id ,name,age ,sex where dept='SC'
insert into table student_ptn partition(department='AC') select id ,name,age ,sex where dept='AC'
insert into table student_ptn partition(department='ES') select id ,name,age ,sex where dept='ES';
這種方式進(jìn)行數(shù)據(jù)的處理，只需要掃描表一次，整個(gè)MR程序就是一個(gè)輸入多個(gè)輸出，如果指定的分區(qū)不存在，在執(zhí)行這條語句時(shí)會(huì)自動(dòng)創(chuàng)建。
# 分桶表的數(shù)據(jù)插入，這里分桶表只能使用insert進(jìn)行數(shù)據(jù)插入
insert into table stu_bck select * from 表名  #和普通的插入一樣
**分桶的原則：分桶字段的hashcode值%分桶個(gè)數(shù)=  相同的值分在一組

動(dòng)態(tài)分區(qū)插入和靜態(tài)分區(qū)插入：
靜態(tài)分區(qū)插入：要進(jìn)行數(shù)據(jù)插入的數(shù)據(jù)的定義是手動(dòng)指定的（分區(qū)在插入之前指定）
動(dòng)態(tài)分區(qū)插入：用來解決靜態(tài)分區(qū)插入的缺點(diǎn)。按照某個(gè)分區(qū)字段的值進(jìn)行判斷，每遇到一個(gè)不同的值，當(dāng)前的程序自行進(jìn)行判斷來創(chuàng)建對(duì)應(yīng)的分區(qū)
舉例：

#靜態(tài)分區(qū)插入：
load data local inpath '路徑' into table 表名 partition(dpt='value');
insert into table student_ptn partition(department='SC') select id ,name,age ,sex from student where dept='SC';
#動(dòng)態(tài)分區(qū)插入：
insert into table t_name partition(字段名) select * from tt_name #這里查詢的表的最后一個(gè)字段需要是分區(qū)字段。
#多分區(qū)動(dòng)態(tài)插入：
insert into table stu_ptn01 partition(sex,department) select id,name,age,sex,department from student_manager;  #只要查詢字段的最后幾個(gè)字段是分區(qū)字段即可，順序不能顛倒

注意：如果想使用動(dòng)態(tài)分區(qū)插入需要在hive中開啟幾個(gè)參數(shù)：

set hive.exec.dynamic.partition=true;  #打開動態分區開關
set hive.exec.dynamic.partition.mode=nonstrict ;  #關(guān)閉動(dòng)態(tài)分區(qū)插入的不合法約束。

靜態(tài)分區(qū)插入和動(dòng)態(tài)分區(qū)插入的區(qū)別：
- 靜態(tài)分區(qū)插入數(shù)據(jù)后，需要指定分區(qū)的名字，而動(dòng)態(tài)分區(qū)不需要
- 靜態(tài)分區(qū)中可能會(huì)存在某一個(gè)分區(qū)沒有數(shù)據(jù)，分區(qū)的目錄是一個(gè)空目錄，動(dòng)態(tài)分區(qū)的時(shí)候根據(jù)實(shí)際的數(shù)據(jù)生成分區(qū)，每一個(gè)分區(qū)至少有一條數(shù)據(jù)
- 動態分區的時候，每一個分區都會對應配置文件中設置的reducetask的個數，例如：
set mapreduce.job.reduces=3;

（3）數(shù)據(jù)的導(dǎo)出：

#單重導(dǎo)出
insert overwrite local directory 'linux path' select * from t_name;
#多重導(dǎo)出
from t_name insert overwrite local directory 'linux path' select * where ...
insert overwrite local directory 'linux path' select * where...

4.hive的DQL操作：

查詢語句的書寫順序：select fields ... from [join] where group by having order by limit
查詢語句的執行順序：from ----join ----where ----group by ---having ---select ---order by -----limit

（1）hive中的join

特點(diǎn)：
- Hive中連接，只支持等值連接不支持不等值連接
- Hive中and連接，不支持or
- Hive支持多表關(guān)聯(lián)，但是hive中進(jìn)行關(guān)聯(lián)的時(shí)候盡量避免笛卡爾積
- Hive支持in 和 exists 但是效率特別低
舉例：

#內(nèi)連接
select a.id aid,a.name name,b.id bid,b.score score from test_a a inner join test_b b on a.id=b.id;  (交集)
#左外鏈接：以join左側(cè)的表為基礎(chǔ)表  左側(cè)的表的所有數(shù)據(jù)都會(huì)顯示  右側(cè)可以關(guān)聯(lián)上的就會(huì)補(bǔ)全  關(guān)聯(lián)不上 null補(bǔ)充
select a.id aid,a.name name,b.id bid,b.score score from test_a a left join test_b b on a.id=b.id;
#右外鏈接：以join右側(cè)的表為基礎(chǔ)
select a.id aid,a.name name,b.id bid,b.score score from test_a a right join test_b b on a.id=b.id;
#全外鏈接：取兩個(gè)表的并集
select a.id aid,a.name name,b.id bid,b.score score from test_a a full join test_b b on a.id=b.id;
#半連接，相當(dāng)于內(nèi)連接  取左半表的數(shù)據(jù)，左表中在右表中出現(xiàn)關(guān)聯(lián)上的數(shù)據(jù)
select * from test_a a left semi join test_b b on a.id=b.id;

關(guān)于 left semi join 的特點(diǎn)：
left semi join 是對hive中的exists/in的一個更高效的實現。
- left semi join 的限制是， JOIN 子句中右邊的表只能在 ON 子句中設(shè)置過濾條件，在 WHERE 子句、SELECT 子句或其他地方過濾都不行。
- left semi join 是只傳遞表的 join key 給 map 階段，因此left semi join 中最后 select 的結(jié)果只許出現(xiàn)左表。
- 因為 left semi join 是 in(keySet) 的關係，遇到右表重複記錄，左表會跳過，而 join 則會一直遍歷。這就導致右表有重複值的情況下 left semi join 只產生一條，join 會產生多條，也會導致 left semi join 的性能更高。
hive的語法命令介紹

（2）hive中的排序

order by
特點：全局排序
例：select * from 表名 order by 字段 desc; (降序排序)

sort by
特點(diǎn)：sort by 是一個(gè)局部排序，在每一個(gè)reduce中進(jìn)行排序，當(dāng)reduceTask個(gè)數(shù)為1個(gè)時(shí)，這時(shí)與全局排序相同
原理：sort by 在為每一個reduceTask分配數據時，是隨機選擇字段進行分配的
例：select * from 表名 sort by 字段;

distribute by
特點：按照指定字段將數據分到不同的桶（reduce）中，本身不排序；需要在每個桶中排序時配合 sort by 使用。
例1：select * from 表名 distribute by 字段（字段.hash%分桶個(gè)數(shù)）
例2：select * from 表名 distribute by 分桶字段 sort by 排序字段 #按照指定字段分桶，在每一個(gè)桶中進(jìn)行排序

cluster by
特點(diǎn)：既分桶又排序
例：select * from 表名 cluster by 分桶排序字段
注意：當(dāng)分桶字段和排序字段相同時(shí)：distribute by+ sort by= cluster by，否則distribute by+ sort by功能更強(qiáng)大一些！

（3）union和union all區(qū)別

union和union all：都是將查詢結(jié)果進(jìn)行拼接，（連接的兩個(gè)表的結(jié)構(gòu)必須相同）

select * from xxx  union select * from xxx
select * from xxx  union all select * from xxx

union：表示去重連接
union all ：表示不去重連接

分享名稱：hive的語法命令介紹
本文地址：http://jinyejixie.com/article10/ipjego.html

成都網(wǎng)站建設(shè)公司_創(chuàng)新互聯(lián)，為您提供做網(wǎng)站、手機(jī)網(wǎng)站建設(shè)、響應(yīng)式網(wǎng)站、動(dòng)態(tài)網(wǎng)站、外貿(mào)建站、

聲明：本網(wǎng)站發(fā)布的內(nèi)容（圖片、視頻和文字）以用戶投稿、用戶轉(zhuǎn)載內(nèi)容為主，如果涉及侵權(quán)請(qǐng)盡快告知，我們將會(huì)在第一時(shí)間刪除。文章觀點(diǎn)不代表本網(wǎng)站立場(chǎng)，如需處理請(qǐng)聯(lián)系客服。電話：028-86922220；郵箱：631063699@qq.com。內(nèi)容未經(jīng)允許不得轉(zhuǎn)載，或轉(zhuǎn)載時(shí)需注明來源：創(chuàng)新互聯(lián)

猜你還喜歡下面的內(nèi)容

成人午夜视频全免费观看高清-秋霞福利视频一区二区三区-国产精品久久久久电影小说-亚洲不卡区三一区三区一区