一、创建/删除/修改/使用数据库
# 创建数据库
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
# 删除数据库
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
# 修改数据库
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...);
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path;
# 使用数据库
USE database_name;
USE DEFAULT;
二、创建/删除/截断表
1.创建表
# 建表语法大全
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name # 表示创建的是内部表还是外部表
[(col_name data_type [COMMENT col_comment], ...)] #表的字段
[COMMENT table_comment] #表的描述信息
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] #指定分区表
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] #指定分桶,排序规则,以及分桶个数
[ROW FORMAT row_format] #指定分隔符
[STORED AS file_format] | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] #指定数据存储格式
[LOCATION hdfs_path] #指定数据存储目录 (在创建外部表时使用)
[TBLPROPERTIES (property_name=property_value, ...)]
[AS select_statement];
# LIKE形式允许准确地复制现有的表定义(而不复制其数据)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
# 支持的字段的类型
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION
| STRING
| BINARY
| TIMESTAMP
| DECIMAL
| DECIMAL(precision, scale)
| DATE
| VARCHAR
| CHAR
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... >
# 行格式化的形式
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char]
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
# 文件格式
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE
| ORC
| PARQUET
| AVRO
| JSONFILE -- (Note: Available in Hive 4.0.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
# 约束
constraint_specification:
: [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
[, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE
# 约束示例
create table fk(id1 integer, id2 integer,
constraint c1 foreign key(id1, id2) references pk(id2, id1) disable novalidate);
# 数据的导出
## 单重导出
insert overwrite local directory 'linux path' select * from t_name;
## 多重导出
from t_name insert overwrite local directory 'linux path' select * where ...
insert overwrite local directory 'linux path' select * where...
# 数据的装载
load data local inpath 'linux path' into table t_name ; #本地导入
local data inpath 'hdfs path' into table t_name #从hdfs中导入
#注意:如果是内部表的话,在hdfs导入,那么原本的数据会被移动到相应的表的目录下
load data local inpath 'linux path ' overwrite into table 表名; #覆盖导入
# 查询语句的执行顺序
from ----join ---on----where----group by ---having ---select ----distinct ----ordey by -----limit
2.截断表和删除表
# 截断表
TRUNCATE TABLE table_name [PARTITION partition_spec];
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
# 删除表
DROP TABLE [IF EXISTS] table_name [PURGE];
3.改变表/分区/列
# 重命名表
ALTER TABLE table_name RENAME TO new_table_name;
# 修改属性
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
TBLPROPERTIES:
: (property_name = property_value, property_name = property_value, ... )
# 修改注释
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
# 添加SERDE属性
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;
SERDEPROPERTIES :
: (property_name = property_value, property_name = property_value, ... )
# 修改存储属性
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)]
INTO num_buckets BUCKETS;
# 修改倾斜或存储为目录
## 倾斜
ALTER TABLE table_name SKEWED BY (col_name1, col_name2, ...)
ON ([(col_name1_value, col_name2_value, ...) [, (col_name1_value, col_name2_value), ...]
[STORED AS DIRECTORIES];
## 不倾斜
ALTER TABLE table_name NOT SKEWED;
## 不存储为目录
ALTER TABLE table_name NOT STORED AS DIRECTORIES;
## 设置倾斜表存储位置
ALTER TABLE table_name SET SKEWED LOCATION (col_name1="location1" [, col_name2="location2", ...] );
# 修改约束
ALTER TABLE table_name ADD CONSTRAINT constraint_name PRIMARY KEY (column, ...) DISABLE NOVALIDATE;
ALTER TABLE table_name ADD CONSTRAINT constraint_name FOREIGN KEY (column, ...) REFERENCES table_name(column, ...) DISABLE NOVALIDATE RELY;
ALTER TABLE table_name DROP CONSTRAINT constraint_name;
# 添加分区
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'][, PARTITION partition_spec [LOCATION 'location'], ...];
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
# 重命名分区
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
# 恢复分区
MSCK [REPAIR] TABLE table_name [ADD/DROP/SYNC PARTITIONS];
# 删除分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]
[IGNORE PROTECTION] [PURGE];
# 修改分区文件格式
ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
# 修改分区位置
ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "new location";
# 更改列名称/类型/位置/注释
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
# 添加/替换列
ALTER TABLE table_name
[PARTITION partition_spec] -- (Note: Hive 0.14.0 and later)
ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT]
三、创建/删除/更改视图
# 创建视图
CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...) ]
[COMMENT view_comment]
[TBLPROPERTIES (property_name = property_value, ...)]
AS SELECT ...;
# 删除视图
DROP VIEW [IF EXISTS] [db_name.]view_name;
# 改变视图属性
ALTER VIEW [db_name.]view_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ...)
四、创建/删除/修改索引
# 创建索引
CREATE INDEX index_name
ON TABLE base_table_name (col_name, ...)
AS index_type
[WITH DEFERRED REBUILD]
[IDXPROPERTIES (property_name=property_value, ...)]
[IN TABLE index_table_name]
[
[ ROW FORMAT ...] STORED AS ...
| STORED BY ...
]
[LOCATION hdfs_path]
[TBLPROPERTIES (...)]
[COMMENT "index comment"];
# 删除索引
DROP INDEX [IF EXISTS] index_name ON table_name;
# 修改索引
ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;
五、创建/删除/重新装载函数
# 临时函数
CREATE TEMPORARY FUNCTION function_name AS class_name;
# 删除临时函数
DROP TEMPORARY FUNCTION [IF EXISTS] function_name;
# 创建永久函数
CREATE FUNCTION [db_name.]function_name AS class_name
[USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
# 删除函数
DROP FUNCTION [IF EXISTS] function_name;
# 重新加载函数
RELOAD FUNCTION;
六、创建/删除/授予/撤销角色和权限
# 将角色授予某个用户、角色:
GRANT role_name [, role_name] ...
TO principal_specification [, principal_specification] ...
[ WITH ADMIN OPTION ];
principal_specification
: USER user
| ROLE role
# 查看授予某个用户、角色的角色列表
SHOW ROLE GRANT (USER|ROLE) principal_name;
# 查看属于某种角色的用户、角色列表
SHOW PRINCIPALS role_name;
# 移除某个用户、角色的角色:
REVOKE [ADMIN OPTION FOR] role_name [, role_name] ...
FROM principal_specification [, principal_specification] ... ;
principal_specification
: USER user
| ROLE role
# 将权限授予某个用户、角色:
GRANT
priv_type [, priv_type ] ...
ON table_or_view_name
TO principal_specification [, principal_specification] ...
[WITH GRANT OPTION];
# 移除某个用户、角色的权限:
REVOKE [GRANT OPTION FOR]
priv_type [, priv_type ] ...
ON table_or_view_name
FROM principal_specification [, principal_specification] ... ;
principal_specification
: USER user
| ROLE role
priv_type
: INSERT | SELECT | UPDATE | DELETE | ALL
# 查看某个用户、角色的权限:
SHOW GRANT [principal_name] ON (ALL| ([TABLE] table_or_view_name)