## I. Hive DDL ##
#### 1. Databases ####
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
  [COMMENT database_comment]                                 -- description of the database
  [LOCATION hdfs_path]                                       -- storage location of the database on HDFS
  [WITH DBPROPERTIES (property_name=property_value, ...)];   -- database properties
create database if not exists t1;
create database if not exists t2 comment 'learning hive';
create database if not exists t3 with dbproperties('creator'='hadoop','date'='2018-04-05');
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];   -- drop a database
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...);
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path;
ALTER (DATABASE|SCHEMA) database_name SET MANAGEDLOCATION hdfs_path;
USE database_name;
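A minimal end-to-end sketch of the database statements above; the name sales_db, the HDFS path, and the property values are invented for illustration:

create database if not exists sales_db comment 'sales data warehouse' location '/user/hive/warehouse/sales_db.db' with dbproperties ('creator' = 'hadoop');
describe database extended sales_db;        -- shows the comment, location, and dbproperties
use sales_db;
alter database sales_db set dbproperties ('edited_by' = 'hadoop');
drop database if exists sales_db cascade;   -- CASCADE also drops any tables inside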
#### 2. Tables ####
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...) ON ((col_value, col_value, ...), (col_value, col_value, ...), ...) [STORED AS DIRECTORIES]]
  [[ROW FORMAT row_format] [STORED AS file_format] | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]
  [AS select_statement];
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];
create table table_name (id int, dtDontQuery string, name string) partitioned by (date string);
CREATE TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User')
  COMMENT 'This is the page view table'
  PARTITIONED BY(dt STRING, country STRING)
  ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
  STORED AS SEQUENCEFILE;
CREATE TABLE new_key_value_store
  ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
  STORED AS RCFile
  AS SELECT (key % 1024) new_key, concat(key, value) key_value_pair FROM key_value_store SORT BY new_key, key_value_pair;
CREATE TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User')
  COMMENT 'This is the page view table'
  PARTITIONED BY(dt STRING, country STRING)
  CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
  ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003'
  STORED AS SEQUENCEFILE;
CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING)
  SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78)) [STORED AS DIRECTORIES];
create table constraints1(id1 integer UNIQUE disable novalidate, id2 integer NOT NULL, usr string DEFAULT current_user(), price double CHECK (price > 0 AND price <= 1000));
DROP TABLE [IF EXISTS] table_name [PURGE];
TRUNCATE [TABLE] table_name [PARTITION partition_spec];
ALTER TABLE table_name RENAME TO new_table_name;
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
  table_properties: (property_name = property_value, property_name = property_value, ...)
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] UNSET SERDEPROPERTIES (property_name, ...);
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)] INTO num_buckets BUCKETS;
ALTER TABLE table_name NOT SKEWED;
ALTER TABLE table_name NOT STORED AS DIRECTORIES;
ALTER TABLE table_name ADD CONSTRAINT constraint_name PRIMARY KEY (column, ...) DISABLE NOVALIDATE;
ALTER TABLE table_name DROP CONSTRAINT constraint_name;
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'][, PARTITION partition_spec [LOCATION 'location'], ...];
  partition_spec: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
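As a concrete instance of the CREATE TABLE / ALTER TABLE forms above, here is a sketch of an external, partitioned, tab-delimited log table; the table name access_logs, its columns, and the HDFS path are invented for illustration and are reused in the sketches below:

create external table if not exists access_logs (
  user_id bigint,
  page_url string,
  ip string comment 'client IP address'
)
comment 'raw access logs'
partitioned by (dt string)
row format delimited fields terminated by '\t'
stored as textfile
location '/data/access_logs';
alter table access_logs set tblproperties ('note' = 'partitioned by day');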
MSCK [REPAIR] TABLE table_name [ADD/DROP/SYNC PARTITIONS];
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...] [IGNORE PROTECTION] [PURGE];
ALTER TABLE table_name ARCHIVE PARTITION partition_spec;
ALTER TABLE table_name UNARCHIVE PARTITION partition_spec;
ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "new location";
ALTER TABLE table_name TOUCH [PARTITION partition_spec];
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])] CONCATENATE;
ALTER TABLE table_name [PARTITION partition_spec] ENABLE|DISABLE NO_DROP [CASCADE];
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
ALTER TABLE test_change CHANGE a1 a2 STRING AFTER b;
ALTER TABLE test_change CHANGE a1 a1 INT COMMENT 'this is column a1';
ALTER TABLE table_name [PARTITION partition_spec] ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...) [CASCADE|RESTRICT];
SET hive.exec.dynamic.partition = true;   -- enable dynamic partitioning
CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...)]
  [COMMENT view_comment]
  [TBLPROPERTIES (property_name = property_value, ...)]
  AS SELECT ...;
DROP VIEW [IF EXISTS] [db_name.]view_name;
ALTER VIEW [db_name.]view_name SET TBLPROPERTIES table_properties;
  table_properties: (property_name = property_value, property_name = property_value, ...)
ALTER VIEW [db_name.]view_name AS select_statement;
CREATE INDEX index_name ON TABLE base_table_name (col_name, ...)
  AS index_type
  [WITH DEFERRED REBUILD]
  [IDXPROPERTIES (property_name=property_value, ...)]
  [IN TABLE index_table_name]
  [[ROW FORMAT ...] STORED AS ... | STORED BY ...]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (...)]
  [COMMENT "index comment"];
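Continuing with the assumed access_logs table, a short sketch of the partition, column, and view statements above (partition values and the view name are illustrative):

alter table access_logs add if not exists partition (dt = '2018-04-05') location '/data/access_logs/dt=2018-04-05';
alter table access_logs drop if exists partition (dt = '2018-04-04');
msck repair table access_logs;              -- register partition directories written directly to HDFS
alter table access_logs change column page_url page_url string comment 'requested URL';
create view if not exists v_daily_uv as select dt, count(distinct user_id) as uv from access_logs group by dt;
drop view if exists v_daily_uv;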
DROP INDEX [IF EXISTS] index_name ON table_name;
ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;
CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression;
CREATE TEMPORARY MACRO simple_add (x int, y int) x + y;
DROP TEMPORARY MACRO [IF EXISTS] macro_name;
CREATE TEMPORARY FUNCTION function_name AS class_name;
DROP TEMPORARY FUNCTION [IF EXISTS] function_name;
CREATE FUNCTION [db_name.]function_name AS class_name [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
DROP FUNCTION [IF EXISTS] function_name;
RELOAD (FUNCTIONS|FUNCTION);   -- refresh the function registry in HiveServer2
SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards'];
SHOW TABLES [IN database_name] ['identifier_with_wildcards'];
SHOW VIEWS [IN/FROM database_name] [LIKE 'pattern_with_wildcards'];
SHOW PARTITIONS table_name;
SHOW PARTITIONS table_name PARTITION(ds='2010-03-03');
SHOW TABLE EXTENDED [IN|FROM database_name] LIKE 'identifier_with_wildcards' [PARTITION(partition_spec)];
SHOW TBLPROPERTIES tblname;
SHOW TBLPROPERTIES tblname("foo");
SHOW CREATE TABLE ([db_name.]table_name|view_name);
SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name];
SHOW FUNCTIONS [LIKE "<pattern>"];
SHOW CONF <configuration_name>;
SHOW LOCKS <table_name> PARTITION (<partition_spec>) EXTENDED;
SHOW LOCKS (DATABASE|SCHEMA) database_name;
SHOW COMPACTIONS;
DESCRIBE DATABASE [EXTENDED] db_name;
DESCRIBE [EXTENDED|FORMATTED] table_name[.col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
DESCRIBE FORMATTED [db_name.]table_name column_name PARTITION (partition_spec);
DESCRIBE [EXTENDED|FORMATTED] [db_name.]table_name [column_name] PARTITION partition_spec;
DESCRIBE FORMATTED default.src_table PARTITION (part_col = 100) columnA;
SHOW TRANSACTIONS;
ABORT TRANSACTIONS transactionID [ transactionID ...];
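A few concrete examples of the macro and metadata statements above; the macro name and the wildcard patterns are invented:

create temporary macro to_kb (bytes bigint) bytes / 1024;
select to_kb(4096);                         -- use the macro like a built-in function
drop temporary macro if exists to_kb;
show databases like 'my*';
show tables 'access*';
show partitions access_logs;
show create table access_logs;
describe formatted access_logs;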
## II. Hive DML ##
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] [INPUTFORMAT 'inputformat' SERDE 'serde'];
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...];
UPDATE tablename SET column = value [, column = value ...] [WHERE expression];
DELETE FROM tablename [WHERE expression];
MERGE INTO <target table> AS T USING <source expression/table> AS S
  ON <boolean expression1>
  WHEN MATCHED [AND <boolean expression2>] THEN UPDATE SET <set clause list>
  WHEN MATCHED [AND <boolean expression3>] THEN DELETE
  WHEN NOT MATCHED [AND <boolean expression4>] THEN INSERT VALUES <value list>;
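A sketch of the load and insert forms above against the assumed access_logs table; the local file path and the staging_logs source table are hypothetical, and UPDATE/DELETE/MERGE additionally require a transactional (ACID) table, which this sketch does not create:

load data local inpath '/tmp/access_2018-04-05.log' overwrite into table access_logs partition (dt = '2018-04-05');
insert into table access_logs partition (dt = '2018-04-06') values (1001, '/index.html', '10.0.0.1');
set hive.exec.dynamic.partition = true;
set hive.exec.dynamic.partition.mode = nonstrict;   -- allow all partition columns to be dynamic
insert into table access_logs partition (dt)
  select user_id, page_url, ip, dt from staging_logs;   -- staging_logs is an assumed source table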
## III. Hive DQL ##
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [HAVING having_condition]
  [ORDER BY col_list]
  [CLUSTER BY col_list | [DISTRIBUTE BY col_list] [SORT BY col_list]]
  [LIMIT [offset,] rows];
SELECT current_database();
SELECT DISTINCT col1 FROM t1;
SELECT page_views.* FROM page_views JOIN dim_users ON (page_views.user_id = dim_users.id AND page_views.date >= '2008-03-01' AND page_views.date <= '2008-03-31');
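Putting the query clauses together against the same assumed table (the date range and the threshold are arbitrary):

select dt, count(*) as pv, count(distinct user_id) as uv
  from access_logs
  where dt between '2018-04-01' and '2018-04-30'
  group by dt
  having count(*) > 100
  order by dt
  limit 10;
select user_id, page_url from access_logs distribute by user_id sort by user_id;   -- route each user's rows to one reducer, sorted within it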
## IV. HiveQL Examples ##
create database if not exists myhive4 comment 'hive test database';
alter database ylj_db set dbproperties('create_time' = '2019-04-30 15:28:56');
desc database extended ylj_db;
drop database ylj_db cascade;
ALTER DATABASE test SET LOCATION '/hive/test';
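The database examples above could be rounded out with a matching table example; the student table, its columns, and the delimiter are invented for illustration:

create table if not exists myhive4.student (id int, name string) comment 'hive test table' row format delimited fields terminated by ',' stored as textfile;
alter table myhive4.student set tblproperties ('last_modified' = '2019-04-30');
desc formatted myhive4.student;
drop table if exists myhive4.student;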