环境
虚拟机:VMware 10
Linux版本:CentOS-6.5-x86_64
客户端:Xshell4
FTP:Xftp4
jdk8
hadoop-3.1.1
apache-hive-3.1.1
一、Hive运行方式
1、命令行方式cli:控制台模式
--与hdfs交互
hive> dfs -ls /;
Found 3 items
drwxr-xr-x - root supergroup -- : /root
drwxrwx--- - root supergroup -- : /tmp
drwxr-xr-x - root supergroup -- : /usr
hive> dfs -cat /root/hive_remote/warehouse/person/*;
1,小明1,18,lol-book-movie,beijing:shangxuetang-shanghai:pudong
2,小明2,20,lol-book-movie,beijing:shangxuetang-shanghai:pudong
3,小明3,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
4,小明4,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
5,小明5,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
6,小明6,21,lol-book-movie,beijing:shangxuetang-shanghai:pudong
hive>
--与Linux交互 :!开头
hive> !pwd;
/root
2、脚本运行方式(实际生产环境中用最多)
#直接按照入参执行 输出结果到linux控制台
[root@PCS102 ~]# hive -e "select * from psn2"
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = b243b1f6-0b67-416f-8b9a-3da0304cb88b Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.-bin/lib/hive-common-3.1..jar!/hive-log4j2.properties Async: true
Hive Session ID = 0a2ced87--44bb-927e-17ab4d993b91
OK
psn2.id psn2.name psn2.likes psn2.address psn2.age
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
Time taken: 2.416 seconds, Fetched: 12 row(s)
#直接按照入参执行 输出结果重定向到文件
[root@PCS102 ~]# hive -e "select * from psn2" > aaa
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = 1ee55846-3df2-4fc0-8ce8-501d2202a617 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.-bin/lib/hive-common-3.1..jar!/hive-log4j2.properties Async: true
Hive Session ID = 7549c4cf-d416-406b-82f7-f5012c3f1173
OK
Time taken: 2.59 seconds, Fetched: 12 row(s)
[root@PCS102 ~]# cat aaa
psn2.id psn2.name psn2.likes psn2.address psn2.age
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
#直接按照入参执行 输出结果重定向到文件 -S静默执行
[root@PCS102 ~]# hive -S -e "select * from psn2" > bbb
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = 991dd630-b1ae-448d-a43c-5870fb7508cc
Hive Session ID = ed0b4ba8-c8ec-4c9b-acba-4815e3e5762a
[root@PCS102 ~]# cat bbb
psn2.id psn2.name psn2.likes psn2.address psn2.age
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
#直接按照入参执行 输出结果重定向到文件 如果sql有问题 会报错 报错信息输出到linux控制台
[root@PCS102 ~]# hive -e "select * from psn55" > ccc
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = 83a20df1-6f19-414a-a247-cf7dbc6ee58c Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.-bin/lib/hive-common-3.1..jar!/hive-log4j2.properties Async: true
Hive Session ID = 8d5bfc04-7e76-46b5-b2a2-13e8ccfc890a
FAILED: SemanticException [Error 10001]: Line 1:14 Table not found 'psn55'
[root@PCS102 ~]# cat ccc
#-f 执行文件中的sql 结果输出到linux控制台
[root@PCS102 ~]# hive -f test
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = b5831035-da17--95aa-10c68f729327 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.-bin/lib/hive-common-3.1..jar!/hive-log4j2.properties Async: true
Hive Session ID = 19738ea4-0c4b-473f-8f05-171a16f8ec04
OK
psn2.id psn2.name psn2.likes psn2.address psn2.age
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
Time taken: 2.569 seconds, Fetched: 12 row(s)
#-i 执行文件中的sql 会进入hive CLI
[root@PCS102 ~]# hive -i test
which: no hbase in (/usr/local/jdk1..0_65/bin:/home/cluster/subversion-1.10./bin:/home/cluster/apache-storm-0.9./bin:/usr/local/hadoop-3.1./bin:/usr/local/hadoop-3.1./sbin:/usr/local/apache-hive-3.1.-bin/bin:/usr/local/sbin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/openssh/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/apache-hive-3.1.-bin/lib/log4j-slf4j-impl-2.10..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop-3.1./share/hadoop/common/lib/slf4j-log4j12-1.7..jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Hive Session ID = d720d685---a07d-f47d4d078bd7 Logging initialized using configuration in jar:file:/usr/local/apache-hive-3.1.-bin/lib/hive-common-3.1..jar!/hive-log4j2.properties Async: true
Hive Session ID = eb4cbb84---8be6-dd38bac70f2d
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
#在Hive CLI里执行外面的包含sql的文件
hive> source test;
OK
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明1 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明2 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明3 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明4 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明5 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
小明6 ["lol","book","movie"] {"beijing":"shangxuetang","shanghai":"pudong"}
Time taken: 0.151 seconds, Fetched: 12 row(s)
hive>
3、JDBC方式:hiveserver2
4、web GUI接口 :hwi操作麻烦、基本不用,hue要好一些
(1)下载源码包apache-hive-*-src.tar.gz (注意:新版本里没有hwi,这里举例apache-hive-1.2.1-src.tar.gz)
(2)将hwi/web/*里面所有的文件打成war包
cd /usr/local/apache-hive-1.2.1-src/hwi/web && jar -cvf hive-hwi.war ./*
(3)将hwi war包放在$HIVE_HOME/lib/
cp /usr/local/apache-hive-1.2.1-src/hwi/web/hive-hwi.war /usr/local/apache-hive-3.1.1-bin/lib/
(4)复制tools.jar(在jdk的lib目录下)到$HIVE_HOME/lib下
cp /usr/local/jdk1.8.0_65/lib/tools.jar /usr/local/apache-hive-3.1.1-bin/lib
(5)修改hive-site.xml
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi.war</value>
</property>
(6)启动hwi服务(端口号9999)
hive --service hwi
(7)浏览器通过以下链接来访问
http://PCS102:9999/hwi/
二、Hive 权限管理
1、三种授权模型:
(1)Storage Based Authorization in the Metastore Server
基于存储的授权 - 可以对Metastore中的元数据进行保护,但是没有提供更加细粒度的访问控制(例如:列级别、行级别)。
(2)SQL Standards Based Authorization in HiveServer2
基于SQL标准的Hive授权 - 完全兼容SQL的授权模型,推荐使用该模式。
(3)Default Hive Authorization (Legacy Mode)
hive默认授权 - 设计目的仅仅只是为了防止用户产生误操作,而不是防止恶意用户访问未经授权的数据。
重点看一下第(2)种授权:Hive - SQL Standards Based Authorization in HiveServer2
--完全兼容SQL的授权模型
--除支持对于用户的授权认证,还支持角色role的授权认证
·role可理解为是一组权限的集合,通过role为用户授权
·一个用户可以具有一个或多个角色
·默认包含两种角色:public、admin
2、限制
(1)启用当前认证方式之后,dfs, add, delete, compile, and reset等命令被禁用。
(2)通过set命令设置hive configuration的方式被限制某些用户使用。
(可通过修改配置文件hive-site.xml中hive.security.authorization.sqlstd.confwhitelist进行配置)
(3)添加、删除函数以及宏的操作,仅为具有admin角色的用户开放。
(4)用户自定义函数(开放支持永久的自定义函数),可通过具有admin角色的用户创建,其他用户都可以使用。
(5)Transform功能被禁用。
3、配置
在hive服务端修改配置文件hive-site.xml添加以下配置内容:
<property>
<name>hive.security.authorization.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<property>
<name>hive.users.in.admin.role</name>
<value>root</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
</property>
服务端启动hiveserver2;客户端通过beeline进行连接
4、角色的添加、删除、查看、设置
CREATE ROLE role_name; -- 创建角色
DROP ROLE role_name; -- 删除角色
SET ROLE (role_name|ALL|NONE); -- 设置角色
SHOW CURRENT ROLES; -- 查看当前具有的角色
SHOW ROLES; -- 查看所有存在的角色
角色的授予、移除、查看
#将角色授予某个用户、角色
GRANT role_name [, role_name] ...
TO principal_specification [, principal_specification] ...
[ WITH ADMIN OPTION ];
principal_specification
: USER user
| ROLE role
#移除某个用户、角色的角色
REVOKE [ADMIN OPTION FOR] role_name [, role_name] ...
FROM principal_specification [, principal_specification] ... ;
principal_specification
: USER user
| ROLE role
#查看授予某个用户、角色的角色列表
SHOW ROLE GRANT (USER|ROLE) principal_name;
#查看属于某种角色的用户、角色列表
SHOW PRINCIPALS role_name;
5、Hive权限管理
权限:
SELECT privilege – gives read access to an object.
INSERT privilege – gives ability to add data to an object (table).
UPDATE privilege – gives ability to run update queries on an object (table).
DELETE privilege – gives ability to delete data in an object (table).
ALL PRIVILEGES – gives all privileges (gets translated into all the above privileges).
权限的授予、移除、查看:
#将权限授予某个用户、角色:
GRANT
priv_type [, priv_type ] ...
ON table_or_view_name
TO principal_specification [, principal_specification] ...
[WITH GRANT OPTION];
#移除某个用户、角色的权限:
REVOKE [GRANT OPTION FOR]
priv_type [, priv_type ] ...
ON table_or_view_name
FROM principal_specification [, principal_specification] ... ;
principal_specification
: USER user
| ROLE role
priv_type
: INSERT | SELECT | UPDATE | DELETE | ALL
#查看某个用户、角色的权限:
SHOW GRANT [principal_name] ON (ALL | [TABLE] table_or_view_name);