Hive: creating tables in three file formats (TextFile, SequenceFile, RCFile)
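The INSERT statements below assume the three target tables already exist. A minimal sketch of what their definitions might look like (col1/col2 and their types are placeholders, not from the original; only the STORED AS clause differs between the three):

CREATE TABLE hzr_test_text_table     (col1 STRING, col2 STRING) PARTITIONED BY (product STRING, dt STRING) STORED AS TEXTFILE;
CREATE TABLE hzr_test_sequence_table (col1 STRING, col2 STRING) PARTITIONED BY (product STRING, dt STRING) STORED AS SEQUENCEFILE;
CREATE TABLE hzr_test_rcfile_table   (col1 STRING, col2 STRING) PARTITIONED BY (product STRING, dt STRING) STORED AS RCFILE;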

--TextFile
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
INSERT OVERWRITE table hzr_test_text_table PARTITION(product='xxx',dt='2013-04-22')
SELECT xxx,xxx.... FROM xxxtable WHERE product='xxx' AND dt='2013-04-22';

--SequenceFile
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
set io.seqfile.compression.type=BLOCK;
INSERT OVERWRITE table hzr_test_sequence_table PARTITION(product='xxx',dt='2013-04-22')
SELECT xxx,xxx.... FROM xxxtable WHERE product='xxx' AND dt='2013-04-22';

--RCFile
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
INSERT OVERWRITE table hzr_test_rcfile_table PARTITION(product='xxx',dt='2013-04-22')
SELECT xxx,xxx.... FROM xxxtable WHERE product='xxx' AND dt='2013-04-22';

Dynamic partition insert

set hive.exec.compress.output=true;
set mapred.output.compress=true;
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions.pernode = 1000;
SET hive.exec.max.dynamic.partitions=1000;

INSERT OVERWRITE TABLE t_lxw1234_partitioned PARTITION (month,day)
SELECT url,substr(day,1,7) AS month,day
FROM t_lxw1234;

Note: in PARTITION (month,day) only the partition column names are given. The last two columns of the SELECT clause must correspond to the partition columns listed in PARTITION (month,day), in the same order.
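Static and dynamic partition columns can also be mixed, with the static ones listed first. A minimal sketch against the same table (the month value '2015-06' is only an illustration, not from the original):

INSERT OVERWRITE TABLE t_lxw1234_partitioned PARTITION (month='2015-06', day)
SELECT url, day
FROM t_lxw1234
WHERE substr(day,1,7) = '2015-06';

Here month is fixed, so the SELECT clause only has to supply the remaining dynamic partition column (day) as its last field.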
Converting rows to columns (single-table approach)

How can Hive turn

a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
变为:
a b 1,2,3
c d 4,5,6

select col1,col2,concat_ws(',',collect_set(col3))
from tmp_jiangzl_test
group by col1,col2;   -- verified, works as expected
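The reverse direction (splitting "a b 1,2,3" back into one row per value) can be done with LATERAL VIEW and explode. A minimal sketch, assuming the aggregated result has been stored in a table named tmp_jiangzl_test_result (a hypothetical name) with columns col1, col2, col3:

select col1, col2, single_val
from tmp_jiangzl_test_result
lateral view explode(split(col3, ',')) t as single_val;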
Add a metastore startup script, bin/hive-metastore.sh:

#!/bin/sh
nohup ./hive --service metastore >> metastore.log 2>&1 &
echo $! > hive-metastore.pid
Add a hive server startup script, bin/hive-server.sh:

#!/bin/sh
nohup ./hive --service hiveserver >> hiveserver.log 2>&1 &
echo $! > hive-server.pid
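Since both scripts record their process ID, matching stop scripts are straightforward. A minimal sketch (the file name bin/hive-metastore-stop.sh is a suggestion, not part of the original setup):

#!/bin/sh
# stop the metastore started by hive-metastore.sh, using the recorded PID
kill $(cat hive-metastore.pid)

An analogous script reading hive-server.pid stops the hive server.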
Start the metastore and the hive server:

./hive-metastore.sh
./hive-server.sh

To start HiveServer2:

nohup ./hiveserver2 >> hiveserver.log 2>&1 &

beeline parameter settings: https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients
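For reference, a typical beeline connection to HiveServer2 looks like the following (host, port, and user are placeholders; 10000 is the default HiveServer2 port):

beeline -u jdbc:hive2://localhost:10000 -n hadoop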