1、快速建表
drop table if exists dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian;
create table dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian like dwt.dwt_activity_auction_house_activity_sale_full_1d;
2、查看复制好的表结构
hive> show create table dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian;
OK
CREATE TABLE `dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian`(
`activity_sale_id` bigint COMMENT '拍卖会拍品信息主键id',
`activityid` bigint COMMENT '拍卖会活动id',
`userinfoid` bigint COMMENT '拍卖行id',
`lot` int COMMENT 'lot号',
`title` string COMMENT '标题',
`bidbzj` bigint COMMENT '保证金',
`increase` bigint COMMENT '加价幅度',
`startprize` bigint COMMENT '起拍价',
`min_referenceprice` bigint COMMENT '最小估值',
`max_referenceprice` bigint COMMENT '最大估值',
`category` int COMMENT '分类',
`seccategory` int COMMENT '二级分类',
`seccategory_template` string COMMENT '发拍模板',
`content` string COMMENT '拍品详情',
`illustration` string COMMENT '拍品说明',
`state` int COMMENT '0:草稿;1-待审核;2-审核通过;3-审核驳回;4-已上拍;5-已隐藏;6-已下架',
`upload_time` string COMMENT '上拍时间',
`sale_id` bigint COMMENT '拍品id,sale表id',
`end_time` string COMMENT '截拍时间',
`open_time` string COMMENT '开拍时间',
`sale_status` int COMMENT '拍品状态: -1:unsold 1:等待支付保证金 2:sale 3:deal',
`enable_return` int COMMENT '是否包退 0-不包退;1-包退',
`express_fee` int COMMENT '是否包邮 0-不包邮;1-包邮',
`last_edit_username` string COMMENT '最后编辑人记录',
`last_edit_time` string COMMENT '最后编辑时间',
`reject_reason` string COMMENT '拒绝原因',
`sort_num` bigint COMMENT '排序值',
`is_del` int COMMENT '是否删除',
`create_time` string COMMENT '创建时间',
`update_time` string COMMENT '更新时间')
PARTITIONED BY (
`dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'/usr/hive/warehouse/dwt.db/dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian'
TBLPROPERTIES (
'orc.compress'='snappy')
Time taken: 0.903 seconds, Fetched: 44 row(s)
跟原始表一模一样,有分区,就是没有数据。
3、拷数据
hadoop fs -cp /usr/hive/warehouse/dwt.db/dwt_activity_auction_house_activity_sale_full_1d/* /usr/hive/warehouse/dwt.db/dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian/
4、修复分区元数据
MSCK REPAIR TABLE dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian;
5、查询和校验
select dt,count(*)
from dwt.dwt_activity_auction_house_activity_sale_full_1d_0805forliuqian
group by dt;
select dt,count(*)
from dwt.dwt_activity_auction_house_activity_sale_full_1d
group by dt;
6、动态分区数据插入方式
以上步骤3、4也可以通过动态分区数据插入来做
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ds_parttion partition(state='china',ct)
select id ,city from mytest_tmp2_p;
set hive.exec.dynamici.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ds_parttion partition(state,ct)
select id ,country,city from mytest_tmp2_p;
使用动态分区表必须配置的参数:
set hive.exec.dynamic.partition =true(默认false),表示开启动态分区功能
set hive.exec.dynamic.partition.mode = nonstrict(默认strict),表示允许所有分区都是动态的,否则必须有静态分区字段
动态分区相关的调优参数:
set hive.exec.max.dynamic.partitions.pernode=100
set hive.exec.max.dynamic.partitions =1000
set hive.exec.max.created.files =10000 (默认) 全局可以创建的最大文件个数,超出报错。