创建分区表
spark.sql(
"""
|create table mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts_partitions
|(
|sc_longitude double
|,sc_latitude double
|,sc_pci double
|,sc_freq double
|,scrsrp double
|,scrsrq double
|,nc1pci double
|,nc1freq double
|,nc1rsrp double
|,nc1rsrq double
|,nc2pci double
|,nc2freq double
|,nc2rsrp double
|,nc2rsrq double
|,nc3pci double
|,nc3freq double
|,nc3rsrp double
|,nc3rsrq double
|,sctadv double
|,longitude double
|,latitude double
|,time_stamp string
|,msisdn string
|)
|partitioned by (key Int)
|ROW FORMAT DELIMITED
|FIELDS TERMINATED BY '\t'
|STORED AS TEXTFILE
""".stripMargin)
注意这里的分区字段不能在()中出现
动态插入
spark.sql(
"""
|set hive.exec.dynamic.partition.mode=nonstrict
|set hive.exec.dynamic.partition=true
""".stripMargin)
这里是设置动态插入
spark.sql(
"""
|insert overwrite table mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts_partitions
|PARTITION(key)
|select
|CAST( sc_longitude as DECIMAL(10,7)) as sc_longitude
|,CAST( sc_latitude as DECIMAL(10,7)) as sc_latitude
|,CAST( sc_pci as decimal) as sc_pci
|,CAST( sc_freq as decimal ) as sc_freq
|,CAST( scrsrp as decimal ) as scrsrp
|,CAST( scrsrq as decimal ) as scrsrq
|,CAST( nc1pci as decimal ) as nc1pci
|,CAST( nc1freq as decimal ) as nc1freq
|,CAST( nc1rsrp as decimal ) as nc1rsrp
|,CAST( nc1rsrq as decimal ) as nc1rsrq
|,CAST( nc2pci as decimal ) as nc2pci
|,CAST( nc2freq as decimal ) as nc2freq
|,CAST( nc2rsrp as decimal ) as nc2rsrp
|,CAST( nc2rsrq as decimal ) as nc2rsrq
|,CAST( nc3pci as decimal ) as nc3pci
|,CAST( nc3freq as decimal ) as nc3freq
|,CAST( nc3rsrp as decimal ) as nc3rsrp
|,CAST( nc3rsrq as decimal ) as nc3rsrq
|,CAST(sctadv AS decimal) as sctadv
|,case when instr(longitude ,'.')>0 then CAST(longitude AS DECIMAL(10,7))
|else (CAST(longitude AS DECIMAL(10,7)) * 360 / 16777216.0) end as longitude
|,case when instr(latitude ,'.')>0 then CAST( latitude as DECIMAL(10,7))
|else (CAST(latitude as DECIMAL(10,7)) * 90 / 8388608.0) end as latitude
|,time_stamp
|,msisdn
|,key(CAST( sc_longitude as DECIMAL(10,7)),CAST( sc_latitude as DECIMAL(10,7))) as key
|from mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts
|where city_id=571
|distribute by cast(rand()*4 as int)
""".stripMargin)
注意这里分区字段需要在最后