不分发数据,使用单个reducer
set mapred.reduce.tasks=; select *
from dw.dw_app
where
dt>='2016-09-01'
and dt <='2016-09-18'
order by stime
limit ;
包多一层,是用order by
select t.* from
(
select *
from dw.dw_app
where
dt>='2016-09-01'
and dt <='2016-09-18'
and app_id=''
and msgtype = 'role.recharge'
) t
order by t.stime
limit ;
把所有具有相同的行最终都在一个reducer分区中,在在一个reducer中排序。 cluster by column=distribute by column+sort by colum
select *
from dw.dw_app
where
dt>='2016-09-01'
and dt <='2016-09-18'
and app_id=''
and msgtype = 'role.recharge'
cluster by dt
limit ;
查询每天前十名充值用户和充值总额
select t3.*
from (select t2.*
from (select dt,
account_id,
sum(recharge_money) as total_money,
row_number() over(partition by dt order by sum(recharge_money) desc) rank
from (select dt, account_id, recharge_money
from dw.dw_app
where dt >= '2016-09-01'
and dt <= '2016-09-18'
and app_id = ''
and msgtype = 'role.recharge'
cluster by dt, account_id) t
group by dt, account_id) t2
where t2.rank <= ) t3
order by t3.dt asc, rank asc limit ;