1:排名函数:
row_number():没有并列,相同名次按顺序排。
rank():有并列,相同名次空位。
dense_rank():有并列,相同名次不空位。
2:创建表,加载数据,排序
//创建表
create table t_stud(
uid int,
classid string,
score int
)
row format delimited fields terminated by ','
;
//加载数据
load data local inpath '/usr/local/hivedata/t_stud.txt' into table t_stud;
id classid score
01,class01,91
02,class01,82
03,class01,63
04,class02,84
05,class02,95
06,class02,86
07,class03,77
08,class03,88
09,class03,59
排名:
select *,row_number() over(distribute by classid sort by score desc) rm from t_stud;
查询每个班级排名前3的学生 ,窗口函数使用分桶distribute 来做聚合 分桶可以使用sort by:
select * from (
select *,row_number() over(distribute by classid sort by score desc) rm from t_stud
) rmu
where rmu.rm < 4
;
select * from (
select *,
row_number() over(distribute by classid sort by score desc) rm ,
rank() over(distribute by classid sort by score desc) rk ,
dense_rank() over(distribute by classid sort by score desc) drk
from t_stud
) rmu
where rmu.drk < 4
;
查询每个班级排名前3的学生 ,窗口函数使用分区partition 来做聚合 ,rank() ,dense_rank()不可以使用sort by,使用order by:
select * from (
select *,
row_number() over(partition by classid order by score desc) rm ,
rank() over(partition by classid order by score desc) rk ,
dense_rank() over(partition by classid order by score desc) drk
from t_stud
) rmu
where rmu.drk < 4
;