表结构
-- 表1
小明 26
小强 45
小司 57
小武 12
小高 80
小陈 99
小张 45
小李 77
小红 93
小赵 90
-- 表2
0
30
60
80
100
-- 建表
create table if not exists score(
name string, score string
)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/score';
create table if not exists edge(
val string
)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/edge';
要求最终展示的效果如下:
思路
-- 第一步:先构建分数区间
select concat(val, '-', edge_max) as score_range
from (
SELECT val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1 where edge_max is not null;
如下:
score_range
0-30
30-60
60-80
80-100
-- 第二步:组装想要的数据,获得的是笛卡尔积,后续优化
select A.*,
split(B.score_range, '-')[0] score_min,
split(B.score_range, '-')[1] score_max,
B.score_range
from (select name, score from score) A,
(
select concat(val, '-', edge_max) as score_range
from (
SELECT val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1
where edge_max is not null
) B;
-- 第三步:过滤数据,得到最终结果
select name,
score,
if(cast(score as int) <= cast((score_min + score_max) as int) / 2 , score_min, score_max) as near_edge,
score_range
from (
select A.*,
split(B.score_range, '-')[0] score_min,
split(B.score_range, '-')[1] score_max,
B.score_range
from (select name, score from score) A,
(
select concat(val, '-', edge_max) as score_range
from (
SELECT val, lead(val, 1) OVER (ORDER BY CAST(val AS int)) AS edge_max
FROM edge
) B1
where edge_max is not null
) B
) T
where cast(T.score as int) > cast(T.score_min as int)
and cast(T.score as int) < cast(T.score_max as int);
学习lead()函数的用法