创建表
-- Student table: one row per student.
-- NOTE(review): the field delimiter is copied from the original statement as a
-- single space; confirm it matches the separator actually used in the data file.
create table student(
    s_id    string comment '学生编号',
    s_name  string comment '学生姓名',
    s_birth string comment '学生生日',
    s_sex   string comment '学生性别'
) comment '学生表'
row format delimited
fields terminated by ' ';
-- Score table: one row per (student, course) pair with the score obtained.
-- The table comment previously read '课程表' (course table); it now says
-- '成绩表' (score table) to match what the table holds.
-- NOTE(review): the field delimiter is copied from the original statement as a
-- single space; confirm it matches the separator actually used in the data file.
create table score(
    s_id    string comment '学生编号',
    c_id    string comment '课程编号',
    s_score int    comment '分数'
) comment '成绩表'
row format delimited
fields terminated by ' ';
-- Course table: one row per course, with the id of the teacher who gives it.
-- The c_id column comment previously read '学生编号' (student id); it is the
-- course id, so the comment now says '课程编号'.
-- NOTE(review): the field delimiter is copied from the original statement as a
-- single space; confirm it matches the separator actually used in the data file.
create table course(
    c_id   string comment '课程编号',
    c_name string comment '课程名称',
    t_id   string comment '教师编号'
) comment '课程表'
row format delimited
fields terminated by ' ';
-- Teacher table: one row per teacher.
-- NOTE(review): the field delimiter is copied from the original statement as a
-- single space; confirm it matches the separator actually used in the data file.
create table teacher(
    t_id   string comment '教师编号',
    t_name string comment '教师姓名'
) comment '教师表'
row format delimited
fields terminated by ' ';
数据
[root@ke04 0516]# cat student
01 赵雷 1990-01-01 男
02 张三 1990-12-21 男
03 迪丽热巴 2000-01-01 女
04 雪见 2005-03-04 女
05 梅超风 1988-04-06 女
06 李云龙 1999-12-12 男
07 蒸煮 1989-07-01 男
08 王菊 1990-01-20 女
[root@ke04 0516]# cat score
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
[root@ke04 0516]# cat course
01 语文 02
02 数学 01
03 英语 03
[root@ke04 0516]# cat teacher
01 张三
02 李四
03 王五
导入数据到hive
-- Load the four local sample files into their tables, one statement per file.
-- The path literals now use plain single quotes; the typographic quotes in the
-- original are not valid string delimiters.
load data local inpath '/tmp/bigdata/0516/student' into table student;
load data local inpath '/tmp/bigdata/0516/score' into table score;
load data local inpath '/tmp/bigdata/0516/course' into table course;
load data local inpath '/tmp/bigdata/0516/teacher' into table teacher;
1.查询课程编号为'01'的课程比'02'的课程成绩高的所有学生信息
-- Q1: students whose score in course '01' is higher than their score in course '02'.
-- score is joined twice (once per course) so both scores land on the same row.
select
    stu.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student stu
join score a
    on stu.s_id = a.s_id
   and a.c_id = '01'
join score b
    on stu.s_id = b.s_id
   and b.c_id = '02'
where a.s_score > b.s_score;
-- Output recorded by the author:
-- 02 张三 1990-12-21 男 70 60
-- 04 雪见 2005-03-04 女 50 30

-- Approach 2: membership test with an IN subquery.
-- The original version compared a.s_score against the scalar subquery
-- (select max(b.s_score) from score b where b.c_id = '02'). The author reports
-- that it failed on their Hive, and it also compared against the highest '02'
-- score of ANY student rather than the same student's '02' score.
-- The subquery below pairs each student's two scores instead.
-- NOTE(review): IN subqueries in WHERE need a Hive version that supports them;
-- confirm against the target cluster.
select stu.*
from student stu
where stu.s_id in (
    select a.s_id
    from score a
    join score b
        on a.s_id = b.s_id
    where a.c_id = '01'
      and b.c_id = '02'
      and a.s_score > b.s_score
);
2. 查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩
-- Q2: id, name and average score of students whose average score is >= 60.

-- Variant 1: join first, then aggregate per student.
-- The original referenced score.s_score although the table is aliased as b;
-- the alias is now used consistently. The threshold is applied to the
-- unrounded average so that rounding for display cannot change who qualifies.
select
    stu.s_id,
    stu.s_name,
    round(avg(b.s_score), 1) as avg
from student stu
join score b
    on stu.s_id = b.s_id
group by stu.s_id, stu.s_name
having avg(b.s_score) >= 60;

-- Variant 2: aggregate score per student first, then attach the student name.
-- The >= 60 filter sits inside the aggregate subquery, again on the unrounded
-- average.
select
    stu.s_id,
    stu.s_name,
    tmp.avgscore as score
from student stu
join (
    select
        s_id,
        round(avg(s_score), 1) as avgscore
    from score
    group by s_id
    having avg(s_score) >= 60
) as tmp
    on tmp.s_id = stu.s_id;
-- Output recorded by the author:
-- 01 赵雷 89.7
-- 02 张三 70.0
-- 03 迪丽热巴 80.0
-- 05 梅超风 81.5
-- 07 蒸煮 93.5
3.查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:(包括有成绩的和无成绩的)
-- Q3: id, name and average score of students whose average is below 60,
-- including students who have no score rows at all (reported with average 0).

-- Building block: students without any score row, written with NOT IN.
-- Author's note: NOT IN was not supported on their Hive; the two replacements
-- below (left join + is null, or not exists) are the suggested workarounds.
-- NOTE(review): NOT IN also returns no rows if the subquery yields a NULL s_id.
select stu2.s_id, stu2.s_name, 0 as avgScore
from student stu2
where stu2.s_id not in (select distinct(sc2.s_id) from score sc2);

-- Replacement 1: anti-join, keeping students whose left join finds no score row.
select stu2.s_id, stu2.s_name, 0 as avgScore
from student stu2
left join score sc2
    on stu2.s_id = sc2.s_id
where sc2.s_id is null;

-- Replacement 2: correlated not exists.
select stu2.s_id, stu2.s_name, 0 as avgScore
from student stu2
where not exists (
    select sc2.s_id
    from score sc2
    where sc2.s_id = stu2.s_id
);

-- Approach 1: pre-aggregated averages below 60, plus the no-score students.
-- The threshold is tested on the unrounded average: the original compared the
-- value rounded to one decimal, so an average such as 59.96 (displayed as 60.0)
-- was wrongly excluded.
select stu.s_id, stu.s_name, tmp.avgScore
from student as stu
join (
    select
        sc.s_id,
        round(avg(sc.s_score), 1) as avgScore
    from score sc
    group by sc.s_id
    having avg(sc.s_score) < 60
) as tmp
    on stu.s_id = tmp.s_id
union all
select stu2.s_id, stu2.s_name, 0 as avgScore
from student stu2
left join score sc2
    on stu2.s_id = sc2.s_id
where sc2.s_id is null;

-- Approach 2: aggregate after the join.
-- Author's note: HAVING could not reference the select alias, so the aggregate
-- expression is spelled out in HAVING.
select
    stu.s_id,
    stu.s_name,
    round(avg(sc.s_score), 1) avgScore
from student stu
join score sc
    on stu.s_id = sc.s_id
group by stu.s_id, stu.s_name
having avg(sc.s_score) < 60
union all
select stu2.s_id, stu2.s_name, 0 as avgScore
from student stu2
where not exists (
    select sc2.s_id
    from score sc2
    where sc2.s_id = stu2.s_id
);
-- Output recorded by the author:
-- 08 王菊 0.0
-- 04 雪见 33.3
-- 06 李云龙 32.5
4.查询'李'老师的数量
-- Q4: number of teachers whose name starts with '李'.
-- The original grouped by t_name, which returns one row per distinct name
-- (each with its own count) instead of a single total. A plain count over the
-- filtered rows answers the question.
select count(*) as teacher_count
from teacher
where t_name like '李%';
-- With the sample teacher data only 李四 matches, so the result is 1.
5.查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩
-- Q5: for every student, the number of courses taken and the sum of all scores.
-- The left join keeps students without score rows; for them count() is 0 and
-- sum() is NULL, as the recorded output shows for student 08.
select
    s.s_id,
    s.s_name,
    count(g.c_id)  as total_count,
    sum(g.s_score) as total_score
from student s
left join score g
    on s.s_id = g.s_id
group by
    s.s_id,
    s.s_name;
-- Output recorded by the author:
-- 01 赵雷 3 269
-- 02 张三 3 210
-- 03 迪丽热巴 3 240
-- 04 雪见 3 100
-- 05 梅超风 2 163
-- 06 李云龙 2 65
-- 07 蒸煮 2 187
-- 08 王菊 0 NULL
6.查询学过'张三'老师授课的同学的信息
-- Q6: students who have taken at least one course taught by teacher '张三'.
-- Path: student -> score -> course -> teacher.
-- distinct keeps one row per student even if the teacher gives several courses
-- that the same student has taken.
select distinct
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
join score
    on stu.s_id = score.s_id
join course
    on score.c_id = course.c_id
join teacher t
    on course.t_id = t.t_id
   and t.t_name = '张三';
-- Output recorded by the author:
-- 01 赵雷 1990-01-01 男
-- 02 张三 1990-12-21 男
-- 03 迪丽热巴 2000-01-01 女
-- 04 雪见 2005-03-04 女
-- 05 梅超风 1988-04-06 女
-- 07 蒸煮 1989-07-01 男
7.查询没学过'张三'老师授课的同学的信息
-- Q7: students who have NOT taken any course taught by teacher '张三'.

-- Variant 1: anti-join. tmp lists the s_id of every score row that belongs to
-- one of the teacher's courses; students with no match in tmp are kept.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
left join (
    select score.s_id
    from score
    join course
        on course.c_id = score.c_id
    join teacher
        on course.t_id = teacher.t_id
       and teacher.t_name = '张三'
) tmp
    on stu.s_id = tmp.s_id
where tmp.s_id is null;

-- Variant 2: the same condition expressed with a correlated not exists.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
where not exists (
    select score.s_id
    from score
    join course
        on course.c_id = score.c_id
    join teacher
        on course.t_id = teacher.t_id
       and teacher.t_name = '张三'
    where score.s_id = stu.s_id
);
-- Output recorded by the author:
-- 06 李云龙 1999-12-12 男
-- 08 王菊 1990-01-20 女
8.查询学习编号为'01'并且也学过编号为'02'的课程的同学的信息
-- Q8: students who have taken both course '01' and course '02'.
-- c_id is a string column whose values are zero-padded ('01', '02', ...); the
-- original filtered on '1' and '2', which match no row in the sample data.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
join (
    select s_id
    from score
    where c_id = '01'
) tmp1
    on stu.s_id = tmp1.s_id
join (
    select s_id
    from score
    where c_id = '02'
) tmp2
    on stu.s_id = tmp2.s_id;
-- With the sample score data, students 01 through 05 have rows for both courses.
9.查询学过编号为'01'但是没有学过编号为'02'的课程的同学的信息
-- Q9: students who have taken course '01' but not course '02'.
-- The inner join requires a '01' row; the left join + is null rejects students
-- who also have a '02' row.
-- c_id values are zero-padded strings; the original filtered on '1' and '2',
-- which match no row in the sample data.
select stu.*
from student stu
join (
    select s_id
    from score
    where c_id = '01'
) tmp1
    on stu.s_id = tmp1.s_id
left join (
    select s_id
    from score
    where c_id = '02'
) tmp2
    on stu.s_id = tmp2.s_id
where tmp2.s_id is null;
-- With the sample score data only student 06 has course '01' without course '02'.
10.查询没有学全所有课程的同学的信息
-- Q10: students who have not taken every course.
-- tmp1 holds the total number of courses (a single row); tmp2 holds the number
-- of courses each student has a score for.

-- Variant 1: pair every student with the course total, then look for a
-- per-student count equal to that total. Students with no such match are kept.
select stu.*
from student stu
cross join (
    select count(c_id) num
    from course
) tmp1
left join (
    select s_id, count(c_id) num
    from score
    group by s_id
) tmp2
    on stu.s_id = tmp2.s_id
   and tmp1.num = tmp2.num
where tmp2.s_id is null;

-- Variant 2: attach the per-student count first, then compare it to the total.
-- The original inner-joined tmp1 on tmp1.num = tmp2.num and then filtered on
-- tmp2.s_id is null; the inner join had already dropped every row with a NULL
-- or different tmp2.num, so the query could never return rows. The comparison
-- now lives in WHERE, and students with no score rows are kept explicitly.
select stu.*
from student stu
left join (
    select s_id, count(c_id) num
    from score
    group by s_id
) tmp2
    on stu.s_id = tmp2.s_id
cross join (
    select count(c_id) num
    from course
) tmp1
where tmp2.s_id is null
   or tmp2.num <> tmp1.num;
-- With the sample data (3 courses) this selects students 05, 06, 07 (two courses
-- each) and 08 (no score rows).
11.查询至少有一门课与学号为'01'的同学所学相同的同学的信息
-- Q11: students who share at least one course with student '01'.
-- tmp1 is every (student, course) pair; tmp2 is the set of courses student '01'
-- has taken. Each join has its own ON condition; the original left the first
-- join unconstrained and placed both conditions on the second.
-- The group by collapses the one-row-per-shared-course result to one row per
-- student. Student '01' is not excluded, as the recorded output shows.
select
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex
from student stu
join (
    select s_id, c_id
    from score
) tmp1
    on stu.s_id = tmp1.s_id
join (
    select c_id
    from score
    where score.s_id = '01'
) tmp2
    on tmp1.c_id = tmp2.c_id
group by
    stu.s_id,
    stu.s_name,
    stu.s_birth,
    stu.s_sex;
-- Output recorded by the author:
-- 01 赵雷 1990-01-01 男
-- 02 张三 1990-12-21 男
-- 03 迪丽热巴 2000-01-01 女
-- 04 雪见 2005-03-04 女
-- 05 梅超风 1988-04-06 女
-- 06 李云龙 1999-12-12 男
-- 07 蒸煮 1989-07-01 男
12.查询和'01'号的同学学习的课程完全相同的其他同学的信息
-- Q12: other students whose set of courses is exactly the same as student '01'.
-- Each student's course ids are collapsed into one '|'-separated string and the
-- strings are compared.
-- Fixes over the original:
--   * s_id values are zero-padded strings, so the reference student is '01';
--     the original used '1', which matches no row in the sample data.
--   * collect_set() output is wrapped in sort_array() so that two equal sets
--     always produce the same string regardless of collection order.
--   * the per-row exclusion of student '01' is done in WHERE rather than HAVING.
select
    stu.*,
    tmp1.course_id
from student stu
join (
    select
        s_id,
        concat_ws('|', sort_array(collect_set(c_id))) course_id
    from score
    where s_id <> '01'
    group by s_id
) tmp1
    on stu.s_id = tmp1.s_id
join (
    select concat_ws('|', sort_array(collect_set(c_id))) course_id
    from score
    where s_id = '01'
) tmp2
    on tmp2.course_id = tmp1.course_id;
-- With the sample score data student 01 has courses 01, 02 and 03; students
-- 02, 03 and 04 have the same three courses.
13.查询没学过'张三'老师讲授的任一门课程的学生姓名
-- Q13: students who have not taken any course taught by teacher '张三'.
-- tmp2 is the set of courses given by the teacher; tmp3 is every student id
-- with a score row for one of those courses.
-- The original used an inner join to tmp3 together with `tmp3.s_id is null`,
-- which can never be true after an inner join, so it always returned no rows.
-- A left join makes the anti-join work.
-- NOTE(review): the heading asks only for the student name; stu.* is kept from
-- the original so the result columns are unchanged.
select stu.*
from student stu
left join (
    select tmp1.s_id
    from score tmp1
    join (
        select course.c_id
        from course
        join teacher
            on course.t_id = teacher.t_id
        where teacher.t_name = '张三'
    ) tmp2
        on tmp1.c_id = tmp2.c_id
) tmp3
    on stu.s_id = tmp3.s_id
where tmp3.s_id is null;
-- With the sample data this selects students 06 李云龙 and 08 王菊.