HQL 50题
1.建表
create table study.student (
student_id string -- 学生编号
,name string -- 学生姓名
,birthday string -- 学生生日
,sex string -- 学生性别
)
stored as parquet
tblproperties("orc.compress"="snappy");
create table study.teacher (
teacher_id string -- 教师编号
,name string -- 教师姓名
)
stored as parquet
tblproperties("orc.compress"="snappy");
create table study.course (
course_id string -- 课程编号
,name string -- 课程名
,teacher_id string -- 课程对应的教师编号
)
stored as parquet
tblproperties("orc.compress"="snappy");
create table study.score (
student_id string -- 学生编号
,course_id string -- 课程编号
,score int -- 对应的成绩
)
stored as parquet
tblproperties("orc.compress"="snappy");
2.导数据
insert overwrite table study.student VALUES
('01' , '赵雷' , '1990-01-01' , '男'),
('02' , '钱电' , '1990-12-21' , '男'),
('03' , '孙风' , '1990-12-20' , '男'),
('04' , '李云' , '1990-12-06' , '男'),
('05' , '周梅' , '1991-12-01' , '女'),
('06' , '吴兰' , '1992-01-01' , '女'),
('07' , '郑竹' , '1989-01-01' , '女'),
('09' , '张三' , '2017-12-20' , '女'),
('10' , '李蛋' , '2017-12-25' , '女'),
('11' , '李四' , '2012-06-06' , '女'),
('12' , '赵六' , '2013-06-13' , '女'),
('13' , '孙七' , '2014-06-01' , '女');
insert overwrite table study.teacher VALUES
('01' , '张老师'),
('02' , '李老师'),
('03' , '王老师');
insert overwrite table study.course VALUES
('01' , '语文' , '02'),
('02' , '数学' , '01'),
('03' , '英语' , '03');
insert overwrite table study.score VALUES
('01' , '01' , 80),
('01' , '02' , 90),
('01' , '03' , 99),
('02' , '01' , 70),
('02' , '02' , 60),
('02' , '03' , 80),
('03' , '01' , 80),
('03' , '02' , 80),
('03' , '03' , 80),
('04' , '01' , 50),
('04' , '02' , 30),
('04' , '03' , 20),
('05' , '01' , 76),
('05' , '02' , 87),
('06' , '01' , 31),
('06' , '03' , 34),
('07' , '02' , 89),
('07' , '03' , 98);
3.答题
3.1 查询所有教师的全部信息(教师编号和姓名)
SELECT * FROM teacher;
3.2 输出所有学生中男生的全部信息
SELECT * FROM student WHERE sex = '男';
3.3 查询所有学生中男生的全部信息,按照生日排降序
SELECT * FROM student WHERE sex = '男' ORDER BY birthday DESC;
3.4 查询所有学生的全部信息,先按照性别排序,再按照生日排降序
SELECT * FROM student ORDER BY sex,birthday DESC;
3.5 求出学生总数
SELECT count(1) FROM student;
3.6 查询学生中男生、女生人数
SELECT sex,count(1) FROM student GROUP BY sex;
3.7 参加考试的学生中,查出每个学生的学生编号、选了几门课
SELECT student_id,count(course_id) FROM score GROUP BY student_id;
3.8 检索至少选修三门课程的学生学号
SELECT student_id,count(course_id) AS num FROM score GROUP BY student_id HAVING num >= 3;
或者
SELECT student_id FROM
(SELECT student_id,count(course_id) AS num FROM score GROUP BY student_id) t
WHERE num >= 3;
3.9 查询存在不及格的课程编号
SELECT DISTINCT(course_id) FROM score WHERE score < 60;
或
SELECT course_id FROM score WHERE score < 60 GROUP BY course_id;
3.10 输出所有课程的课程编号、课程名、对应的教师姓名
SELECT t1.*,t2.name FROM course t1 LEFT JOIN teacher t2 ON t1.teacher_id = t2.teacher_id;
3.11 求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
SELECT course_id,count(1) student_num,avg(score) avg_score FROM score GROUP BY course_id;
3.12 求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
SELECT t1.name course_name,t2.student_num,t2.avg_score FROM course t1
LEFT JOIN
(SELECT
course_id,
count(1) student_num,
avg(score) avg_score
FROM score
GROUP BY course_id) t2 ON t1.course_id = t2.course_id;
3.13 查询同时学习" 01 “课程和” 02 "课程的学生编号及01和02课程分数
SELECT t1.student_id,t1.score score1,t2.score score2 FROM score t1
JOIN score t2 on t1.student_id = t2.student_id
WHERE t1.course_id = '01' AND t2.course_id = '02';
或
select a.student_id
,a.score as score_01
,b.score as score_02
from
(
select *
from score
where course_id = '01'
) a
join
(
select *
from score
where course_id = '02'
) b
on a.student_id = b.student_id;
3.14 查询" 01 “课程比” 02 "课程成绩高的学生编号及01和02课程分数
SELECT * FROM
(SELECT t1.student_id,t1.score score1,t2.score score2 FROM score t1
JOIN score t2 on t1.student_id = t2.student_id
WHERE t1.course_id = '01' AND t2.course_id = '02') t WHERE score1 > score2;
3.15 查询" 01 “课程比” 02 "课程成绩高的学生姓名及01和02课程分数
SELECT t2.name,t.score1,t.score2 FROM
(SELECT t1.student_id,t1.score score1,t2.score score2 FROM score t1
JOIN score t2 on t1.student_id = t2.student_id
WHERE t1.course_id = '01' AND t2.course_id = '02') t
LEFT JOIN student t2 ON t.student_id = t2.student_id WHERE score1 > score2;
3.16 查询选择了 "01"课程但没选择 "02"课程的学生姓名
SELECT t1.name FROM student t1 JOIN
(SELECT a.student_id FROM
(SELECT * FROM score WHERE course_id = '01') a
LEFT JOIN (SELECT * FROM score WHERE course_id = '02') b
ON a.student_id = b.student_id WHERE b.student_id IS NULL) t2 ON t1.student_id = t2.student_id;
--hql中 in和not in后不支持子查询,所以只能用join来解决
3.17 查询学过 ‘张老师’ 老师课程的所有同学姓名、生日、性别
SELECT t1.name,t1.birthday,t1.sex FROM student t1 LEFT JOIN(
SELECT t1.* FROM score t1 LEFT JOIN
(SELECT t2.course_id FROM teacher t1
LEFT JOIN course t2 ON t1.teacher_id = t2.teacher_id
WHERE t1.name = '张老师') t2 ON t1.course_id = t2.course_id
WHERE t2.course_id IS NOT NULL) t2 ON t1.student_id = t2.student_id
WHERE t2.student_id IS NOT NULL;
3.18 查询学习 "01"课程但没有学习 "02"课程学生的学生编号以及"01"课程成绩
SELECT a.student_id,a.score FROM
(SELECT * FROM score WHERE course_id = '01') a
LEFT JOIN (SELECT * FROM score WHERE course_id = '02') b ON a.student_id = b.student_id
WHERE b.student_id is NULL;
3.19 查询学习 "02"课程但没有学习 "01"课程学生的学生编号以及"02"课程成绩
SELECT a.student_id,a.score FROM
(SELECT * FROM score WHERE course_id = '02') a
LEFT JOIN (SELECT * FROM score WHERE course_id = '01') b ON a.student_id = b.student_id
WHERE b.student_id is NULL;
3.20 查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
SELECT t1.name,t2.course_num,t2.avg_score,t2.sum_score FROM student t1 RIGHT JOIN(
SELECT student_id,count(course_id) course_num,sum(score) sum_score,avg(score) avg_score
FROM score GROUP BY student_id) t2 ON t1.student_id = t2.student_id;
3.21 查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
SELECT * FROM (
SELECT t1.name,t2.course_num,t2.avg_score,t2.sum_score FROM student t1 RIGHT JOIN(
SELECT student_id,count(course_id) course_num,sum(score) sum_score,avg(score) avg_score
FROM score GROUP BY student_id) t2 ON t1.student_id = t2.student_id) t WHERE t.avg_score > 60
ORDER BY t.course_num DESC,t.sum_score DESC;
3.22 检索 “01” 课程分数小于 60 分的学生信息及"01" 课程分数,按照分数排降序
SELECT t1.student_id,t1.name,t1.name,t1.birthday,t2.score FROM student t1 LEFT JOIN (
SELECT * FROM score WHERE course_id = '01' AND score < 60) t2
ON t1.student_id = t2.student_id WHERE t2.student_id IS NOT NULL
ORDER BY t2.score DESC;
3.23 查询两门及其以上不及格课程的同学的姓名及其平均成绩
SELECT t1.name,t2.avg_score FROM student t1
RIGHT JOIN (
SELECT * FROM (
SELECT student_id,count(course_id) course_num,avg(score) avg_score
FROM score WHERE score < 60 GROUP BY student_id) t WHERE course_num >= 2) t2
ON t1.student_id = t2.student_id;
3.24 查询没有学全所有课程的同学的编号 (包含无选课的同学)
--先从成绩表找出选了所有课的同学
--因为hive的in和not in 不支持子查询,所以只能再通过left join一次成绩表找出没选所有课的同学
--通过学生表左联成绩表找出一门课都没选的同学
--union两张表 ->完成
--(因为hive版本问题,我使用的版本好像不支union只支持union all,所以只能再在外面包一层DISTINCT来去重
-- 这样会影响效率:DISTINCT在mapreduc阶段,map阶段不能用combine消重,数据输出为(key,value)形式然后在reduce阶段进行消重。
--重点是,Hive在处理COUNT这种“全聚合(full aggregates)”计算时,它会忽略用户指定的Reduce Task数,而强制使用1。)
SELECT DISTINCT(student_id) FROM (
SELECT t1.student_id FROM score t1 LEFT JOIN(
SELECT t.student_id FROM
(SELECT student_id,count(course_id) num FROM score GROUP BY student_id) t
JOIN (SELECT count(course_id) num FROM course) t2 ON t.num = t2.num) t2
ON t1.student_id = t2.student_id
WHERE t2.student_id IS NULL GROUP BY t1.student_id
UNION ALL
SELECT t.student_id FROM
(SELECT t1.student_id FROM student t1
LEFT JOIN score t2 ON t1.student_id = t2.student_id WHERE t2.student_id IS NULL) t) s
3.25 查询 1990 年出生的学生名单
SELECT * FROM student WHERE birthday LIKE '%1990%';
3.26 查询名字中含有「风」字的学生信息
SELECT * FROM student WHERE name LIKE '%风%';
3.27 查询「李」姓老师的数量
SELECT count(1) FROM teacher WHERE name LIKE '李%';
3.28 查询至少有两门课与学号为" 01 "的学生所学相同的学生id
SELECT student_id FROM (
SELECT t1.student_id,count(t1.course_id) AS num FROM score t1
LEFT JOIN
(SELECT * FROM score WHERE student_id = '01') t2
ON t1.course_id = t2.course_id WHERE t1.student_id <> '01' GROUP BY t1.student_id) t
WHERE t.num >= 2;
3.29 查询选修了全部课程的学生id的姓名
SELECT a.name FROM student a LEFT JOIN (
SELECT * FROM (
SELECT student_id,count(course_id) num1 FROM score GROUP BY student_id) t1
LEFT JOIN(
SELECT count(course_id) num2 FROM course) t2 ON t1.num1 = t2.num2 WHERE t2.num2 IS NOT NULL) b
ON a.student_id = b.student_id WHERE b.student_id IS NOT NULL;
3.30 查询和" 01 "号的同学学习的课程完全相同的其他同学的学生id
SELECT a.student_id FROM (
SELECT t1.student_id,count(t1.course_id) num1 FROM score t1 LEFT JOIN(
SELECT * FROM score WHERE student_id = '01') t2
ON t1.course_id = t2.course_id GROUP BY t1.student_id) a
LEFT JOIN (SELECT count(1) num2 FROM score WHERE student_id = '01') b ON a.num1 = b.num2
WHERE b.num2 IS NOT NULL AND a.student_id <> '01';
3.31 统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
-根据课程号在成绩表中查询分数段的个数除以该学习该课程的总数
SELECT t1.course_id,t1.name,t2.first,t2.second,t2.third,t2.fourth FROM course t1 LEFT JOIN(
SELECT course_id,
round(sum(CASE WHEN score > 85 THEN 1 ELSE 0 END)/count(1),2) AS first,
round(sum(CASE WHEN score BETWEEN 70 AND 84 THEN 1 ELSE 0 END)/count(1),2) AS second,
round(sum(CASE WHEN score BETWEEN 60 AND 69 THEN 1 ELSE 0 END)/count(1),2) AS third,
round(sum(CASE WHEN score <60 THEN 1 ELSE 0 END)/count(1),2) AS fourth
FROM (
SELECT course_id,score FROM score) a
GROUP BY course_id) t2 ON t1.course_id = t2.course_id
3.32 查询学生的总成绩,并进行排名
--使用了hive的开窗函数 RANK() VOER(...)
SELECT student_id,
sum(score) sum_score,
RANK() OVER(ORDER BY sum(score) DESC) AS stu_rank
FROM score GROUP BY student_id
3.33 查询各科成绩前三名的记录
SELECT * FROM (
SELECT student_id,course_id,score,
rank() OVER(PARTITION BY course_id ORDER BY score DESC) AS stu_rank
FROM score) t WHERE t.stu_rank <= 3
3.34 查询出只选修两门课程的学生学号和姓名
SELECT t1.student_id,t1.name FROM student t1 LEFT JOIN(
SELECT student_id,count(course_id) course_num FROM score GROUP BY student_id) t2
ON t1.student_id = t2.student_id WHERE t2.course_num = 2
3.35 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
SELECT course_id,avg(score) avg_score FROM score GROUP BY course_id
ORDER BY avg_score DESC,course_id ASC
3.36 查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
SELECT t1.student_id,t1.name,t2.avg_score FROM student t1 LEFT JOIN(
SELECT student_id,avg(score) avg_score FROM score GROUP BY student_id) t2
ON t1.student_id = t2.student_id WHERE t2.avg_score >= 85;
3.37 查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
SELECT a.name,b.score FROM student a LEFT JOIN(
SELECT t1.student_id,t1.score FROM score t1 LEFT JOIN(
SELECT * FROM course WHERE name = '数学') t2 ON t1.course_id = t2.course_id
WHERE t1.score < 60 AND t2.course_id IS NOT NULL) b ON a.student_id = b.student_id
WHERE b.score IS NOT NULL
3.38 查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
SELECT t1.student_id,t1.name FROM student t1 LEFT JOIN(
SELECT * FROM score WHERE course_id = '01' AND score >= 80) t2
ON t1.student_id = t2.student_id WHERE t2.student_id IS NOT NULL
3.39 查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
SELECT a.student_id,a.course_id,a.score FROM score a RIGHT JOIN(
SELECT t1.course_id FROM course t1 RIGHT JOIN(
SELECT * FROM teacher WHERE name = '张老师') t2
ON t1.teacher_id =t2.teacher_id) b ON a.course_id = b.course_id
ORDER BY a.score DESC LIMIT 1;
3.40 统计每门课程的学生选修人数(超过 5 人的课程才统计)
SELECT t1.course_id,t1.name,t2.count_num FROM course t1 LEFT JOIN(
SELECT course_id,count(student_id) count_num FROM score
GROUP BY course_id HAVING count(student_id) > 5) t2
ON t1.course_id = t2.course_id
3.41 查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
SELECT t.student_id,t.course_id,t.score FROM (
SELECT t1.student_id,t1.course_id,t1.score FROM score t1 LEFT JOIN
score t2 ON t1.student_id = t2.student_id
AND t1.score = t2.score
WHERE t1.course_id <> t2.course_id) t
GROUP BY t.student_id,t.course_id,t.score
文章转载自BB侠的大数据之旅,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




