表模型
1.逆向思维
Not IN
Example:
查询没学过"张三"老师授课的同学的信息
SELECT * FROM student a
WHERE a.sid NOT IN(
SELECT a.sid
FROM student a, sc b, course c , teacher d
WHERE a.sid = b.sid AND b.cid = c.cid AND c.tid = d.tid AND d.tname = '张三'
);
2.在....但不在
IN + NOT IN
Example:
查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息
SELECT * FROM student a
WHERE a.sid NOT IN(SELECT sid FROM sc b WHERE b.cid = '02')
AND a.sid IN(SELECT sid FROM sc c WHERE c.cid = '01' );
SELECT Student.* FROM Student , SC
WHERE Student.sid = SC.sid AND SC.cid = '01'
AND EXISTS
(SELECT 1 FROM SC SC_2 WHERE SC_2.sid = SC.sid AND SC_2.cid = '02')
ORDER BY Student.sid
3.完全词语(双重否定,去除部分存在)
NOT IN + NOT IN
Example:
查询和"01"号的同学学习的课程完全相同的其他同学的信息
SELECT * FROM student WHERE sid IN(
SELECT sid FROM sc
WHERE sid NOT IN (SELECT sid FROM sc WHERE cid NOT IN(SELECT cid
FROM sc
WHERE sid = '01'))
GROUP BY sid HAVING COUNT(cid) = (SELECT COUNT(cid) FROM sc WHERE sid = '01') AND sid != '01'
)
拆解sql分析
SELECT sid FROM sc WHERE cid NOT IN(SELECT cid
FROM sc
WHERE sid = '01')
这部分就表示图中 2,4
SELECT * FROM student WHERE sid IN(
SELECT sid FROM sc
WHERE sid NOT IN (SELECT sid FROM sc WHERE cid NOT IN(SELECT cid
FROM sc
WHERE sid = '01'))
这部分表示图中1,3,所以最后分组求和判断课程数相同就得到完全一致的。
这题用正向的思维是,不好做的。总是会出现部分相同,使用这种反向双重否定的方式去处理“完全”这种sql最好
4.将列转为行
case when ...then .... else .....end
Example:
显示所有学生的所有课程的成绩以及平均成绩,按平均成绩从高到低排序
SELECT * ,
MAX(CASE c.cname WHEN '语文' THEN b.score ELSE 0 END) 语文,
MAX(CASE c.cname WHEN '数学' THEN b.score ELSE 0 END) 数学,
MAX(CASE c.cname WHEN '英语' THEN b.score ELSE 0 END) 英语,
CAST(AVG(b.score) AS DECIMAL(10,2)) avg_score
FROM student a
LEFT JOIN sc b ON a.sid = b.sid
LEFT JOIN course c ON b.cid = c.cid
GROUP BY a.sid
ORDER BY avg_score DESC
max函数一定要加,使用学生id分组后的语文,数学,英语在一行上
5.求某个范围的总人数
Sum(condition)
Example:
查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率
-- 及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90
SELECT * ,
MAX(b.score) 最高分,
MIN(b.score) 最低分,
CAST(AVG(b.score) AS DECIMAL(10,2))平均分,
CAST(SUM(b.score >= 60) /COUNT(*) AS DECIMAL(10,2)) * 100 及格率,
CAST(SUM(b.score >= 70 AND b.score <= 80) / COUNT(*) AS DECIMAL(10,2)) * 100 中等率
FROM course a, sc b
WHERE a.cid = b.cid
GROUP BY a.cid
6.计算排名的方式
1> 通过表自关联+关键字段筛选条件+左外连接
Example:
按各科成绩进行排序,并显示排名
SELECT a.*,COUNT(b.score)+1 FROM sc a
LEFT JOIN sc b
ON a.cid = b.cid AND (a.score < b.score)
GROUP BY a.cid,a.sid
ORDER BY a.cid,COUNT(b.score)
自关联:通过自己与本组的人相比,有多少人小于我并且加一就是排名,这是个数学规律,可以自验证
左外连接:必须使用外连接,如果使用内连接,第一名右边为空,就会被过滤掉
2>定义变量+ case when方式
思想:首先排序好,那么从第一个开始比较,不相等就加+,最后数字就是排名
SELECT cid ,score,rk1,rk2,rk3
from
(SELECT
* ,
IF(@p=b.cid,
CASE
WHEN @刚才=b.score THEN @排名
WHEN @刚才!=b.score THEN @排名:=@排名+1
END,
@排名 := 1
) as rk1,
IF(@p=b.cid,
case
when 1=1 then @排:=@排+1
end,
@排:=1
) as rk2,
IF(@p=b.cid,
CASE
WHEN @刚=b.score THEN @名
WHEN @刚!=b.score THEN @名:=@排
END,
@名 := 1
) as rk3,
@p:=b.cid,
@刚才:=b.score,
@刚:=b.score
FROM sc b,(SELECT @p:=NULL,@刚才:=NULL,@才:=NULL,@排名:=0,@排:=0,@名:=0)r
ORDER BY b.cid ,b.score DESC
)s;
结果
rk1 为 1 ,1,2 —— 重复不空缺
rk2 为 1 ,2,3 —— 不重复不空缺
rk3 为 1 1,3 —— 重复空缺
3>hive 开窗函数(doing)
SELECT ref_host,ref_host_cnts,concat(month,day,hour),
row_number() OVER (PARTITION BY concat(month,day,hour) ORDER BY ref_host_cnts DESC) as rk1,
rank() OVER (PARTITION BY concat(month,day,hour) ORDER BY ref_host_cnts DESC) as rk2,
dense_rank() OVER (PARTITION BY concat(month,day,hour) ORDER BY ref_host_cnts DESC) as rk3
FROM dw_pvs_refererhost_everyhour
7.查询前几名的
思路:先排名然后筛选即可
Example:
SELECT *
FROM (
SELECT s1.*,COUNT(s1.SID) as top
FROM sc s1 LEFT JOIN
sc s2 ON s1.CID = s2.CID AND s1.score < s2.score
GROUP BY s1.CID,s1.SID
ORDER BY s1.CID,top
) AS top_t
WHERE top_t.top < 4
8.多字段交叉去重
去重方式:distinct or group by
思路:自关联+使用条件+NOT IN
多字段交叉去重
9.预测生日,计算年龄
使用函数:
week() year() day() month() now() datediff()
Example:
查询各学生的年龄
SELECT s.*,FLOOR(DATEDIFF(NOW(),s.sage)/365.2422) age
FROM student s
查询下周过生日的学生
SELECT *,WEEK(NOW()) as now_week, WEEK(s.sage) as s_week
FROM student s
WHERE WEEK(s.sage) = WEEK(NOW())+1
总结:
写sql,多使用函数,如果普通思维完成不了使用集合逆向求反的思维