ROC曲线含义参考:
题目:
表1
PREDICTION | OBSERVE_VALUE |
---|---|
0.95 | 1 |
0.82 | 0 |
0.7 | 1 |
0.88 | 1 |
0.6 | 0 |
上表1第一列是预测值,第二列是观测值,如果预测值>=0.8判定为正,否则为负。依次计算出TPR和FPR,得出下表2。
表2
PREDICTION | OBSERVE_VALUE | TPR | FPR |
---|---|---|---|
0.95 | 1.00 | 1.00 | 0.00 |
0.82 | 0.00 | 1.00 | 1.00 |
0.70 | 1.00 | 0.50 | 1.00 |
0.88 | 1.00 | 0.67 | 1.00 |
0.60 | 0.00 | 0.67 | 0.50 |
问:用SQL实现输出表2。
注意:TPR = TP/P; FPR= FP/N
-- ROC曲线的SQL实现
-- PREDICTION >=0.8则预测值为正(TRUE), <0.8为负(FALSE)
-- 基础数据:
CREATE TABLE TABLE1(
PREDICTION DECIMAL(19,2) NULL,
OBSERVE_VALUE INT
)
INSERT INTO TABLE1
VALUES(0.95, 1),
(0.82, 0),
(0.70, 1),
(0.88, 1),
(0.6, 0)
- 基于SQLSERVER的实现方式:
-- 基于SQLSERVER实现
SELECT A.PREDICTION, A.OBSERVE_VALUE
, CONVERT(DECIMAL(19,2),1.00*A.TP/CASE WHEN A.P=0 THEN 1 ELSE A.P END) AS TPR
, CONVERT(DECIMAL(19,2),1.00*A.FP/CASE WHEN A.N=0 THEN 1 ELSE A.N END) AS FPR
FROM (
SELECT A.*, SUM(CASE WHEN OBSERVE_VALUE=1 THEN 1 ELSE 0 END) OVER(ORDER BY RN ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS P
, SUM(CASE WHEN OBSERVE_VALUE=0 THEN 1 ELSE 0 END) OVER(ORDER BY RN ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS N
, SUM(CASE WHEN OBSERVE_VALUE=1 AND PRED_VALUE=1 THEN 1 ELSE 0 END) OVER(ORDER BY RN ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TP
, SUM(CASE WHEN OBSERVE_VALUE=0 AND PRED_VALUE=1 THEN 1 ELSE 0 END) OVER(ORDER BY RN ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS FP
FROM (
SELECT ROW_NUMBER() OVER(ORDER BY ROWGROUP) AS RN,* FROM (
SELECT ‘A‘ AS ROWGROUP, *, CASE WHEN PREDICTION>=0.8 THEN 1 ELSE 0 END AS PRED_VALUE FROM TABLE1
) A
) A
) A