R 相关性分析
1. 相关性矩阵计算:
> setwd("E:\\Rwork")
> data("mtcars")
> head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
> cor(mtcars$mpg,mtcars$cyl)
[1] -0.852162
> matcar.cor <- cor(mtcars)
> matcar.cor
mpg cyl disp hp drat wt
mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
qsec vs am gear carb
mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
2. 相关系数的显著性水平
- 使用 Hmisc 包的 rcorr() 函数，计算相关系数矩阵及其对应的显著性水平（p 值）
> library(Hmisc)
> res <- rcorr(as.matrix(mtcars))
> res
mpg cyl disp hp drat wt qsec vs am gear carb
mpg 1.00 -0.85 -0.85 -0.78 0.68 -0.87 0.42 0.66 0.60 0.48 -0.55
cyl -0.85 1.00 0.90 0.83 -0.70 0.78 -0.59 -0.81 -0.52 -0.49 0.53
disp -0.85 0.90 1.00 0.79 -0.71 0.89 -0.43 -0.71 -0.59 -0.56 0.39
hp -0.78 0.83 0.79 1.00 -0.45 0.66 -0.71 -0.72 -0.24 -0.13 0.75
drat 0.68 -0.70 -0.71 -0.45 1.00 -0.71 0.09 0.44 0.71 0.70 -0.09
wt -0.87 0.78 0.89 0.66 -0.71 1.00 -0.17 -0.55 -0.69 -0.58 0.43
qsec 0.42 -0.59 -0.43 -0.71 0.09 -0.17 1.00 0.74 -0.23 -0.21 -0.66
vs 0.66 -0.81 -0.71 -0.72 0.44 -0.55 0.74 1.00 0.17 0.21 -0.57
am 0.60 -0.52 -0.59 -0.24 0.71 -0.69 -0.23 0.17 1.00 0.79 0.06
gear 0.48 -0.49 -0.56 -0.13 0.70 -0.58 -0.21 0.21 0.79 1.00 0.27
carb -0.55 0.53 0.39 0.75 -0.09 0.43 -0.66 -0.57 0.06 0.27 1.00
n= 32
P
mpg cyl disp hp drat wt qsec vs am gear
mpg 0.0000 0.0000 0.0000 0.0000 0.0000 0.0171 0.0000 0.0003 0.0054
cyl 0.0000 0.0000 0.0000 0.0000 0.0000 0.0004 0.0000 0.0022 0.0042
disp 0.0000 0.0000 0.0000 0.0000 0.0000 0.0131 0.0000 0.0004 0.0010
hp 0.0000 0.0000 0.0000 0.0100 0.0000 0.0000 0.0000 0.1798 0.4930
drat 0.0000 0.0000 0.0000 0.0100 0.0000 0.6196 0.0117 0.0000 0.0000
wt 0.0000 0.0000 0.0000 0.0000 0.0000 0.3389 0.0010 0.0000 0.0005
qsec 0.0171 0.0004 0.0131 0.0000 0.6196 0.3389 0.0000 0.2057 0.2425
vs 0.0000 0.0000 0.0000 0.0000 0.0117 0.0010 0.0000 0.3570 0.2579
am 0.0003 0.0022 0.0004 0.1798 0.0000 0.0000 0.2057 0.3570 0.0000
gear 0.0054 0.0042 0.0010 0.4930 0.0000 0.0005 0.2425 0.2579 0.0000
carb 0.0011 0.0019 0.0253 0.0000 0.6212 0.0146 0.0000 0.0007 0.7545 0.1290
carb
mpg 0.0011
cyl 0.0019
disp 0.0253
hp 0.0000
drat 0.6212
wt 0.0146
qsec 0.0000
vs 0.0007
am 0.7545
gear 0.1290
carb
> signif(res$r, 2)
mpg cyl disp hp drat wt qsec vs am gear carb
mpg 1.00 -0.85 -0.85 -0.78 0.680 -0.87 0.420 0.66 0.600 0.48 -0.550
cyl -0.85 1.00 0.90 0.83 -0.700 0.78 -0.590 -0.81 -0.520 -0.49 0.530
disp -0.85 0.90 1.00 0.79 -0.710 0.89 -0.430 -0.71 -0.590 -0.56 0.390
hp -0.78 0.83 0.79 1.00 -0.450 0.66 -0.710 -0.72 -0.240 -0.13 0.750
drat 0.68 -0.70 -0.71 -0.45 1.000 -0.71 0.091 0.44 0.710 0.70 -0.091
wt -0.87 0.78 0.89 0.66 -0.710 1.00 -0.170 -0.55 -0.690 -0.58 0.430
qsec 0.42 -0.59 -0.43 -0.71 0.091 -0.17 1.000 0.74 -0.230 -0.21 -0.660
vs 0.66 -0.81 -0.71 -0.72 0.440 -0.55 0.740 1.00 0.170 0.21 -0.570
am 0.60 -0.52 -0.59 -0.24 0.710 -0.69 -0.230 0.17 1.000 0.79 0.058
gear 0.48 -0.49 -0.56 -0.13 0.700 -0.58 -0.210 0.21 0.790 1.00 0.270
carb -0.55 0.53 0.39 0.75 -0.091 0.43 -0.660 -0.57 0.058 0.27 1.000
#' Flatten a correlation matrix and its p-value matrix into a long table
#'
#' Walks the strict upper triangle of `cor` (every row/column pair once,
#' diagonal excluded) and returns one data-frame row per pair.
#'
#' @param cor Two-dimensional table of correlation coefficients, such as the
#'   `r` element of the `rcorr()` result used in this file.
#' @param p Table of p-values with the same dimensions as `cor`, such as the
#'   `P` element of the same result.
#' @return A data frame with columns `row`, `column`, `cor` and `p`, ordered
#'   column by column through the upper triangle.
CorMatrix <- function(cor, p) {
  # A p-value table of a different shape would be indexed by the wrong mask
  # and silently pair coefficients with unrelated p-values.
  stopifnot(
    length(dim(cor)) == 2L,
    identical(dim(cor), dim(p))
  )

  ut <- upper.tri(cor)

  row_labels <- rownames(cor)
  col_labels <- colnames(cor)
  is_square <- nrow(cor) == ncol(cor)

  # For a square matrix with names on one side only, reuse those names for
  # the other side (this keeps the original rownames-only behaviour).
  if (is.null(col_labels) && is_square) {
    col_labels <- row_labels
  }
  if (is.null(row_labels) && is_square) {
    row_labels <- col_labels
  }
  # With no names at all, label by position so data.frame() does not drop
  # the `row` / `column` columns.
  if (is.null(row_labels)) {
    row_labels <- as.character(seq_len(nrow(cor)))
  }
  if (is.null(col_labels)) {
    col_labels <- as.character(seq_len(ncol(cor)))
  }

  data.frame(
    row = row_labels[row(cor)[ut]],
    # Column labels come from the column names, not the row names.
    column = col_labels[col(cor)[ut]],
    cor = (cor)[ut],
    p = p[ut]
  )
}
>
>
> res <- rcorr(as.matrix(mtcars))
> CorMatrix (res$r, res$P)
row column cor p
1 mpg cyl -0.85216196 6.112688e-10
2 mpg disp -0.84755138 9.380328e-10
3 cyl disp 0.90203287 1.803002e-12
4 mpg hp -0.77616837 1.787835e-07
5 cyl hp 0.83244745 3.477861e-09
6 disp hp 0.79094859 7.142679e-08
7 mpg drat 0.68117191 1.776240e-05
8 cyl drat -0.69993811 8.244636e-06
9 disp drat -0.71021393 5.282022e-06
10 hp drat -0.44875912 9.988772e-03
11 mpg wt -0.86765938 1.293958e-10
12 cyl wt 0.78249579 1.217567e-07
13 disp wt 0.88797992 1.222311e-11
14 hp wt 0.65874789 4.145827e-05
15 drat wt -0.71244065 4.784260e-06
16 mpg qsec 0.41868403 1.708199e-02
17 cyl qsec -0.59124207 3.660533e-04
18 disp qsec -0.43369788 1.314404e-02
19 hp qsec -0.70822339 5.766253e-06
20 drat qsec 0.09120476 6.195826e-01
21 wt qsec -0.17471588 3.388683e-01
22 mpg vs 0.66403892 3.415937e-05
23 cyl vs -0.81081180 1.843018e-08
24 disp vs -0.71041589 5.235012e-06
25 hp vs -0.72309674 2.940896e-06
26 drat vs 0.44027846 1.167553e-02
27 wt vs -0.55491568 9.798492e-04
28 qsec vs 0.74453544 1.029669e-06
29 mpg am 0.59983243 2.850207e-04
30 cyl am -0.52260705 2.151207e-03
31 disp am -0.59122704 3.662114e-04
32 hp am -0.24320426 1.798309e-01
33 drat am 0.71271113 4.726790e-06
34 wt am -0.69249526 1.125440e-05
35 qsec am -0.22986086 2.056621e-01
36 vs am 0.16834512 3.570439e-01
37 mpg gear 0.48028476 5.400948e-03
38 cyl gear -0.49268660 4.173297e-03
39 disp gear -0.55556920 9.635921e-04
40 hp gear -0.12570426 4.930119e-01
41 drat gear 0.69961013 8.360110e-06
42 wt gear -0.58328700 4.586601e-04
43 qsec gear -0.21268223 2.425344e-01
44 vs gear 0.20602335 2.579439e-01
45 am gear 0.79405876 5.834043e-08
46 mpg carb -0.55092507 1.084446e-03
47 cyl carb 0.52698829 1.942340e-03
48 disp carb 0.39497686 2.526789e-02
49 hp carb 0.74981247 7.827810e-07
50 drat carb -0.09078980 6.211834e-01
51 wt carb 0.42760594 1.463861e-02
52 qsec carb -0.65624923 4.536949e-05
53 vs carb -0.56960714 6.670496e-04
54 am carb 0.05753435 7.544526e-01
55 gear carb 0.27407284 1.290291e-01
3. 可视化相关性分析
1. symnum() function
> cor_matr <- cor(mtcars)
> symnum(cor_matr)
m cy ds h dr w q v a g cr
mpg 1
cyl + 1
disp + * 1
hp , + , 1
drat , , , . 1
wt + , + , , 1
qsec . . . , 1
vs , + , , . . , 1
am . . . , , 1
gear . . . , . , 1
carb . . . , . , . 1
attr(,"legend")
[1] 0 ‘ ’ 0.3 ‘.’ 0.6 ‘,’ 0.8 ‘+’ 0.9 ‘*’ 0.95 ‘B’ 1
2. corrplot() function to plot a correlogram
# Draw several correlogram variants of the mtcars correlation matrix.
# Load corrplot, which supplies corrplot() and corrplot.mixed().
library(corrplot)
# Pairwise correlation matrix of every mtcars column.
matcar.cor <- cor(mtcars)
# Evaluate the matrix rounded to two decimals; the result is not stored, so
# this is only useful for its auto-printed output in an interactive session.
round(matcar.cor, 2)
# Evaluate the class of the correlation object (again only for display).
class(matcar.cor)
# Correlogram with corrplot's default settings.
corrplot(matcar.cor)
# Colored-cell correlogram with the coefficients overlaid in gray.
# order = "AOE" asks corrplot to reorder the variables (angular order of the
# eigenvectors, per the corrplot documentation).
corrplot(matcar.cor, order = "AOE", method = "color",
addCoef.col = "gray")
# Mixed-style correlogram, using the same "AOE" variable ordering.
corrplot.mixed(matcar.cor, order = "AOE")
# Correlogram drawn with the "ellipse" method.
corrplot(matcar.cor, method = "ellipse")
3. scatter plots
# Load PerformanceAnalytics, which supplies chart.Correlation().
library(PerformanceAnalytics)
# Correlation chart for all mtcars columns; histogram = TRUE requests
# histograms and pch = 19 selects the solid-circle plotting symbol.
chart.Correlation(mtcars,histogram = TRUE,pch=19)
4. heatmap
# Heatmap of the mtcars correlation matrix with a custom color palette.
# Pairwise correlation matrix of every mtcars column.
matcar.cor <- cor(mtcars)
col<- colorRampPalette(c("blue", "white", "red"))(20)# build a custom 20-color blue-white-red palette
heatmap(x = matcar.cor, col = col, symm = TRUE)# symm indicates whether x is to be treated as symmetric
5. ggcorrplot
# Correlogram of mtcars drawn with ggcorrplot.
# No setwd() here: mtcars is a built-in dataset, so nothing is read from
# disk, and a hard-coded machine-specific path would abort the script on any
# other machine.
library(ggcorrplot)

# Load the built-in dataset once and preview its first rows.
data("mtcars")
head(mtcars)

# Correlation matrix rounded to two decimals (cor() itself reports no
# p-values).
corr <- round(cor(mtcars), 2)
head(corr[, 1:6])

# cor_pmat() from ggcorrplot computes the matrix of correlation p-values.
p.mat <- cor_pmat(mtcars)
head(p.mat[, 1:4])

# Lower-triangle correlogram. hc.order = TRUE requests hierarchical-clustering
# ordering of the variables, and p.mat supplies the p-values ggcorrplot uses
# to flag non-significant coefficients (per the ggcorrplot documentation).
ggcorrplot(corr, hc.order = TRUE, type = "lower", p.mat = p.mat)