import pandas as pd
# Load the tab-delimited prostate data file into a DataFrame.
# read_csv with sep="\t" is the explicit spelling of read_table's default delimiter.
df = pd.read_csv(
    r'C:\Users\HP\Downloads\prostate.data',
    sep="\t",
    engine="python",
    encoding="utf-8",
)
# Bare expression: shows the frame as the notebook cell's output.
df
Unnamed: 0 | lcavol | lweight | age | lbph | svi | lcp | gleason | pgg45 | lpsa | train | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | -0.579818 | 2.769459 | 50 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.430783 | T |
1 | 2 | -0.994252 | 3.319626 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T |
2 | 3 | -0.510826 | 2.691243 | 74 | -1.386294 | 0 | -1.386294 | 7 | 20 | -0.162519 | T |
3 | 4 | -1.203973 | 3.282789 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T |
4 | 5 | 0.751416 | 3.432373 | 62 | -1.386294 | 0 | -1.386294 | 6 | 0 | 0.371564 | T |
5 | 6 | -1.049822 | 3.228826 | 50 | -1.386294 | 0 | -1.386294 | 6 | 0 | 0.765468 | T |
6 | 7 | 0.737164 | 3.473518 | 64 | 0.615186 | 0 | -1.386294 | 6 | 0 | 0.765468 | F |
7 | 8 | 0.693147 | 3.539509 | 58 | 1.536867 | 0 | -1.386294 | 6 | 0 | 0.854415 | T |
8 | 9 | -0.776529 | 3.539509 | 47 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.047319 | F |
9 | 10 | 0.223144 | 3.244544 | 63 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.047319 | F |
10 | 11 | 0.254642 | 3.604138 | 65 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.266948 | T |
11 | 12 | -1.347074 | 3.598681 | 63 | 1.266948 | 0 | -1.386294 | 6 | 0 | 1.266948 | T |
12 | 13 | 1.613430 | 3.022861 | 63 | -1.386294 | 0 | -0.597837 | 7 | 30 | 1.266948 | T |
13 | 14 | 1.477049 | 2.998229 | 67 | -1.386294 | 0 | -1.386294 | 7 | 5 | 1.348073 | T |
14 | 15 | 1.205971 | 3.442019 | 57 | -1.386294 | 0 | -0.430783 | 7 | 5 | 1.398717 | F |
15 | 16 | 1.541159 | 3.061052 | 66 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.446919 | T |
16 | 17 | -0.415515 | 3.516013 | 70 | 1.244155 | 0 | -0.597837 | 7 | 30 | 1.470176 | T |
17 | 18 | 2.288486 | 3.649359 | 66 | -1.386294 | 0 | 0.371564 | 6 | 0 | 1.492904 | T |
18 | 19 | -0.562119 | 3.267666 | 41 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.558145 | T |
19 | 20 | 0.182322 | 3.825375 | 70 | 1.658228 | 0 | -1.386294 | 6 | 0 | 1.599388 | T |
20 | 21 | 1.147402 | 3.419365 | 59 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.638997 | T |
21 | 22 | 2.059239 | 3.501043 | 60 | 1.474763 | 0 | 1.348073 | 7 | 20 | 1.658228 | F |
22 | 23 | -0.544727 | 3.375880 | 59 | -0.798508 | 0 | -1.386294 | 6 | 0 | 1.695616 | T |
23 | 24 | 1.781709 | 3.451574 | 63 | 0.438255 | 0 | 1.178655 | 7 | 60 | 1.713798 | T |
24 | 25 | 0.385262 | 3.667400 | 69 | 1.599388 | 0 | -1.386294 | 6 | 0 | 1.731656 | F |
25 | 26 | 1.446919 | 3.124565 | 68 | 0.300105 | 0 | -1.386294 | 6 | 0 | 1.766442 | F |
26 | 27 | 0.512824 | 3.719651 | 65 | -1.386294 | 0 | -0.798508 | 7 | 70 | 1.800058 | T |
27 | 28 | -0.400478 | 3.865979 | 67 | 1.816452 | 0 | -1.386294 | 7 | 20 | 1.816452 | F |
28 | 29 | 1.040277 | 3.128951 | 67 | 0.223144 | 0 | 0.048790 | 7 | 80 | 1.848455 | T |
29 | 30 | 2.409644 | 3.375880 | 65 | -1.386294 | 0 | 1.619388 | 6 | 0 | 1.894617 | T |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | 68 | 2.198335 | 4.050915 | 72 | 2.307573 | 0 | -0.430783 | 7 | 10 | 2.962692 | T |
68 | 69 | -0.446287 | 4.408547 | 69 | -1.386294 | 0 | -1.386294 | 6 | 0 | 2.962692 | T |
69 | 70 | 1.193922 | 4.780383 | 72 | 2.326302 | 0 | -0.798508 | 7 | 5 | 2.972975 | T |
70 | 71 | 1.864080 | 3.593194 | 60 | -1.386294 | 1 | 1.321756 | 7 | 60 | 3.013081 | T |
71 | 72 | 1.160021 | 3.341093 | 77 | 1.749200 | 0 | -1.386294 | 7 | 25 | 3.037354 | T |
72 | 73 | 1.214913 | 3.825375 | 69 | -1.386294 | 1 | 0.223144 | 7 | 20 | 3.056357 | F |
73 | 74 | 1.838961 | 3.236716 | 60 | 0.438255 | 1 | 1.178655 | 9 | 90 | 3.075006 | F |
74 | 75 | 2.999226 | 3.849083 | 69 | -1.386294 | 1 | 1.909542 | 7 | 20 | 3.275256 | T |
75 | 76 | 3.141130 | 3.263849 | 68 | -0.051293 | 1 | 2.420368 | 7 | 50 | 3.337547 | T |
76 | 77 | 2.010895 | 4.433789 | 72 | 2.122262 | 0 | 0.500775 | 7 | 60 | 3.392829 | T |
77 | 78 | 2.537657 | 4.354784 | 78 | 2.326302 | 0 | -1.386294 | 7 | 10 | 3.435599 | T |
78 | 79 | 2.648300 | 3.582129 | 69 | -1.386294 | 1 | 2.583998 | 7 | 70 | 3.457893 | T |
79 | 80 | 2.779440 | 3.823192 | 63 | -1.386294 | 0 | 0.371564 | 7 | 50 | 3.513037 | F |
80 | 81 | 1.467874 | 3.070376 | 66 | 0.559616 | 0 | 0.223144 | 7 | 40 | 3.516013 | T |
81 | 82 | 2.513656 | 3.473518 | 57 | 0.438255 | 0 | 2.327278 | 7 | 60 | 3.530763 | T |
82 | 83 | 2.613007 | 3.888754 | 77 | -0.527633 | 1 | 0.559616 | 7 | 30 | 3.565298 | T |
83 | 84 | 2.677591 | 3.838376 | 65 | 1.115142 | 0 | 1.749200 | 9 | 70 | 3.570940 | F |
84 | 85 | 1.562346 | 3.709907 | 60 | 1.695616 | 0 | 0.810930 | 7 | 30 | 3.587677 | T |
85 | 86 | 3.302849 | 3.518980 | 64 | -1.386294 | 1 | 2.327278 | 7 | 60 | 3.630986 | T |
86 | 87 | 2.024193 | 3.731699 | 58 | 1.638997 | 0 | -1.386294 | 6 | 0 | 3.680091 | T |
87 | 88 | 1.731656 | 3.369018 | 62 | -1.386294 | 1 | 0.300105 | 7 | 30 | 3.712352 | T |
88 | 89 | 2.807594 | 4.718052 | 65 | -1.386294 | 1 | 2.463853 | 7 | 60 | 3.984344 | T |
89 | 90 | 1.562346 | 3.695110 | 76 | 0.936093 | 1 | 0.810930 | 7 | 75 | 3.993603 | T |
90 | 91 | 3.246491 | 4.101817 | 68 | -1.386294 | 0 | -1.386294 | 6 | 0 | 4.029806 | T |
91 | 92 | 2.532903 | 3.677566 | 61 | 1.348073 | 1 | -1.386294 | 7 | 15 | 4.129551 | T |
92 | 93 | 2.830268 | 3.876396 | 68 | -1.386294 | 1 | 1.321756 | 7 | 60 | 4.385147 | T |
93 | 94 | 3.821004 | 3.896909 | 44 | -1.386294 | 1 | 2.169054 | 7 | 40 | 4.684443 | T |
94 | 95 | 2.907447 | 3.396185 | 52 | -1.386294 | 1 | 2.463853 | 7 | 10 | 5.143124 | F |
95 | 96 | 2.882564 | 3.773910 | 68 | 1.558145 | 1 | 1.558145 | 7 | 80 | 5.477509 | T |
96 | 97 | 3.471966 | 3.974998 | 68 | 0.438255 | 1 | 2.904165 | 7 | 20 | 5.582932 | F |
97 rows × 11 columns
数据集一共 97 行、11 列(其中 `Unnamed: 0` 是原始文件自带的行号列)。
# Print a summary of the frame: index range, per-column non-null counts,
# column dtypes and memory usage (used below to check for missing values).
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 11 columns):
Unnamed: 0 97 non-null int64
lcavol 97 non-null float64
lweight 97 non-null float64
age 97 non-null int64
lbph 97 non-null float64
svi 97 non-null int64
lcp 97 non-null float64
gleason 97 non-null int64
pgg45 97 non-null int64
lpsa 97 non-null float64
train 97 non-null object
dtypes: float64(5), int64(5), object(1)
memory usage: 8.4+ KB
从 df.info() 的输出可以看出,每一列都有 97 个非空值,因此这个数据集中没有缺失数据。
# Encode the 'train' flag numerically: 'T' -> 1, 'F' -> 0.
# Integer codes (instead of the strings '1'/'0') give 'trainbool' a numeric
# dtype, so it can take part in numeric operations such as df.corr().
# Any value other than 'T'/'F' would become NaN under Series.map.
boolmaping = {'T': 1, 'F': 0}
# Column assignment already modifies df itself; no separate "inplace" flag exists
# or is needed for this operation.
df['trainbool'] = df['train'].map(boolmaping)
# Bare expression: shows the updated frame as the notebook cell's output.
df
Unnamed: 0 | lcavol | lweight | age | lbph | svi | lcp | gleason | pgg45 | lpsa | train | trainbool | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | -0.579818 | 2.769459 | 50 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.430783 | T | 1 |
1 | 2 | -0.994252 | 3.319626 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T | 1 |
2 | 3 | -0.510826 | 2.691243 | 74 | -1.386294 | 0 | -1.386294 | 7 | 20 | -0.162519 | T | 1 |
3 | 4 | -1.203973 | 3.282789 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T | 1 |
4 | 5 | 0.751416 | 3.432373 | 62 | -1.386294 | 0 | -1.386294 | 6 | 0 | 0.371564 | T | 1 |
5 | 6 | -1.049822 | 3.228826 | 50 | -1.386294 | 0 | -1.386294 | 6 | 0 | 0.765468 | T | 1 |
6 | 7 | 0.737164 | 3.473518 | 64 | 0.615186 | 0 | -1.386294 | 6 | 0 | 0.765468 | F | 0 |
7 | 8 | 0.693147 | 3.539509 | 58 | 1.536867 | 0 | -1.386294 | 6 | 0 | 0.854415 | T | 1 |
8 | 9 | -0.776529 | 3.539509 | 47 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.047319 | F | 0 |
9 | 10 | 0.223144 | 3.244544 | 63 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.047319 | F | 0 |
10 | 11 | 0.254642 | 3.604138 | 65 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.266948 | T | 1 |
11 | 12 | -1.347074 | 3.598681 | 63 | 1.266948 | 0 | -1.386294 | 6 | 0 | 1.266948 | T | 1 |
12 | 13 | 1.613430 | 3.022861 | 63 | -1.386294 | 0 | -0.597837 | 7 | 30 | 1.266948 | T | 1 |
13 | 14 | 1.477049 | 2.998229 | 67 | -1.386294 | 0 | -1.386294 | 7 | 5 | 1.348073 | T | 1 |
14 | 15 | 1.205971 | 3.442019 | 57 | -1.386294 | 0 | -0.430783 | 7 | 5 | 1.398717 | F | 0 |
15 | 16 | 1.541159 | 3.061052 | 66 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.446919 | T | 1 |
16 | 17 | -0.415515 | 3.516013 | 70 | 1.244155 | 0 | -0.597837 | 7 | 30 | 1.470176 | T | 1 |
17 | 18 | 2.288486 | 3.649359 | 66 | -1.386294 | 0 | 0.371564 | 6 | 0 | 1.492904 | T | 1 |
18 | 19 | -0.562119 | 3.267666 | 41 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.558145 | T | 1 |
19 | 20 | 0.182322 | 3.825375 | 70 | 1.658228 | 0 | -1.386294 | 6 | 0 | 1.599388 | T | 1 |
20 | 21 | 1.147402 | 3.419365 | 59 | -1.386294 | 0 | -1.386294 | 6 | 0 | 1.638997 | T | 1 |
21 | 22 | 2.059239 | 3.501043 | 60 | 1.474763 | 0 | 1.348073 | 7 | 20 | 1.658228 | F | 0 |
22 | 23 | -0.544727 | 3.375880 | 59 | -0.798508 | 0 | -1.386294 | 6 | 0 | 1.695616 | T | 1 |
23 | 24 | 1.781709 | 3.451574 | 63 | 0.438255 | 0 | 1.178655 | 7 | 60 | 1.713798 | T | 1 |
24 | 25 | 0.385262 | 3.667400 | 69 | 1.599388 | 0 | -1.386294 | 6 | 0 | 1.731656 | F | 0 |
25 | 26 | 1.446919 | 3.124565 | 68 | 0.300105 | 0 | -1.386294 | 6 | 0 | 1.766442 | F | 0 |
26 | 27 | 0.512824 | 3.719651 | 65 | -1.386294 | 0 | -0.798508 | 7 | 70 | 1.800058 | T | 1 |
27 | 28 | -0.400478 | 3.865979 | 67 | 1.816452 | 0 | -1.386294 | 7 | 20 | 1.816452 | F | 0 |
28 | 29 | 1.040277 | 3.128951 | 67 | 0.223144 | 0 | 0.048790 | 7 | 80 | 1.848455 | T | 1 |
29 | 30 | 2.409644 | 3.375880 | 65 | -1.386294 | 0 | 1.619388 | 6 | 0 | 1.894617 | T | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
67 | 68 | 2.198335 | 4.050915 | 72 | 2.307573 | 0 | -0.430783 | 7 | 10 | 2.962692 | T | 1 |
68 | 69 | -0.446287 | 4.408547 | 69 | -1.386294 | 0 | -1.386294 | 6 | 0 | 2.962692 | T | 1 |
69 | 70 | 1.193922 | 4.780383 | 72 | 2.326302 | 0 | -0.798508 | 7 | 5 | 2.972975 | T | 1 |
70 | 71 | 1.864080 | 3.593194 | 60 | -1.386294 | 1 | 1.321756 | 7 | 60 | 3.013081 | T | 1 |
71 | 72 | 1.160021 | 3.341093 | 77 | 1.749200 | 0 | -1.386294 | 7 | 25 | 3.037354 | T | 1 |
72 | 73 | 1.214913 | 3.825375 | 69 | -1.386294 | 1 | 0.223144 | 7 | 20 | 3.056357 | F | 0 |
73 | 74 | 1.838961 | 3.236716 | 60 | 0.438255 | 1 | 1.178655 | 9 | 90 | 3.075006 | F | 0 |
74 | 75 | 2.999226 | 3.849083 | 69 | -1.386294 | 1 | 1.909542 | 7 | 20 | 3.275256 | T | 1 |
75 | 76 | 3.141130 | 3.263849 | 68 | -0.051293 | 1 | 2.420368 | 7 | 50 | 3.337547 | T | 1 |
76 | 77 | 2.010895 | 4.433789 | 72 | 2.122262 | 0 | 0.500775 | 7 | 60 | 3.392829 | T | 1 |
77 | 78 | 2.537657 | 4.354784 | 78 | 2.326302 | 0 | -1.386294 | 7 | 10 | 3.435599 | T | 1 |
78 | 79 | 2.648300 | 3.582129 | 69 | -1.386294 | 1 | 2.583998 | 7 | 70 | 3.457893 | T | 1 |
79 | 80 | 2.779440 | 3.823192 | 63 | -1.386294 | 0 | 0.371564 | 7 | 50 | 3.513037 | F | 0 |
80 | 81 | 1.467874 | 3.070376 | 66 | 0.559616 | 0 | 0.223144 | 7 | 40 | 3.516013 | T | 1 |
81 | 82 | 2.513656 | 3.473518 | 57 | 0.438255 | 0 | 2.327278 | 7 | 60 | 3.530763 | T | 1 |
82 | 83 | 2.613007 | 3.888754 | 77 | -0.527633 | 1 | 0.559616 | 7 | 30 | 3.565298 | T | 1 |
83 | 84 | 2.677591 | 3.838376 | 65 | 1.115142 | 0 | 1.749200 | 9 | 70 | 3.570940 | F | 0 |
84 | 85 | 1.562346 | 3.709907 | 60 | 1.695616 | 0 | 0.810930 | 7 | 30 | 3.587677 | T | 1 |
85 | 86 | 3.302849 | 3.518980 | 64 | -1.386294 | 1 | 2.327278 | 7 | 60 | 3.630986 | T | 1 |
86 | 87 | 2.024193 | 3.731699 | 58 | 1.638997 | 0 | -1.386294 | 6 | 0 | 3.680091 | T | 1 |
87 | 88 | 1.731656 | 3.369018 | 62 | -1.386294 | 1 | 0.300105 | 7 | 30 | 3.712352 | T | 1 |
88 | 89 | 2.807594 | 4.718052 | 65 | -1.386294 | 1 | 2.463853 | 7 | 60 | 3.984344 | T | 1 |
89 | 90 | 1.562346 | 3.695110 | 76 | 0.936093 | 1 | 0.810930 | 7 | 75 | 3.993603 | T | 1 |
90 | 91 | 3.246491 | 4.101817 | 68 | -1.386294 | 0 | -1.386294 | 6 | 0 | 4.029806 | T | 1 |
91 | 92 | 2.532903 | 3.677566 | 61 | 1.348073 | 1 | -1.386294 | 7 | 15 | 4.129551 | T | 1 |
92 | 93 | 2.830268 | 3.876396 | 68 | -1.386294 | 1 | 1.321756 | 7 | 60 | 4.385147 | T | 1 |
93 | 94 | 3.821004 | 3.896909 | 44 | -1.386294 | 1 | 2.169054 | 7 | 40 | 4.684443 | T | 1 |
94 | 95 | 2.907447 | 3.396185 | 52 | -1.386294 | 1 | 2.463853 | 7 | 10 | 5.143124 | F | 0 |
95 | 96 | 2.882564 | 3.773910 | 68 | 1.558145 | 1 | 1.558145 | 7 | 80 | 5.477509 | T | 1 |
96 | 97 | 3.471966 | 3.974998 | 68 | 0.438255 | 1 | 2.904165 | 7 | 20 | 5.582932 | F | 0 |
97 rows × 12 columns
trainbool 列是对 train 列的 0/1 编码:T 对应 1,F 对应 0。
# Preview the first rows (head() defaults to 5) to check the new 'trainbool' column.
df.head()
Unnamed: 0 | lcavol | lweight | age | lbph | svi | lcp | gleason | pgg45 | lpsa | train | trainbool | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | -0.579818 | 2.769459 | 50 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.430783 | T | 1 |
1 | 2 | -0.994252 | 3.319626 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T | 1 |
2 | 3 | -0.510826 | 2.691243 | 74 | -1.386294 | 0 | -1.386294 | 7 | 20 | -0.162519 | T | 1 |
3 | 4 | -1.203973 | 3.282789 | 58 | -1.386294 | 0 | -1.386294 | 6 | 0 | -0.162519 | T | 1 |
4 | 5 | 0.751416 | 3.432373 | 62 | -1.386294 | 0 | -1.386294 | 6 | 0 | 0.371564 | T | 1 |
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
# Draw an annotated heatmap of the pairwise correlations between columns.
fig = plt.figure(figsize=(20, 12))
# Restrict to numeric/bool columns explicitly: string columns such as 'train'
# cannot be correlated, and pandas >= 2.0 raises on them instead of silently
# dropping them as older versions did. select_dtypes works on both.
corr = df.select_dtypes(include=['number', 'bool']).corr()
# annot=True writes each correlation coefficient into its cell.
sns.heatmap(corr, annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x20d4bddc208>