前列腺癌数据分析

import pandas as pd
# Load the prostate dataset into a DataFrame. read_table splits fields on
# tabs by default; the path below is specific to the author's machine.
df = pd.read_table(
    r'C:\Users\HP\Downloads\prostate.data',
    engine="python",
    encoding="utf-8",
)
# Bare expression: shows the DataFrame when run as a notebook cell.
df
Unnamed: 0 lcavol lweight age lbph svi lcp gleason pgg45 lpsa train
0 1 -0.579818 2.769459 50 -1.386294 0 -1.386294 6 0 -0.430783 T
1 2 -0.994252 3.319626 58 -1.386294 0 -1.386294 6 0 -0.162519 T
2 3 -0.510826 2.691243 74 -1.386294 0 -1.386294 7 20 -0.162519 T
3 4 -1.203973 3.282789 58 -1.386294 0 -1.386294 6 0 -0.162519 T
4 5 0.751416 3.432373 62 -1.386294 0 -1.386294 6 0 0.371564 T
5 6 -1.049822 3.228826 50 -1.386294 0 -1.386294 6 0 0.765468 T
6 7 0.737164 3.473518 64 0.615186 0 -1.386294 6 0 0.765468 F
7 8 0.693147 3.539509 58 1.536867 0 -1.386294 6 0 0.854415 T
8 9 -0.776529 3.539509 47 -1.386294 0 -1.386294 6 0 1.047319 F
9 10 0.223144 3.244544 63 -1.386294 0 -1.386294 6 0 1.047319 F
10 11 0.254642 3.604138 65 -1.386294 0 -1.386294 6 0 1.266948 T
11 12 -1.347074 3.598681 63 1.266948 0 -1.386294 6 0 1.266948 T
12 13 1.613430 3.022861 63 -1.386294 0 -0.597837 7 30 1.266948 T
13 14 1.477049 2.998229 67 -1.386294 0 -1.386294 7 5 1.348073 T
14 15 1.205971 3.442019 57 -1.386294 0 -0.430783 7 5 1.398717 F
15 16 1.541159 3.061052 66 -1.386294 0 -1.386294 6 0 1.446919 T
16 17 -0.415515 3.516013 70 1.244155 0 -0.597837 7 30 1.470176 T
17 18 2.288486 3.649359 66 -1.386294 0 0.371564 6 0 1.492904 T
18 19 -0.562119 3.267666 41 -1.386294 0 -1.386294 6 0 1.558145 T
19 20 0.182322 3.825375 70 1.658228 0 -1.386294 6 0 1.599388 T
20 21 1.147402 3.419365 59 -1.386294 0 -1.386294 6 0 1.638997 T
21 22 2.059239 3.501043 60 1.474763 0 1.348073 7 20 1.658228 F
22 23 -0.544727 3.375880 59 -0.798508 0 -1.386294 6 0 1.695616 T
23 24 1.781709 3.451574 63 0.438255 0 1.178655 7 60 1.713798 T
24 25 0.385262 3.667400 69 1.599388 0 -1.386294 6 0 1.731656 F
25 26 1.446919 3.124565 68 0.300105 0 -1.386294 6 0 1.766442 F
26 27 0.512824 3.719651 65 -1.386294 0 -0.798508 7 70 1.800058 T
27 28 -0.400478 3.865979 67 1.816452 0 -1.386294 7 20 1.816452 F
28 29 1.040277 3.128951 67 0.223144 0 0.048790 7 80 1.848455 T
29 30 2.409644 3.375880 65 -1.386294 0 1.619388 6 0 1.894617 T
... ... ... ... ... ... ... ... ... ... ... ...
67 68 2.198335 4.050915 72 2.307573 0 -0.430783 7 10 2.962692 T
68 69 -0.446287 4.408547 69 -1.386294 0 -1.386294 6 0 2.962692 T
69 70 1.193922 4.780383 72 2.326302 0 -0.798508 7 5 2.972975 T
70 71 1.864080 3.593194 60 -1.386294 1 1.321756 7 60 3.013081 T
71 72 1.160021 3.341093 77 1.749200 0 -1.386294 7 25 3.037354 T
72 73 1.214913 3.825375 69 -1.386294 1 0.223144 7 20 3.056357 F
73 74 1.838961 3.236716 60 0.438255 1 1.178655 9 90 3.075006 F
74 75 2.999226 3.849083 69 -1.386294 1 1.909542 7 20 3.275256 T
75 76 3.141130 3.263849 68 -0.051293 1 2.420368 7 50 3.337547 T
76 77 2.010895 4.433789 72 2.122262 0 0.500775 7 60 3.392829 T
77 78 2.537657 4.354784 78 2.326302 0 -1.386294 7 10 3.435599 T
78 79 2.648300 3.582129 69 -1.386294 1 2.583998 7 70 3.457893 T
79 80 2.779440 3.823192 63 -1.386294 0 0.371564 7 50 3.513037 F
80 81 1.467874 3.070376 66 0.559616 0 0.223144 7 40 3.516013 T
81 82 2.513656 3.473518 57 0.438255 0 2.327278 7 60 3.530763 T
82 83 2.613007 3.888754 77 -0.527633 1 0.559616 7 30 3.565298 T
83 84 2.677591 3.838376 65 1.115142 0 1.749200 9 70 3.570940 F
84 85 1.562346 3.709907 60 1.695616 0 0.810930 7 30 3.587677 T
85 86 3.302849 3.518980 64 -1.386294 1 2.327278 7 60 3.630986 T
86 87 2.024193 3.731699 58 1.638997 0 -1.386294 6 0 3.680091 T
87 88 1.731656 3.369018 62 -1.386294 1 0.300105 7 30 3.712352 T
88 89 2.807594 4.718052 65 -1.386294 1 2.463853 7 60 3.984344 T
89 90 1.562346 3.695110 76 0.936093 1 0.810930 7 75 3.993603 T
90 91 3.246491 4.101817 68 -1.386294 0 -1.386294 6 0 4.029806 T
91 92 2.532903 3.677566 61 1.348073 1 -1.386294 7 15 4.129551 T
92 93 2.830268 3.876396 68 -1.386294 1 1.321756 7 60 4.385147 T
93 94 3.821004 3.896909 44 -1.386294 1 2.169054 7 40 4.684443 T
94 95 2.907447 3.396185 52 -1.386294 1 2.463853 7 10 5.143124 F
95 96 2.882564 3.773910 68 1.558145 1 1.558145 7 80 5.477509 T
96 97 3.471966 3.974998 68 0.438255 1 2.904165 7 20 5.582932 F

97 rows × 11 columns

一共97行,11列

# Print a summary of the frame: index range, per-column non-null counts
# and dtypes, and approximate memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 11 columns):
Unnamed: 0    97 non-null int64
lcavol        97 non-null float64
lweight       97 non-null float64
age           97 non-null int64
lbph          97 non-null float64
svi           97 non-null int64
lcp           97 non-null float64
gleason       97 non-null int64
pgg45         97 non-null int64
lpsa          97 non-null float64
train         97 non-null object
dtypes: float64(5), int64(5), object(1)
memory usage: 8.4+ KB

从 df.info() 的输出可以看到,每一列都有 97 个非空值,因此这个数据集中并没有缺失数据

# Encode the 'train' flag numerically: 'T' -> 1, 'F' -> 0.
# Integer values (rather than the strings '1'/'0') give the new column a
# numeric dtype, so it can be used in numeric operations such as a
# correlation matrix. Any value other than 'T'/'F' would map to NaN.
boolmaping = {'T': 1, 'F': 0}
# Series.map returns a new Series; assigning it to a column stores the
# result, so no "inplace" flag is involved.
df['trainbool'] = df['train'].map(boolmaping)
# Bare expression: shows the updated DataFrame when run as a notebook cell.
df
Unnamed: 0 lcavol lweight age lbph svi lcp gleason pgg45 lpsa train trainbool
0 1 -0.579818 2.769459 50 -1.386294 0 -1.386294 6 0 -0.430783 T 1
1 2 -0.994252 3.319626 58 -1.386294 0 -1.386294 6 0 -0.162519 T 1
2 3 -0.510826 2.691243 74 -1.386294 0 -1.386294 7 20 -0.162519 T 1
3 4 -1.203973 3.282789 58 -1.386294 0 -1.386294 6 0 -0.162519 T 1
4 5 0.751416 3.432373 62 -1.386294 0 -1.386294 6 0 0.371564 T 1
5 6 -1.049822 3.228826 50 -1.386294 0 -1.386294 6 0 0.765468 T 1
6 7 0.737164 3.473518 64 0.615186 0 -1.386294 6 0 0.765468 F 0
7 8 0.693147 3.539509 58 1.536867 0 -1.386294 6 0 0.854415 T 1
8 9 -0.776529 3.539509 47 -1.386294 0 -1.386294 6 0 1.047319 F 0
9 10 0.223144 3.244544 63 -1.386294 0 -1.386294 6 0 1.047319 F 0
10 11 0.254642 3.604138 65 -1.386294 0 -1.386294 6 0 1.266948 T 1
11 12 -1.347074 3.598681 63 1.266948 0 -1.386294 6 0 1.266948 T 1
12 13 1.613430 3.022861 63 -1.386294 0 -0.597837 7 30 1.266948 T 1
13 14 1.477049 2.998229 67 -1.386294 0 -1.386294 7 5 1.348073 T 1
14 15 1.205971 3.442019 57 -1.386294 0 -0.430783 7 5 1.398717 F 0
15 16 1.541159 3.061052 66 -1.386294 0 -1.386294 6 0 1.446919 T 1
16 17 -0.415515 3.516013 70 1.244155 0 -0.597837 7 30 1.470176 T 1
17 18 2.288486 3.649359 66 -1.386294 0 0.371564 6 0 1.492904 T 1
18 19 -0.562119 3.267666 41 -1.386294 0 -1.386294 6 0 1.558145 T 1
19 20 0.182322 3.825375 70 1.658228 0 -1.386294 6 0 1.599388 T 1
20 21 1.147402 3.419365 59 -1.386294 0 -1.386294 6 0 1.638997 T 1
21 22 2.059239 3.501043 60 1.474763 0 1.348073 7 20 1.658228 F 0
22 23 -0.544727 3.375880 59 -0.798508 0 -1.386294 6 0 1.695616 T 1
23 24 1.781709 3.451574 63 0.438255 0 1.178655 7 60 1.713798 T 1
24 25 0.385262 3.667400 69 1.599388 0 -1.386294 6 0 1.731656 F 0
25 26 1.446919 3.124565 68 0.300105 0 -1.386294 6 0 1.766442 F 0
26 27 0.512824 3.719651 65 -1.386294 0 -0.798508 7 70 1.800058 T 1
27 28 -0.400478 3.865979 67 1.816452 0 -1.386294 7 20 1.816452 F 0
28 29 1.040277 3.128951 67 0.223144 0 0.048790 7 80 1.848455 T 1
29 30 2.409644 3.375880 65 -1.386294 0 1.619388 6 0 1.894617 T 1
... ... ... ... ... ... ... ... ... ... ... ... ...
67 68 2.198335 4.050915 72 2.307573 0 -0.430783 7 10 2.962692 T 1
68 69 -0.446287 4.408547 69 -1.386294 0 -1.386294 6 0 2.962692 T 1
69 70 1.193922 4.780383 72 2.326302 0 -0.798508 7 5 2.972975 T 1
70 71 1.864080 3.593194 60 -1.386294 1 1.321756 7 60 3.013081 T 1
71 72 1.160021 3.341093 77 1.749200 0 -1.386294 7 25 3.037354 T 1
72 73 1.214913 3.825375 69 -1.386294 1 0.223144 7 20 3.056357 F 0
73 74 1.838961 3.236716 60 0.438255 1 1.178655 9 90 3.075006 F 0
74 75 2.999226 3.849083 69 -1.386294 1 1.909542 7 20 3.275256 T 1
75 76 3.141130 3.263849 68 -0.051293 1 2.420368 7 50 3.337547 T 1
76 77 2.010895 4.433789 72 2.122262 0 0.500775 7 60 3.392829 T 1
77 78 2.537657 4.354784 78 2.326302 0 -1.386294 7 10 3.435599 T 1
78 79 2.648300 3.582129 69 -1.386294 1 2.583998 7 70 3.457893 T 1
79 80 2.779440 3.823192 63 -1.386294 0 0.371564 7 50 3.513037 F 0
80 81 1.467874 3.070376 66 0.559616 0 0.223144 7 40 3.516013 T 1
81 82 2.513656 3.473518 57 0.438255 0 2.327278 7 60 3.530763 T 1
82 83 2.613007 3.888754 77 -0.527633 1 0.559616 7 30 3.565298 T 1
83 84 2.677591 3.838376 65 1.115142 0 1.749200 9 70 3.570940 F 0
84 85 1.562346 3.709907 60 1.695616 0 0.810930 7 30 3.587677 T 1
85 86 3.302849 3.518980 64 -1.386294 1 2.327278 7 60 3.630986 T 1
86 87 2.024193 3.731699 58 1.638997 0 -1.386294 6 0 3.680091 T 1
87 88 1.731656 3.369018 62 -1.386294 1 0.300105 7 30 3.712352 T 1
88 89 2.807594 4.718052 65 -1.386294 1 2.463853 7 60 3.984344 T 1
89 90 1.562346 3.695110 76 0.936093 1 0.810930 7 75 3.993603 T 1
90 91 3.246491 4.101817 68 -1.386294 0 -1.386294 6 0 4.029806 T 1
91 92 2.532903 3.677566 61 1.348073 1 -1.386294 7 15 4.129551 T 1
92 93 2.830268 3.876396 68 -1.386294 1 1.321756 7 60 4.385147 T 1
93 94 3.821004 3.896909 44 -1.386294 1 2.169054 7 40 4.684443 T 1
94 95 2.907447 3.396185 52 -1.386294 1 2.463853 7 10 5.143124 F 0
95 96 2.882564 3.773910 68 1.558145 1 1.558145 7 80 5.477509 T 1
96 97 3.471966 3.974998 68 0.438255 1 2.904165 7 20 5.582932 F 0

97 rows × 12 columns

trainbool 列是把 train 列中的 T 和 F 分别映射为 1 和 0 得到的标记列

# Preview the first five rows to confirm the new 'trainbool' column.
df.head()
Unnamed: 0 lcavol lweight age lbph svi lcp gleason pgg45 lpsa train trainbool
0 1 -0.579818 2.769459 50 -1.386294 0 -1.386294 6 0 -0.430783 T 1
1 2 -0.994252 3.319626 58 -1.386294 0 -1.386294 6 0 -0.162519 T 1
2 3 -0.510826 2.691243 74 -1.386294 0 -1.386294 7 20 -0.162519 T 1
3 4 -1.203973 3.282789 58 -1.386294 0 -1.386294 6 0 -0.162519 T 1
4 5 0.751416 3.432373 62 -1.386294 0 -1.386294 6 0 0.371564 T 1
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
# Create a 20x12-inch figure to draw the heatmap on.
fig = plt.figure(figsize=(20, 12))
# Correlate numeric columns only. The frame also holds text columns
# (e.g. 'train'); older pandas skipped them silently, but pandas >= 2.0
# raises a ValueError when DataFrame.corr() meets non-numeric data.
# select_dtypes works the same way on both old and new versions.
corr = df.select_dtypes(include='number').corr()
# annot=True writes each correlation coefficient inside its cell.
sns.heatmap(corr, annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x20d4bddc208>

前列腺癌数据分析


上一篇:如何只返回实体类中的部分字段


下一篇:js 三元运算符以及|| 和 && 测试