设计思路
输出结果
train_boston_data.shape (1460, 81)
Id MSSubClass MSZoning ... SaleType SaleCondition SalePrice
0 1 60 RL ... WD Normal 208500
1 2 20 RL ... WD Normal 181500
2 3 60 RL ... WD Normal 223500
3 4 70 RL ... WD Abnorml 140000
4 5 60 RL ... WD Normal 250000
[5 rows x 81 columns]
train_t.head() LotFrontage GarageArea SalePrice
0 65.0 548 208500
1 80.0 460 181500
2 68.0 608 223500
3 60.0 642 140000
4 84.0 836 250000
after scale,train_t.head() LotFrontage GarageArea SalePrice
0 0.207668 0.386460 0.276159
1 0.255591 0.324401 0.240397
2 0.217252 0.428773 0.296026
3 0.191693 0.452750 0.185430
4 0.268371 0.589563 0.331126
LotFrontage GarageArea
0 0.207668 0.386460
1 0.255591 0.324401
2 0.217252 0.428773
3 0.191693 0.452750
4 0.268371 0.589563
Id MSSubClass LotFrontage ... MoSold YrSold SalePrice
Id 1.000000 0.011156 -0.010601 ... 0.021172 0.000712 -0.021917
MSSubClass 0.011156 1.000000 -0.386347 ... -0.013585 -0.021407 -0.084284
LotFrontage -0.010601 -0.386347 1.000000 ... 0.011200 0.007450 0.351799
LotArea -0.033226 -0.139781 0.426095 ... 0.001205 -0.014261 0.263843
OverallQual -0.028365 0.032628 0.251646 ... 0.070815 -0.027347 0.790982
OverallCond 0.012609 -0.059316 -0.059213 ... -0.003511 0.043950 -0.077856
YearBuilt -0.012713 0.027850 0.123349 ... 0.012398 -0.013618 0.522897
YearRemodAdd -0.021998 0.040581 0.088866 ... 0.021490 0.035743 0.507101
MasVnrArea -0.050298 0.022936 0.193458 ... -0.005965 -0.008201 0.477493
BsmtFinSF1 -0.005024 -0.069836 0.233633 ... -0.015727 0.014359 0.386420
BsmtFinSF2 -0.005968 -0.065649 0.049900 ... -0.015211 0.031706 -0.011378
BsmtUnfSF -0.007940 -0.140759 0.132644 ... 0.034888 -0.041258 0.214479
TotalBsmtSF -0.015415 -0.238518 0.392075 ... 0.013196 -0.014969 0.613581
1stFlrSF 0.010496 -0.251758 0.457181 ... 0.031372 -0.013604 0.605852
2ndFlrSF 0.005590 0.307886 0.080177 ... 0.035164 -0.028700 0.319334
LowQualFinSF -0.044230 0.046474 0.038469 ... -0.022174 -0.028921 -0.025606
GrLivArea 0.008273 0.074853 0.402797 ... 0.050240 -0.036526 0.708624
BsmtFullBath 0.002289 0.003491 0.100949 ... -0.025361 0.067049 0.227122
BsmtHalfBath -0.020155 -0.002333 -0.007234 ... 0.032873 -0.046524 -0.016844
FullBath 0.005587 0.131608 0.198769 ... 0.055872 -0.019669 0.560664
HalfBath 0.006784 0.177354 0.053532 ... -0.009050 -0.010269 0.284108
BedroomAbvGr 0.037719 -0.023438 0.263170 ... 0.046544 -0.036014 0.168213
KitchenAbvGr 0.002951 0.281721 -0.006069 ... 0.026589 0.031687 -0.135907
TotRmsAbvGrd 0.027239 0.040380 0.352096 ... 0.036907 -0.034516 0.533723
Fireplaces -0.019772 -0.045569 0.266639 ... 0.046357 -0.024096 0.466929
GarageYrBlt 0.000072 0.085072 0.070250 ... 0.005337 -0.001014 0.486362
GarageCars 0.016570 -0.040110 0.285691 ... 0.040522 -0.039117 0.640409
GarageArea 0.017634 -0.098672 0.344997 ... 0.027974 -0.027378 0.623431
WoodDeckSF -0.029643 -0.012579 0.088521 ... 0.021011 0.022270 0.324413
OpenPorchSF -0.000477 -0.006100 0.151972 ... 0.071255 -0.057619 0.315856
EnclosedPorch 0.002889 -0.012037 0.010700 ... -0.028887 -0.009916 -0.128578
3SsnPorch -0.046635 -0.043825 0.070029 ... 0.029474 0.018645 0.044584
ScreenPorch 0.001330 -0.026030 0.041383 ... 0.023217 0.010694 0.111447
PoolArea 0.057044 0.008283 0.206167 ... -0.033737 -0.059689 0.092404
MiscVal -0.006242 -0.007683 0.003368 ... -0.006495 0.004906 -0.021190
MoSold 0.021172 -0.013585 0.011200 ... 1.000000 -0.145721 0.046432
YrSold 0.000712 -0.021407 0.007450 ... -0.145721 1.000000 -0.028923
SalePrice -0.021917 -0.084284 0.351799 ... 0.046432 -0.028923 1.000000
[38 rows x 38 columns]
coef and intercept: [0.21627565 0.41024884] 0.0543428481373919
cost after log: -3.850369422061899 -4.52343070892457
best w1 and w2 after GD: 0.10003438525600654 0.30004957896248946
核心代码
# Fit a linear regression on the training features/target, then report the
# learned per-feature coefficients and the intercept term.
LiR = linear_model.LinearRegression()
LiR.fit(X=X_train, y=y_train)
print(
    'coef and intercept: ',
    LiR.coef_,
    LiR.intercept_,
)
def CalCostByW(train_df, slope):
    """Evaluate the cost over a 30 x 30 grid of candidate weight pairs.

    The grid is built from two integer indices in ``range(30)``:
    ``w1 = slope * i + 0.1`` and ``w2 = slope * j + 0.3``. For every pair the
    module-level ``cost`` helper is called with the frame, its
    ``LotFrontage`` and ``GarageArea`` columns, and the two weights.

    Args:
        train_df: Object exposing ``LotFrontage`` and ``GarageArea``
            attributes (presumably a pandas DataFrame -- confirm with caller).
        slope: Step between successive candidate weights on each axis.

    Returns:
        A 3-tuple ``(w1_values, w2_values, costs)`` of equal-length lists
        (900 entries each), ordered with ``w1`` as the outer loop and ``w2``
        as the inner loop.
    """
    w1_values = []
    w2_values = []
    costs = []
    for outer in range(30):
        for inner in range(30):
            w1 = slope*outer+0.1
            w2 = slope*inner+0.3
            w1_values.append(w1)
            w2_values.append(w2)
            grid_cost = cost(
                train_df,
                train_df.LotFrontage,
                train_df.GarageArea,
                w1,
                w2,
            )
            costs.append(grid_cost)
    return w1_values, w2_values, costs
# Build the (w1, w2, cost) lists for train_t, stepping the candidate weights by 0.01.
w1_lists,w2_lists,cost_lists=CalCostByW(train_t,0.01)