多元线性回归尝试通过将一个线性方程拟合到观测数据,来表示两个或多个特征与响应之间的关系。
# modified version of the code from 100-Days-of-ML-Code
# Day3_Multiple_Linear_Regression
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Widen NumPy's console output (up to 50 edge items per dimension, 500
# characters per line) so the matrices printed by this script are less
# likely to be elided or wrapped.
np.set_printoptions(linewidth=500, edgeitems=50)

# Load the dataset from the CSV file in the current working directory.
dataset = pd.read_csv('50_Startups.csv')
# Feature matrix: every column except the last one.
X = dataset.iloc[:, :-1].to_numpy()
# Response vector: the column at position 4
# (presumably the last column of the file -- verify against the CSV).
Y = dataset.iloc[:, 4].to_numpy()
# Encode the categorical feature (column 3) so the regressor only sees numbers.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace the labels in column 3 with integer codes. OneHotEncoder could take
# the raw labels directly; this step is kept so the intermediate print below
# still shows the label-encoded matrix.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
print('X after label encoder \n ', X)

# One-hot encode ONLY column 3. Fitting a bare OneHotEncoder() on the whole
# matrix treats every column as categorical, so each distinct numeric value
# would become its own indicator column and the regression would be fitted on
# meaningless features. ColumnTransformer restricts the encoder to column 3,
# passes the remaining columns through unchanged, and places the dummy columns
# first in the output.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    transformers=[('onehot', onehotencoder, [3])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array
)
# NOTE: ColumnTransformer fits a clone of `onehotencoder`; the fitted encoder
# is available as column_transformer.named_transformers_['onehot'].
# The cast to float assumes the passthrough columns are numeric -- it raises
# if they are not.
X = np.asarray(column_transformer.fit_transform(X), dtype=float)
print('X after onehot:\n ', X)

# Avoid the dummy variable trap: the dummy columns come first, so dropping
# column 0 removes exactly one dummy of the encoded categorical feature.
X = X[:, 1:]
print('X after avoiding dummy trap \n', X)
# Model training and evaluation: split the data, fit ordinary least squares
# on the training part, then predict the held-out part.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; random_state=0 pins the
# shuffle so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=0, test_size=0.2
)

# Fit the multiple linear regression model on the training set
# (fit() returns the estimator itself, so the call can be chained).
regressor = LinearRegression().fit(X_train, Y_train)

# Predict the responses for the test set and show them.
Y_pred = regressor.predict(X_test)
print('Y_pred \n', Y_pred)
曼车斯基 发布了12 篇原创文章 · 获赞 5 · 访问量 7456 私信 关注