用excel中数据分析功能做线性回归练习
分别选取20、200、2000(或20000)组数据,进行练习。记录回归方程和决定系数 R²
20:
200:
2000:
用 Jupyter 编程(不借助 sklearn 等机器学习库,仅使用 numpy 和 matplotlib),用最小二乘法求回归方程
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load the whole CSV into a 2-D float array.
points = np.genfromtxt("weights_heights.csv", delimiter=",")

# Fit the regression line y = a*x + b by ordinary least squares, taking
# column index 2 as x and column index 1 as y.
# Row 0 is skipped -- presumably a header row; confirm against the CSV.
# Change the slice bounds to use a different sample size (e.g. 1:201, 1:2001).
x = points[1:21, 2]
y = points[1:21, 1]

# Pearson correlation coefficient: the off-diagonal entry of the 2x2
# correlation matrix returned by np.corrcoef.
pccs = np.corrcoef(x, y)
f = pccs[0, 1]

x_mean = np.mean(x)
y_mean = np.mean(y)
xsize = x.size

# Closed-form least-squares solution:
#   slope     = (sum(x*y) - n*mean(x)*mean(y)) / (sum(x^2) - n*mean(x)^2)
#   intercept = mean(y) - slope*mean(x)
zi = (x * y).sum() - xsize * x_mean * y_mean
mu = (x ** 2).sum() - xsize * x_mean ** 2
slope = zi / mu
intercept = y_mean - slope * x_mean

# a and b are rounded for display only; the full-precision slope/intercept
# are kept so the plotted line below is the actual fitted line.
a = np.around(slope, decimals=2)
b = np.around(intercept, decimals=2)
print(f'回归线方程:y = {a}x + {b}')
print(f'相关系数为{f}')

# Draw the data points and the fitted line with matplotlib, using the
# unrounded coefficients so rounding error does not shift the line.
y1 = slope * x + intercept
plt.scatter(x, y)
plt.plot(x, y1, c='r')
20:
200:
2000:
用 Jupyter 编程,借助 sklearn
from sklearn import linear_model #表示,可以调用sklearn中的linear_model模块进行线性回归。
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Load the whole CSV into a 2-D float array.
data = np.genfromtxt("weights_heights.csv", delimiter=",")

# Keep rows 1..20; change the slice bounds to use a different sample size.
data1 = data[1:21]

# Column index 2 is used as x and column index 1 as y.
x = list(data1[:, 2])
y = list(data1[:, 1])

# Pearson correlation coefficient: off-diagonal entry of the 2x2 matrix
# returned by np.corrcoef.
pccs = np.corrcoef(x, y)
f = pccs[0][1]

# Reshape both series into single-column 2-D arrays before fitting.
X = np.asarray(x)[:, np.newaxis]
Y = np.asarray(y)[:, np.newaxis]

model = linear_model.LinearRegression()
model.fit(X, Y)

# With a 2-D target, coef_ is indexed [target][feature] and intercept_ is
# indexed [target]; there is one target and one feature here.
a = model.coef_[0]
a1 = a[0]  # slope
b = model.intercept_[0]  # intercept

print(f'回归线方程:y = {a1}x + {b}')
print(f'相关系数为{f}')

# Evaluate the fitted line at the sample x values and draw it over the data.
y1 = a1 * X + b
plt.scatter(X, Y)
plt.plot(x, y1, c='r')
20:
200:
2000: