完整理论和公式推导详见:
https://www.jianshu.com/p/61614bffc2c6

## 理论
## 代码实现
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets as datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
data = datasets.load_boston()
X = data['data']
y = data['target']
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
# 封装自己的多元线性回归方法
class MyLinearRegression:
# 初始化
def __init__(self):
self.theta = None
self.interception_ = None
self.theta_ = None
def fit(self,X,y):
# 合成X_b
X_ones = np.ones((X.shape[0],1))
X_b = np.hstack((X_ones, X))
self.theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
self.interception_ = self.theta[0]
self.theta_ = self.theta[1:]
return self
def predict(self,X_test):
return X_test.dot(self.theta_) + self.interception_
def coef_(self):
return self.theta_
def intercept_(self):
return self.interception_
def r2_score(self, X_test, y_test):
mes = np.sum((self.predict(X_test)-y_test)**2)/X_test.shape[0]
va = np.sum((y_test-np.mean(y))**2)/X_test.shape[0]
re = 1-(mes/va)
return re
- 测试自己的线性回归
# 测试自己的线性回归
Mylr = MyLinearRegression()
Mylr.fit(X_train,y_train)
y_pre = Mylr.predict(X_test)
plt.figure(figsize = (12,8))
plt.plot(y_test,color='b', label = 'True')
plt.plot(y_pre,color='red', label = 'Predict')
plt.legend(loc='best')
plt.show()
print('R方得分:',Mylr.r2_score(X_test,y_test))
## 测试sklearn的线性回归
# 测试sklearn的线性回归
from sklearn.metrics import r2_score
lr = LinearRegression()
lr.fit(X_train,y_train)
y_ = lr.predict(X_test)
plt.figure(figsize = (12,8))
plt.plot(y_test,color='b', label = 'True')
plt.plot(y_,color='red', label = 'Predict')
plt.legend(loc='best')
plt.show()
r2_score(y_test, y_)