一元线性回归
1、导入库
#!/usr/bin/env python
# -*-coding:utf-8-*-
import numpy as np
import matplotlib.pyplot as plt
2、定义线性回归模型
def predict(x, a, b):
    """Evaluate the linear model ``a * x + b``.

    Args:
        x: Input value(s); anything that supports ``*`` and ``+`` with
            ``a`` and ``b`` (a plain number or a NumPy array/matrix).
        a: Slope of the line.
        b: Intercept of the line.

    Returns:
        The predicted value(s), with the same shape as ``x``.
    """
    return a * x + b
一元线性回归的模型如下:$y = ax + b$。根据给定的数据集求解出 a 和 b,即可确定适合该数据集的模型。那么如何求解 a 和 b 呢?
3、用最小二乘法求解线性回归的系数
def train_linear_model(x, y):
    """Fit ``y = a * x + b`` to the data by ordinary least squares.

    The slope is the sample covariance of ``x`` and ``y`` divided by the
    sample variance of ``x``; the intercept then makes the line pass
    through the point of means ``(mean(x), mean(y))``.

    Args:
        x: Predictor values. A list, a 1-D array or a single-row
            matrix; the input is flattened to one dimension.
        y: Observed responses, with the same number of elements as ``x``.

    Returns:
        A tuple ``(a, b)`` holding the slope and the intercept.

    Raises:
        ValueError: If ``x`` and ``y`` differ in size, if fewer than two
            samples are given, or if all ``x`` values are identical (the
            slope is undefined in that case).
    """
    # Flatten so that lists, 1-D arrays and (1, n) matrices are all
    # handled the same way.
    xs = np.asarray(x, dtype=float).ravel()
    ys = np.asarray(y, dtype=float).ravel()

    if xs.size != ys.size:
        raise ValueError(
            "x and y must have the same number of samples, got %d and %d"
            % (xs.size, ys.size)
        )
    if xs.size < 2:
        raise ValueError("at least two samples are required to fit a line")
    # A constant predictor has zero variance, which would otherwise lead
    # to a silent division by zero and nan/inf coefficients.
    if np.all(xs == xs[0]):
        raise ValueError("x must contain at least two distinct values")

    mean_x = np.mean(xs)
    mean_y = np.mean(ys)
    var_x = np.var(xs, ddof=1)  # sample variance of x
    # np.cov returns the 2x2 covariance matrix; only cov(x, y) is needed.
    cov_xy = np.cov(xs, ys, ddof=1)[0, 1]

    a = cov_xy / var_x
    b = mean_y - a * mean_x
    return a, b
根据参考文献[1]中介绍的方法,自己用代码实现了最小二乘法的求解:$a = \mathrm{cov}(x, y) / \mathrm{var}(x)$,$b = \bar{y} - a\bar{x}$。
4、画结果图
def draw_picture(x_train, y_train, y_train_predict, x_test=None, y_test_predict=None):
    """Plot the training data, the fitted line, the residuals and test predictions.

    Args:
        x_train: Sequence of training inputs.
        y_train: Sequence of observed training targets.
        y_train_predict: Sequence of model predictions for ``x_train``.
        x_test: Optional test input(s). Drawn only when
            ``y_test_predict`` is given as well.
        y_test_predict: Optional model prediction(s) for ``x_test``.

    The function opens a figure window and blocks until it is closed.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Training set: observed points, fitted line and predicted points.
    train_point = ax.scatter(x_train, y_train, c='c', s=50, linewidth=0)
    train_predict_line, = ax.plot(x_train, y_train_predict, c='r', linewidth=2)
    train_predict_point = ax.scatter(x_train, y_train_predict, marker='*', s=150, c='g',
                                     linewidth=0)
    handles = [train_point, train_predict_line, train_predict_point]
    labels = ["train_point", "train_predict_line", "train_predict_point"]

    # One vertical segment per sample, joining the observation to its prediction.
    residual = None
    for x_i, y_i, y_hat_i in zip(x_train, y_train, y_train_predict):
        residual, = ax.plot([x_i, x_i], [y_i, y_hat_i], c='y', linewidth=2)
    # With an empty training set there is no residual line to put in the legend.
    if residual is not None:
        handles.append(residual)
        labels.append("residual")

    # Test set. Identity comparison is required here: "!= None" is evaluated
    # element-wise on arrays and cannot be used as a boolean.
    if x_test is not None and y_test_predict is not None:
        test_predict_point = ax.scatter(x_test, y_test_predict, marker='*', s=150, c='b',
                                        linewidth=0)
        handles.append(test_predict_point)
        labels.append("test_predict_point")

    ax.legend(handles, labels, loc='lower right')
    ax.grid(color='b', linewidth=0.3, linestyle='--')
    ax.set_title('Linear Regression')
    ax.set_ylabel('Y')
    ax.set_xlabel('X')
    # plt.show() runs the GUI event loop; Figure.show() returns immediately,
    # so in a plain script the window would vanish as soon as the script ends.
    plt.show()
5、主函数
def main():
    """Fit the demo data set, predict one test point and plot the result."""
    # Training set. Plain 1-D arrays are used instead of np.mat: the np.mat
    # alias is no longer available in NumPy 2.x and np.matrix is discouraged.
    x_train = np.array([6, 8, 10, 14, 18])
    y_train = np.array([7, 9, 13, 17.5, 18])

    # Train.
    a, b = train_linear_model(x_train, y_train)

    # Predict on the training inputs and on a single test input.
    y_train_predict = predict(x_train, a, b)
    x_test = 12
    y_test_predict = predict(x_test, a, b)

    # Plot; the arrays are converted to plain lists for drawing.
    draw_picture(x_train.tolist(), y_train.tolist(), y_train_predict.tolist(),
                 x_test, y_test_predict)


# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
    main()
参考文献:[1]. [Python 机器学习系列之线性回归篇深度详细 ](https://www.jianshu.com/p/738f6092ef53)