线性回归的求解方法是最小二乘法,具体原理:
保证所有数据偏差的平方和最小
证明步骤
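损失函数:

$$L(w) = \sum_{i=1}^{n} \left(y_i - x_i^{T} w\right)^2 = (y - Xw)^{T}(y - Xw)$$

化简:

$$L(w) = y^{T}y - 2\,w^{T}X^{T}y + w^{T}X^{T}Xw$$

对 L 求偏导数,并令其等于 0:

$$\frac{\partial L}{\partial w} = -2\,X^{T}y + 2\,X^{T}Xw = 0 \quad\Longrightarrow\quad w = (X^{T}X)^{-1}X^{T}y$$

(要求 $X^{T}X$ 可逆。)

```python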
from numpy import *
import matplotlib.pyplot as plt
#1. Get data from file
def load_data_set(filename):
    """Load a tab-separated numeric data file into features and labels.

    Each non-blank line holds tab-separated floats. The last column is the
    target value and every column before it is a feature. The number of
    feature columns is taken from the first line of the file.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(data_mat, label_mat)`` tuple. ``data_mat`` is a list with one
        list of feature floats per row; ``label_mat`` is the list of target
        floats, in the same row order.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first line.
    """
    # Read once inside a context manager so the handle is always closed.
    with open(filename) as fr:
        lines = fr.readlines()

    # Feature count = total columns of the first line minus the label column.
    num_feat = len(lines[0].split('\t')) - 1 if lines else 0

    data_mat = []
    label_mat = []
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        cur_line = line.strip().split('\t')
        # Index explicitly so a short row raises instead of being truncated.
        data_mat.append([float(cur_line[i]) for i in range(num_feat)])
        # The label is the last column of the line, not the last feature.
        label_mat.append(float(cur_line[-1]))
    return data_mat, label_mat
# 2. Solve for the regression coefficient vector w
def stand_regres(x_arr, y_arr):
    """Fit ordinary least squares with the normal equation.

    Computes ``w = (X^T X)^-1 X^T y``.

    Args:
        x_arr: 2-D array-like of features, one row per sample.
        y_arr: Target values, either a flat sequence with one entry per
            sample or an array-like that already has one row per sample.

    Returns:
        A numpy matrix of coefficients with one row per feature column, or
        ``None`` (after printing a message) when ``X^T X`` has a determinant
        of exactly zero and therefore cannot be inverted.
    """
    # asmatrix is the same function as the `mat` alias, under the name that
    # is available in every NumPy release.
    x_mat = asmatrix(x_arr)
    y_mat = asmatrix(y_arr)
    # A flat label list becomes a 1 x n row matrix; turn it into a column so
    # that X^T * y is conformable. Inputs that already have one row per
    # sample are left untouched.
    if y_mat.shape[0] != x_mat.shape[0]:
        y_mat = y_mat.T

    xTx = x_mat.T * x_mat
    # The inverse only exists when the determinant is non-zero.
    if linalg.det(xTx) == 0.0:
        print("this matrix is singular, cannot do inverse!!!")
        return None
    return xTx.I * (x_mat.T * y_mat)
# 3. Fit the model on the sample file and plot the fitted line over the data.
xArr, yArr = load_data_set('ext0.txt')
xMat = asmatrix(xArr)
yMat = asmatrix(yArr)  # flat label list -> 1 x n row matrix

# Regress the labels (not the features) on the features. The labels are
# passed as an n x 1 column so that X^T * y is conformable.
ws = stand_regres(xArr, yMat.T)
if ws is None:
    # stand_regres has already reported that X^T X is singular; there is
    # no fitted line to draw.
    raise SystemExit(1)
print(ws)

# Sort each column ascending so the fitted line is drawn left to right.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws

fig = plt.figure()
# 1 row, 1 column, first (only) subplot.
ax = fig.add_subplot(111)
# scatter needs flat 1-D arrays, hence flatten().A[0] on the matrices.
# NOTE(review): column 1 is plotted as the x axis, which assumes column 0 is
# a constant intercept term -- confirm against the data file.
ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
ax.plot(xCopy[:, 1], yHat)
plt.show()
```