image.png
import numpy as np
import matplotlib.pyplot as plt
#逻辑回归 牛顿法
#加载数据
def load_data(file, num_pos=40):
    """Read two-column samples from *file* and build the design and label matrices.

    Each non-blank line must hold at least two whitespace-separated numbers;
    the first two are used as features and a constant 1.0 is prepended as the
    intercept term. The file carries no labels: the first ``num_pos`` rows are
    labelled 1 and every remaining row is labelled 0.

    Args:
        file: An open text file (or any iterable of lines) with the samples.
        num_pos: Number of leading rows labelled as positive. The default of
            40 reproduces the original 40-positive / 40-negative split for an
            80-row file.

    Returns:
        A tuple ``(xmat, ymat)`` of ``np.matrix`` objects with shapes
        ``(n, 3)`` and ``(n, 1)``, where ``n`` is the number of data rows.

    Raises:
        ValueError: If the file contains no data rows, or a field is not a
            number.
        IndexError: If a non-blank line has fewer than two fields.
    """
    data = []
    for line in file:
        fields = line.strip().split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        data.append([1.0, float(fields[0]), float(fields[1])])
    if not data:
        raise ValueError("no data rows found in input")

    # Size the label vector from the rows actually read instead of assuming 80.
    label = [1 if i < num_pos else 0 for i in range(len(data))]

    # np.mat was removed in NumPy 2.0; np.asmatrix yields the same np.matrix
    # type on every NumPy version.
    xmat = np.asmatrix(data)
    ymat = np.asmatrix(label).reshape(-1, 1)
    return xmat, ymat
def w_calc(xmat, ymat, maxIter=7):
    """Fit logistic-regression weights with Newton's method.

    Starting from all-zero weights, each iteration applies the update
    ``W <- W - H^{-1} * grad`` where, for ``m`` samples,

        h    = sigmoid(X W)
        grad = (1/m) * X^T (h - y)
        H    = (1/m) * X^T diag(h_i * (1 - h_i)) X

    Args:
        xmat: Design matrix of shape ``(m, n)``; ``np.matrix`` or array-like.
        ymat: Labels with ``m`` entries (shape ``(m, 1)`` or flat).
        maxIter: Number of Newton iterations to run.

    Returns:
        An ``np.ndarray`` of shape ``(n, 1)`` holding the fitted weights.

    Raises:
        ValueError: If ``xmat`` is not a non-empty 2-D array or the number of
            labels does not match its row count.
        numpy.linalg.LinAlgError: If the Hessian is singular.
    """
    # Plain ndarrays make `*` element-wise and `@` the matrix product, so the
    # same code works for np.matrix and ndarray inputs.
    X = np.asarray(xmat, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("xmat must be a non-empty 2-D array")
    y = np.asarray(ymat, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if y.shape[0] != m:
        raise ValueError("xmat and ymat must have the same number of rows")

    W = np.zeros((n, 1))
    for _ in range(maxIter):
        h = 1.0 / (1.0 + np.exp(-(X @ W)))  # sigmoid, shape (m, 1)
        grad = X.T @ (h - y) / m  # gradient, shape (n, 1)
        # Per-sample Hessian weight h_i * (1 - h_i). This must be an
        # element-wise product: the matrix product h * (1 - h).T is an (m, m)
        # outer product whose first row is h_0 * (1 - h_j), which is only
        # correct while every h_i is equal (the first iteration).
        s = h * (1.0 - h)
        # X^T diag(s) X without materialising the (m, m) diagonal matrix.
        H = X.T @ (X * s) / m  # Hessian, shape (n, n)
        # Solve H * step = grad rather than forming the explicit inverse.
        W -= np.linalg.solve(H, grad)
    return W
# Fit the model on the samples in ex4x.dat, then plot the samples together
# with the learned decision boundary.
# The context manager closes the data file as soon as it has been parsed.
with open("ex4x.dat") as file:
    xmat, ymat = load_data(file)
W = w_calc(xmat, ymat)
print('w:', W)
w0 = W[0, 0]
w1 = W[1, 0]
w2 = W[2, 0]
# The boundary is where w0 + w1*x1 + w2*x2 = 0, solved here for x2.
plotx1 = np.arange(20, 50, 2)
plotx2 = -w0/w2 - w1/w2*plotx1
plt.plot(plotx1, plotx2, c='r', label='decision boundary')
# Column 1 and column 2 of xmat are the two features; column 0 is the constant 1.0.
plt.scatter(xmat[:, 1][ymat == 0].A, xmat[:, 2][ymat == 0].A, marker='o', label='label=neg')
plt.scatter(xmat[:, 1][ymat == 1].A, xmat[:, 2][ymat == 1].A, marker='+', label='label=pos')
plt.grid()
plt.legend()
plt.show()
其中海森矩阵的计算是重点:需要用 np.diag() 把每个样本的 $h_i(1-h_i)$ 构造成对角矩阵 $D$,再计算 $H=\frac{1}{m}X^{T}DX$。之前看过的很多代码都没有用 np.diag() 构造对角矩阵,导致结果不准确。这样写之后,由于牛顿法收敛很快,迭代 7 次左右即可。