Logistic Regression

logistic回归梯度更新公式.png
数据分布.png
#!/usr/bin/python3

import numpy as np 
from math import *
import matplotlib.pyplot as plt

def Simulation(n_samples=100, noise_std=0.09):
    """Generate a noisy two-feature toy data set for binary classification.

    Both features are drawn uniformly from [0, 1). A sample is labelled 1
    when ``x1 + x2 + noise > 1`` and 0 otherwise, where the noise is
    Gaussian with mean 0 and standard deviation ``noise_std``.

    Args:
        n_samples: Number of rows to generate (default 100).
        noise_std: Standard deviation of the Gaussian noise added before
            thresholding (default 0.09).

    Returns:
        An ``n_samples x 3`` ``numpy.matrix`` whose columns are
        ``[label, x1, x2]``.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion
    # and exists in both old and new NumPy releases.
    X = np.asmatrix(np.random.rand(n_samples, 2))
    X1 = X[:, 0]
    X2 = X[:, 1]
    Noise = np.random.normal(0, noise_std, [n_samples, 1])
    y = np.where(X1 + X2 + Noise > 1, 1, 0)
    # Wrap explicitly so the result is a matrix regardless of how hstack
    # resolves the mixed ndarray/matrix inputs.
    return np.asmatrix(np.hstack([y, X1, X2]))

def visualize(d):
    """Scatter-plot the samples in ``d``, coloured by their label.

    Column 0 of ``d`` is read as the label and columns 1 and 2 as the
    plotted coordinates. Rows labelled 1 are drawn as red triangles ("T"),
    rows labelled 0 as blue squares ("F").

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can draw more onto
        the new figure or call ``show()``.
    """
    labels = d[:, 0]
    positives = d[np.where(labels == 1)[0]]
    negatives = d[np.where(labels == 0)[0]]

    plt.figure()
    # (rows, colour, marker, legend label) for each class, positives first.
    groups = (
        (positives, 'r', "^", "T"),
        (negatives, 'b', "s", "F"),
    )
    for rows, colour, shape, tag in groups:
        plt.scatter(rows[:, 1].tolist(), rows[:, 2].tolist(),
                    c=colour, marker=shape, label=tag)

    plt.title('Scatter Plot Of Simulation Data  ')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend(loc='upper right')
    return plt

def draw_boundary(w,d):
    """Plot the data in ``d`` with a linear decision boundary and show it.

    The boundary is the line ``w[0,0] + w[0,1]*x1 + w[0,2]*x2 = 0``,
    sampled at 20 evenly spaced ``x1`` values in [0, 1].

    Args:
        w: 2-D weight row indexed as ``w[0, j]`` (bias, x1 weight, x2 weight).
        d: Data passed through to ``visualize``.
    """
    canvas = visualize(d)
    bias, coef_x1, coef_x2 = w[0, 0], w[0, 1], w[0, 2]
    xs = np.linspace(0, 1, 20)
    # Solve the boundary equation for x2.
    ys = -(bias + coef_x1 * xs) / coef_x2
    canvas.plot(xs, ys)
    canvas.show()


def Sigmoid(X,W):
    """Return the logistic function of the linear scores ``X . W^T``.

    Args:
        X: Design matrix of shape (m, n); ``numpy.ndarray`` or
            ``numpy.matrix``.
        W: Weight row vector of shape (1, n).

    Returns:
        The (m, 1) values ``1 / (1 + exp(-X W^T))``.
    """
    # np.dot is a true matrix product for ndarray and np.matrix alike.
    # The `*` operator is a matrix product only for np.matrix; on plain
    # ndarrays it broadcasts element-wise and yields the wrong shape.
    scores = np.dot(X, np.transpose(W))
    return 1/(1+np.exp(-scores))


def GD(iteratons,X,Y,alpha=0.01):
    """Fit logistic-regression weights with full-batch gradient descent.

    A column of ones is prepended to ``X`` as the bias feature, the weights
    start from uniform random values in [0, 1), and every iteration applies
    ``W += -alpha * (H - Y)^T X / M`` using all M rows.

    Args:
        iteratons: Number of update steps to run.
        X: (M, n) feature matrix. The gradient uses ``*`` as a matrix
            product, so this is expected to be a ``numpy.matrix``.
        Y: (M, 1) label column.
        alpha: Learning rate (default 0.01).

    Returns:
        The (1, n + 1) weight row ``[bias, w_1, ..., w_n]``.
    """
    n_weights = X.shape[1] + 1
    n_rows = X.shape[0]
    weights = np.random.rand(1, n_weights)
    design = np.hstack([np.ones((n_rows, 1)), X])
    for _ in range(iteratons):
        residual = Sigmoid(design, weights) - Y
        gradient = residual.T * design
        weights += -alpha * gradient / n_rows
    return weights

def SGD(threshold,X,Y,alpha=0.01,batch_size=1,max_iter=None):
    """Fit logistic-regression weights with (mini-batch) stochastic gradient descent.

    A column of ones is prepended to ``X`` as the bias feature and the
    weights start from uniform random values in [0, 1). Each step draws
    ``batch_size`` row indices uniformly at random (with replacement),
    computes the gradient averaged over that batch, and stops as soon as
    the absolute sum of its components drops below ``threshold``.

    Args:
        threshold: Positive stopping tolerance on ``abs(sum(gradient))``.
        X: (M, n) feature matrix. The gradient uses ``*`` as a matrix
            product, so this is expected to be a ``numpy.matrix``.
        Y: (M, 1) label column.
        alpha: Learning rate (default 0.01).
        batch_size: Rows sampled per step (default 1, one sample per step).
        max_iter: Optional cap on the number of update steps; ``None``
            (default) means run until the threshold test passes.

    Returns:
        The (1, n + 1) weight row ``[bias, w_1, ..., w_n]``.

    Raises:
        ValueError: If ``threshold`` is not positive, ``batch_size`` is
            less than 1, or ``max_iter`` is negative.
    """
    # abs(...) < threshold can never hold for threshold <= 0 (or NaN), which
    # would make the loop below spin forever; fail fast instead.
    if not threshold > 0:
        raise ValueError("threshold must be positive, got {!r}".format(threshold))
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got {!r}".format(batch_size))
    if max_iter is not None and max_iter < 0:
        raise ValueError("max_iter must be >= 0, got {!r}".format(max_iter))

    N=X.shape[1]+1
    M=X.shape[0]
    W=np.random.rand(1,N)
    X=np.hstack([np.ones((M,1)),X])
    steps=0
    while max_iter is None or steps<max_iter:
        # Randomly select batch_size rows (with replacement).
        idx=np.random.randint(0,M,batch_size).tolist()
        SX=X[idx,:]
        SY=Y[idx,:]
        H=Sigmoid(SX,W)
        # Average over the rows actually used. Dividing a single-sample
        # gradient by the data-set size M shrinks the effective learning
        # rate to alpha/M and stalls convergence.
        grad=(H-SY).T*SX/batch_size
        if abs(np.sum(grad))<threshold:
            break
        W+=-alpha*grad
        steps+=1
    return W

if __name__ == '__main__':
    # Build the toy data set: column 0 holds the labels, columns 1-2 the
    # two features.
    d=Simulation()
    Y,X=d[:,0],d[:,1:3]
    # Show the raw scatter plot first.
    visualize(d).show()
    # Full-batch gradient descent for 10000 iterations, then plot its
    # decision boundary.
    draw_boundary(GD(10000,X,Y),d)
    # Stochastic gradient descent with stopping threshold 0.2, then plot
    # its decision boundary.
    draw_boundary(SGD(0.2,X,Y),d)
设置学习率alpha很重要，感觉随机梯度下降不容易收敛。
logistic回归决策边界.png
Logistic Regression

推荐阅读更多精彩内容