逻辑回归

逻辑回归是一个分类算法,利用回归来做分类.它可以处理二元分类以及多元分类,逻辑回归与线性回归不同主要体现在以下两点:
1.sigmoid函数
2.损失函数

import copy
import numpy as np
import matplotlib.pyplot as plt

def loadata(filename):
    """Load a tab-separated sample file.

    Each row is: feature_1 ... feature_{n-1} label.

    Returns:
        data: (m, n) float32 ndarray of the raw file contents.
        x: (m, n) matrix — the n-1 feature columns plus a trailing bias
           column of ones (so the intercept is theta[-1]).
        y: (m, 1) matrix of labels.
    """
    # 'with' closes the file even on error (the original leaked the handle)
    with open(filename) as fr:
        data = [line.strip().split('\t') for line in fr]
    data = np.array(data, dtype=np.float32)
    m, n = data.shape
    x = np.mat(np.c_[data[:, 0:n-1], np.ones((m, 1))])
    y = np.mat(data[:, n-1]).reshape((m, 1))
    return data, x, y

def sigmoid(x, theta):
    """Logistic function of the linear score: 1 / (1 + exp(-x @ theta))."""
    score = x * theta
    return 1.0 / (1.0 + np.exp(-score))

def J(theta, x, y):
    """Mean cross-entropy cost of theta on samples (x, y).

    Returns a 1x1 matrix: -(1/m) * (log(h)^T y + log(1-h)^T (1-y)).
    """
    m, _ = x.shape
    h = sigmoid(x, theta)
    # renamed the local (it shadowed the function name in the original)
    cost = (-1.0 / m) * (np.log(h).T * y + np.log(1.0 - h).T * (1 - y))
    return cost

def gradient(x, y, maxloop, rate, eplsion):
    """Batch gradient descent for logistic regression.

    Args:
        x: (m, n) feature matrix (bias column included).
        y: (m, 1) label matrix.
        maxloop: maximum number of iterations.
        rate: learning rate.
        eplsion: stop once the cost drops below this threshold.

    Returns:
        thetas: list of theta snapshots, one per iteration.
        errors: list of cost values (floats), one per iteration.
        count: number of iterations actually run.
    """
    m, n = x.shape
    error = float('inf')
    errors = []
    theta = np.ones((n, 1))
    thetas = []
    count = 0
    while count < maxloop and error > eplsion:
        count += 1
        h = sigmoid(x, theta)
        thetas.append(copy.deepcopy(theta))
        # vectorized batch update: theta <- theta - rate * (1/m) * X^T (h - y)
        theta -= rate * (1.0 / m) * x.T * (h - y)
        # J returns a 1x1 matrix; extract the scalar so the loop condition
        # compares float to float (the original compared matrix to float and
        # stored matrices in `errors`)
        error = float(J(theta, x, y))
        errors.append(error)
        # no inner break needed: the while condition already checks error > eplsion
    return thetas, errors, count

def sgd(x, y, maxloop, rate, eplsion):
    """Stochastic gradient descent for logistic regression.

    One sample is used per iteration, cycling through the data set in order.

    Args:
        x: (m, n) feature matrix (bias column included).
        y: (m, 1) label matrix.
        maxloop: maximum number of iterations.
        rate: learning rate.
        eplsion: stop once the full-data cost drops below this threshold.

    Returns:
        thetas: list of theta snapshots, one per iteration.
        errors: list of cost values (floats), one per iteration.
        count: number of iterations actually run.
    """
    # NOTE(review): the original body had broken indentation (IndentationError);
    # reconstructed from its evident intent.
    m, n = x.shape
    errors = []
    theta = np.ones((n, 1))
    thetas = []
    count = 0
    for i in range(maxloop):
        count = i
        # cycle through the samples instead of rebinding the loop variable
        idx = i % m
        h = sigmoid(x[idx], theta)
        theta -= rate * (1.0 / m) * x[idx].T * (h - y[idx])
        thetas.append(copy.deepcopy(theta))
        # cost over the whole data set; cast the 1x1 matrix to a scalar
        error = float(J(theta, x, y))
        errors.append(error)
        if error < eplsion:
            break
    return thetas, errors, count + 1

def paint(data, thetas, errors, count, rate):
    """Plot the two classes with the fitted decision boundary, then the
    error curve, then the evolution of each of the three theta components.

    Args:
        data: (m, 3) ndarray — two features plus a 0/1 label column.
        thetas: list of (3, 1) theta snapshots (one per iteration).
        errors: list of cost values (one per iteration).
        count: number of iterations (must equal len(thetas) == len(errors)).
        rate: learning rate, shown in the title only.
    """
    # NOTE(review): the original body had broken indentation on the last two
    # subplot calls (syntax error); reconstructed from its evident intent.
    m, n = data.shape
    # split samples by label (third column)
    data1 = []
    data2 = []
    for i in range(m):
        if data[i, 2] == 1.0:
            data1.append(data[i, 0:2])
        else:
            data2.append(data[i, 0:2])
    data1 = np.array(data1)
    data2 = np.array(data2)
    plt.figure()
    plt.scatter(data1[:, 0], data1[:, 1], marker='o', c='b')
    plt.scatter(data2[:, 0], data2[:, 1], marker='*', c='b')
    # '%.2f' was written '%2f' (a width spec) in the original
    title = 'rate=%.2f, itercount=%d, error=%.2f \n' % (rate, count, errors[-1])
    # decision boundary: theta0*x + theta1*y + theta2 = 0
    # => y = -(theta0*x + theta2) / theta1
    x = np.arange(-5, 5, 0.01)
    plt.plot(x, -(thetas[-1][0] * x + thetas[-1][2]) / thetas[-1][1], c='r')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(title)
    plt.show()
    # error curve (error on the x-axis, iteration index on the y-axis,
    # matching the theta plots below)
    plt.figure()
    plt.plot(np.array(errors).flatten(), range(count), c='b')
    plt.xlabel('count')
    plt.ylabel('error')
    plt.title('error')
    plt.show()
    # one subplot per theta component
    for j in range(3):
        plt.subplot(3, 1, j + 1)
        plt.plot(np.array(thetas).reshape(count, 3)[:, j], range(count))
    plt.show()
    

if __name__ == "__main__":
    # Train with SGD on the sample file and visualize the result.
    data, x, y = loadata('linear.txt')
    thetas, errors, count = sgd(x, y, 10000, 0.01, 0.01)
    # print() function — the original used the Python 2 print statement,
    # which is a SyntaxError on Python 3
    print(thetas, errors, count)
    paint(data, thetas, errors, count, 0.01)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容