本文实现的逻辑回归(Logistic Regression)分类器采用牛顿法(Newton's Method)进行训练。
代码
import numpy as np
class LogisticRegression(object):
    """Binary logistic regression classifier trained with Newton's method.

    ``fit`` and ``predict`` prepend a bias column of ones to the inputs, so
    ``weight`` holds ``n_features + 1`` entries with the intercept first.
    """

    def __init__(self, error: float = 0.7, max_epoch: int = 100):
        """
        :param error: float, convergence threshold. Training stops as soon
                      as the Euclidean norm of a Newton step (the distance
                      between the new and the old weight) is <= error.
        :param max_epoch: training stops after at most this many Newton
                          iterations.
        """
        self.error = error
        self.max_epoch = max_epoch
        self.weight = None

    @staticmethod
    def sign(p):
        """Turn probabilities into hard 0/1 labels (1 where p >= 0.5).

        Implemented as a vectorized comparison so that it also works on
        empty input, which ``np.vectorize`` over a lambda does not.

        :param p: array-like of probabilities
        :return: integer array with the same shape as p
        """
        return (np.asarray(p) >= 0.5).astype(int)

    def p_func(self, X_):
        """Get P(y=1 | x)

        :param X_: shape = (n_samples, n_features + 1)
        :return: shape = (n_samples)
        """
        z = np.asarray(X_) @ self.weight
        # Only ever exponentiate a non-positive number so that large |z|
        # cannot overflow:  z >= 0 -> 1 / (1 + e^-z),  z < 0 -> e^z / (1 + e^z).
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0, e) / (1.0 + e)

    def diff(self, X_, y, p):
        """Get derivative

        :param X_: shape = (n_samples, n_features + 1)
        :param y: shape = (n_samples)
        :param p: shape = (n_samples) P(y=1 | x)
        :return: shape = (n_features + 1) first derivative
        """
        return (p - np.asarray(y)) @ X_

    def hess_mat(self, X_, p):
        """Get Hessian Matrix

        Computed directly as ``X_.T @ diag(p * (1 - p)) @ X_``; it does not
        depend on any state cached by ``newton_method``.

        :param X_: shape = (n_samples, n_features + 1)
        :param p: shape = (n_samples) P(y=1 | x)
        :return: shape = (n_features + 1, n_features + 1) second derivative
        """
        X_ = np.asarray(X_)
        p = np.asarray(p)
        return (X_ * (p * (1 - p))[:, np.newaxis]).T @ X_

    def newton_method(self, X_, y):
        """Newton Method to calculate weight

        Starts from an all-ones weight vector and repeats
        ``weight -= H^-1 @ gradient`` until the step norm is <= ``error``
        or ``max_epoch`` iterations have run. The result is stored in
        ``self.weight``.

        :param X_: shape = (n_samples, n_features + 1)
        :param y: shape = (n_samples)
        :return: None
        """
        X_ = np.asarray(X_, dtype=float)
        y = np.asarray(y, dtype=float)
        self.weight = np.ones(X_.shape[1])
        for _ in range(self.max_epoch):
            p = self.p_func(X_)
            grad = self.diff(X_, y, p)
            hess = self.hess_mat(X_, p)
            try:
                # Solving the linear system is cheaper and more accurate
                # than forming the explicit inverse.
                step = np.linalg.solve(hess, grad)
            except np.linalg.LinAlgError:
                # Singular Hessian (e.g. constant or duplicated feature):
                # fall back to the minimum-norm least-squares step.
                step = np.linalg.lstsq(hess, grad, rcond=None)[0]
            # Apply the step before testing convergence so the final
            # iterate is kept rather than discarded.
            self.weight = self.weight - step
            if np.linalg.norm(step) <= self.error:
                break

    def fit(self, X, y):
        """Train the classifier.

        :param X: shape = (n_samples, n_features)
        :param y: shape = (n_samples), labels in {0, 1}
        :return: self
        """
        X = np.asarray(X)
        X_ = np.c_[np.ones(X.shape[0]), X]
        self.newton_method(X_, y)
        return self

    def predict(self, X) -> np.ndarray:
        """Predict 0/1 labels for the given samples.

        :param X: shape = (n_samples, n_features)
        :return: shape = (n_samples)
        """
        X = np.asarray(X)
        X_ = np.c_[np.ones(X.shape[0]), X]
        return self.sign(self.p_func(X_))
测试代码
import matplotlib.pyplot as plt
import sklearn.datasets
def plot_decision_boundary(pred_func, X, y, title=None):
    """Plot a classifier's decision regions together with the sample points.

    :param pred_func: prediction function; it is called once with an array
                      of grid points of shape (n_points, 2)
    :param X: training samples; columns 0 and 1 are used as plot axes
    :param y: training labels, used to colour the scattered samples
    :param title: optional figure title
    :return: None
    """
    margin = .5   # padding added around the data range on every side
    spacing = 0.01  # distance between neighbouring grid points
    feat_a, feat_b = X[:, 0], X[:, 1]

    # Dense grid covering the padded bounding box of the samples.
    grid_a, grid_b = np.meshgrid(
        np.arange(feat_a.min() - margin, feat_a.max() + margin, spacing),
        np.arange(feat_b.min() - margin, feat_b.max() + margin, spacing),
    )

    # Evaluate the classifier on every grid point, then restore grid shape.
    grid_points = np.c_[grid_a.ravel(), grid_b.ravel()]
    regions = pred_func(grid_points).reshape(grid_a.shape)

    # Filled contours show the decision regions; samples go on top.
    plt.contourf(grid_a, grid_b, regions, cmap=plt.cm.Spectral)
    plt.scatter(feat_a, feat_b, s=40, c=y, cmap=plt.cm.Spectral)
    if title:
        plt.title(title)
    plt.show()
效果
更多机器学习代码,请访问 https://github.com/WiseDoge/plume