NumPy implementation
This is a two-layer network built from logistic-regression-style (classification) layers, simply used here for regression.
The backward pass involves matrix calculus; I just infer the order of the factors in each matrix product from their shapes.
The trick for getting the order and the transposes right: ignore the gradient at first.
Drop the gradient symbol, work out what transformation of the remaining variables reproduces the current variable, then put the gradient back in.
For example: in the forward pass y_ = h_ · w2. Dropping the gradient, w2 is obtained shape-wise from h_ᵀ · y_, i.e. (D_hidden, N) × (N, D_output) = (D_hidden, D_output), with N the number of samples; putting the gradient back in gives g_w2 = h_ᵀ · g_y_.
The relative order and the transposes stay the same.
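As a quick sanity check of this shape rule, here is a minimal sketch with arbitrary toy sizes (the names mirror the variables in the code below):

```python
import numpy as np

# Toy sizes, chosen only for illustration
N, H, O = 4, 5, 1
h_ = np.random.randn(N, H)    # hidden activations, shape (N, H)
w2 = np.random.randn(H, O)    # second-layer weights, shape (H, O)
y_ = np.dot(h_, w2)           # forward: (N, H) x (H, O) -> (N, O)
g_y_ = np.random.randn(N, O)  # stand-in for the upstream gradient dL/dy_

# Shape-wise, w2 "comes from" h_.T and y_, so the gradient keeps that order and transpose
g_w2 = np.dot(h_.T, g_y_)     # (H, N) x (N, O) -> (H, O), same shape as w2
g_h_ = np.dot(g_y_, w2.T)     # (N, O) x (O, H) -> (N, H), same shape as h_
assert g_w2.shape == w2.shape and g_h_.shape == h_.shape
```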
Code:
from sklearn.datasets import load_boston
from sklearn import preprocessing
import numpy as np
# Load the data and preprocess it
# (load_boston was removed in scikit-learn 1.2; this example needs an older version)
X, y = load_boston(return_X_y=True)
X = preprocessing.scale(X[:100, :])
y = preprocessing.scale(y[:100].reshape(-1, 1))
# Define hyperparameters
data_size, D_input, D_output, D_hidden = X.shape[0], X.shape[1], 1, 50
lr = 1e-5
epoch = 200000
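# Initialize the weights randomly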
w1 = np.random.randn(D_input, D_hidden)
w2 = np.random.randn(D_hidden, D_output)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
for i in range(epoch):
    # Forward pass
    h = np.dot(X, w1)
    h_ = sigmoid(h)
    y_ = np.dot(h_, w2)  # use the activated hidden layer h_, not the pre-activation h
    # Print the error (sum of squared errors)
    mse = np.sum((y - y_) ** 2)
    if i % 10 == 0:
        print('epoch: {} loss: {:.4f}'.format(i, mse))
    # Backpropagate the error
    g_y_ = 2 * (y_ - y)
    g_w2 = np.dot(h_.T, g_y_)
    g_h_ = np.dot(g_y_, w2.T)
    g_h = g_h_ * sigmoid(h) * (1 - sigmoid(h))
    g_w1 = np.dot(X.T, g_h)
    # Parameter update
    w1 -= lr * g_w1
    w2 -= lr * g_w2
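A quick way to validate the hand-derived backward pass is a numerical gradient check. The sketch below is not part of the original code; it reuses X, y, w1, w2 and sigmoid from above and compares the analytic gradient of w1 against central finite differences at a few randomly chosen entries:

```python
# Analytic gradient of the loss w.r.t. w1 at the current weights
h = np.dot(X, w1)
h_ = sigmoid(h)
g_y_ = 2 * (np.dot(h_, w2) - y)
g_h = np.dot(g_y_, w2.T) * h_ * (1 - h_)
g_w1 = np.dot(X.T, g_h)

def loss_fn():
    return np.sum((y - np.dot(sigmoid(np.dot(X, w1)), w2)) ** 2)

# Central finite differences at a handful of entries of w1
eps = 1e-6
for _ in range(5):
    idx = tuple(np.random.randint(s) for s in w1.shape)
    old = w1[idx]
    w1[idx] = old + eps
    f_plus = loss_fn()
    w1[idx] = old - eps
    f_minus = loss_fn()
    w1[idx] = old
    print(idx, (f_plus - f_minus) / (2 * eps), g_w1[idx])  # the two values should agree closely
```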
PyTorch implementation
from sklearn.datasets import load_boston
from sklearn import preprocessing
import torch
dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor
# Load the data and preprocess it
X, y = load_boston(return_X_y=True)
X = preprocessing.scale(X[:100, :])
y = preprocessing.scale(y[:100].reshape(-1, 1))
# Define hyperparameters
data_size, D_input, D_output, D_hidden = X.shape[0], X.shape[1], 1, 50
lr = 1e-5
epoch = 200000
# Convert to Tensors
# X = torch.Tensor(X).type(dtype)
# y = torch.Tensor(y).type(dtype)
X = torch.from_numpy(X).type(dtype)
y = torch.from_numpy(y).type(dtype)
# Define the trainable parameters
w1 = torch.randn(D_input, D_hidden).type(dtype)
w2 = torch.randn(D_hidden, D_output).type(dtype)
# Training loop
for i in range(epoch):
    # Forward pass
    h = torch.mm(X, w1)  # hidden layer
    h_relu = h.clamp(min=0)  # ReLU
    # y_pred = torch.mm(h_relu, w2)  # output layer
    y_pred = h_relu.mm(w2)  # output layer
    # Compute the loss (squared L2 error, summed over the batch)
    loss = (y_pred - y).pow(2).sum()
    if i % 10000 == 0:
        print('epoch: {} loss: {:.4f}'.format(i, loss))
    # Backward pass: compute the gradients by hand
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    # Derivative of ReLU: 1 on the right half (h > 0), 0 on the left half
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = torch.mm(X.t(), grad_h)
    # Update the parameters with the computed gradients
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
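For comparison, the manual backward pass can be handed off to PyTorch's autograd. This is a sketch, not the original code; it assumes a reasonably recent PyTorch (requires_grad / torch.no_grad), the CPU dtype, and reuses X, y, lr, epoch and the layer sizes from above:

```python
# Same network, but letting autograd compute the gradients
w1 = torch.randn(D_input, D_hidden, requires_grad=True)
w2 = torch.randn(D_hidden, D_output, requires_grad=True)

for i in range(epoch):
    y_pred = X.mm(w1).clamp(min=0).mm(w2)  # forward pass with a ReLU hidden layer
    loss = (y_pred - y).pow(2).sum()       # same sum-of-squares loss
    if i % 10000 == 0:
        print('epoch: {} loss: {:.4f}'.format(i, loss.item()))
    loss.backward()                        # autograd fills w1.grad and w2.grad
    with torch.no_grad():                  # update the weights without tracking the update
        w1 -= lr * w1.grad
        w2 -= lr * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
```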