Backpropagation Examples
1. Gradient Descent and Backpropagation for a Single-Layer Network
Single-feature linear model
- Linear model definition
- Loss function
- Partial derivatives of the loss (substituting the current value of a into the partial derivative gives the gradient)
- Updating the parameters: define a learning rate lr, plug in the gradient value, and obtain the updated value of a (the full derivation is worked out below)
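A worked version of the derivation above, reconstructed from the code example that follows (same loss and update rule as the snippet below):

  y_{pred} = a x + b, \qquad L = \tfrac{1}{2} \sum_i (a x_i + b - y_i)^2

  \frac{\partial L}{\partial a} = \sum_i (a x_i + b - y_i)\, x_i, \qquad \frac{\partial L}{\partial b} = \sum_i (a x_i + b - y_i)

  a \leftarrow a - lr \cdot \frac{\partial L}{\partial a}, \qquad b \leftarrow b - lr \cdot \frac{\partial L}{\partial b}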
Code example
- Building the dataset
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader

x = torch.arange(1, 100, 2)
noise = torch.randn(50)
y = x * 2 + 10
# y = y + noise
t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)

a = torch.tensor(20.0, requires_grad=True)
b = torch.tensor(30.0, requires_grad=True)
- Epoch loop
for epoch in range(100):
    all_loss = 0
    for xt, yt in dl:
        # forward pass and loss
        y_pred = a * xt + b
        loss = torch.sum((y_pred - yt) ** 2) / 2
        all_loss += loss.data
        # zero the gradients (they are still None before the first backward pass)
        if a.grad is not None:
            a.grad.data.zero_()
            b.grad.data.zero_()
        # backpropagation
        loss.backward()
        # update the parameters
        a.data = a.data - a.grad.data * 1e-4
        b.data = b.data - b.grad.data * 1e-3
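To confirm that the value autograd stores in a.grad is the partial derivative above evaluated at the current a, it can be compared against the hand-derived formula; a minimal sketch, reusing the a, b and dl defined in the snippets above:

xt, yt = next(iter(dl))          # take one mini-batch
y_pred = a * xt + b
loss = torch.sum((y_pred - yt) ** 2) / 2
if a.grad is not None:           # clear any gradients left over from the loop above
    a.grad.data.zero_()
    b.grad.data.zero_()
loss.backward()
# hand-derived gradients: dL/da = sum((y_pred - y) * x), dL/db = sum(y_pred - y)
manual_a_grad = torch.sum((y_pred - yt) * xt).detach()
manual_b_grad = torch.sum(y_pred - yt).detach()
print(a.grad, manual_a_grad)     # the two values should match up to floating-point error
print(b.grad, manual_b_grad)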
2. Backpropagation in a Multi-Layer Network
Single-feature multi-layer network: gradient descent and backpropagation
- Multi-layer linear model definition
- Loss function
- First take the derivatives with respect to a2 and b2
- How the chain rule applies
- Derivatives with respect to a1 and b1 via the chain rule (worked out below)
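A worked version of the derivation above, reconstructed from the code example below (liner1 denotes the first layer's output):

  liner1 = a_1 x + b_1, \qquad y_{pred} = a_2 \cdot liner1 + b_2, \qquad L = \tfrac{1}{2} \sum (y_{pred} - y)^2

  \frac{\partial L}{\partial a_2} = \sum (y_{pred} - y) \cdot liner1, \qquad \frac{\partial L}{\partial b_2} = \sum (y_{pred} - y)

  \frac{\partial L}{\partial liner1} = a_2 (y_{pred} - y), \qquad \frac{\partial liner1}{\partial a_1} = x, \qquad \frac{\partial liner1}{\partial b_1} = 1

  \frac{\partial L}{\partial a_1} = \sum a_2 (y_{pred} - y)\, x, \qquad \frac{\partial L}{\partial b_1} = \sum a_2 (y_{pred} - y)

These are exactly the quantities that the print statements in the code below compare against a1.grad, b1.grad and a2.grad.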
- Code example
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd

x = torch.arange(1, 100, 2)
noise = torch.randn(50)
y = x * 2 + 10
# y = y + noise
t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)

# two-layer network parameters
a1 = torch.tensor(20.0, requires_grad=True)
b1 = torch.tensor(30.0, requires_grad=True)
a2 = torch.tensor(20.0, requires_grad=True)
b2 = torch.tensor(30.0, requires_grad=True)

flag = 0
for epoch in range(1):
    all_loss = 0
    for xt, yt in dl:
        # forward pass and loss
        liner1 = a1 * xt + b1
        y_pred = a2 * liner1 + b2
        loss = torch.sum((y_pred - yt) ** 2) / 2
        all_loss += loss.data
        # zero the gradients (skipped on the very first step, before any backward pass)
        if flag != 0:
            a1.grad.data.zero_()
            b1.grad.data.zero_()
            a2.grad.data.zero_()
            b2.grad.data.zero_()
        else:
            flag = 1
        loss.backward()
        print(f"a1:{a1.data}, a2:{a2.data}, b1:{b1.data}, b2:{b2.data}")
        print(f"autograd gradients: a1_grad:{a1.grad}, a2_grad:{a2.grad}, b1_grad:{b1.grad}, b2_grad:{b2.grad}")
        print(f"x: {xt}, y: {yt}, layer-1 output: {liner1}, layer-2 output: {y_pred}")
        print(f"hand-computed gradient w.r.t. a2: {torch.sum(torch.mul(y_pred - yt, liner1))}")
        print(f"gradient of loss w.r.t. liner1: {a2 * (y_pred - yt)}")
        print(f"gradient of liner1 w.r.t. a1: {xt}")
        print(f"gradient of loss w.r.t. a1: {torch.sum(torch.mul(xt, a2 * (y_pred - yt)))}")
        print(f"gradient of loss w.r.t. b1: {torch.sum(a2 * (y_pred - yt))}")
        # update the parameters
        a1.data = a1.data - a1.grad.data * 1e-4
        b1.data = b1.data - b1.grad.data * 1e-3
        a2.data = a2.data - a2.grad.data * 1e-4
        b2.data = b2.data - b2.grad.data * 1e-3
        break
    print(f"epoch:{epoch}, now a:{a1}, now b:{b1}, now loss: {all_loss / len(dl)}")

# y_pre = a * x + b
# plt.plot(x.detach().numpy(), y.detach().numpy(), 'go', label='data', alpha=0.3)
# plt.plot(x.detach().numpy(), y_pre.detach().numpy(),
#          label='predicted', alpha=1)
# plt.legend()
# plt.show()
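Running this single step prints both the gradients computed by autograd (a1.grad, a2.grad, b1.grad, b2.grad) and the hand-derived sums from the formulas above; the corresponding values should agree up to floating-point error.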
Multi-feature multi-layer network: gradient descent and backpropagation
- Code example
# build the dataset
x = torch.randn(100, 2)
noise = torch.randn(100)
# y = 2*x1 + 1*x2 + 3
y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
y = y + noise
t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)
def dy_lr(epoch_num):
    """Step learning-rate schedule: 3e-3 before epoch 120, 1e-3 afterwards."""
    if epoch_num < 120:
        return 3e-3
    # if 100 <= epoch_num < 1000:
    #     return 1e-5
    else:
        return 1e-3
def cal_grad(line1_output, line2_grad, line1_para):
    """
    Backpropagate through one linear layer.
    :param line1_output: output of the previous layer, shape batch_size x (neurons in previous layer)
    :param line2_grad: gradient of the loss w.r.t. this layer's output, shape batch_size x (neurons in this layer)
    :param line1_para: this layer's weight matrix, shape (neurons in previous layer) x (neurons in this layer),
                       i.e. line1_output @ line1_para gives this layer's output (before the bias)
    :return: gradient of the loss w.r.t. the previous layer's output, and the per-sample gradient w.r.t. line1_para
    """
    line1_grad = torch.matmul(line1_para, line2_grad.unsqueeze(2))
    line1_a_grad = torch.matmul(line1_output.unsqueeze(2), line2_grad.unsqueeze(1))
    return line1_grad.squeeze(), line1_a_grad.squeeze()
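# For a linear layer z = h @ W + bias, the per-sample rules implemented by cal_grad are
#     dL/dh = W @ dL/dz          -> returned as line1_grad (passed on to the previous layer)
#     dL/dW = outer(h, dL/dz)    -> returned as line1_a_grad (summed over the batch later)
# The bias gradient would simply be dL/dz summed over the batch; it is not computed here.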
def test_backward():
    """
    Manually backpropagate the gradients layer by layer and compare them with autograd.
    """
    line1_a = torch.randn(2, 2, requires_grad=True)
    line1_b = torch.randn(1, 2, requires_grad=True)
    line2_a = torch.randn(2, 2, requires_grad=True)
    line2_b = torch.randn(1, 2, requires_grad=True)
    line3_a = torch.randn(2, 1, requires_grad=True)
    line3_b = torch.randn(1, requires_grad=True)
    for xt, yt in dl:
        line1_out = torch.matmul(xt, line1_a) + line1_b
        line2_out = torch.matmul(line1_out, line2_a) + line2_b
        line3_out = torch.matmul(line2_out, line3_a) + line3_b
        output = line3_out.squeeze()
        # note: the loss here is (sum((output - yt) ** 2)) ** 2 / 2, matching the hand-derived gradient below
        loss = torch.sum((output - yt) ** 2) ** 2 / 2
        loss.backward()
        print("x:{}, y:{}, y_pred:{}".format(xt, yt, output))
        print("line1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
            line1_a, line1_b, line2_a, line2_b, line3_a, line3_b
        ))
        print("*" * 20)
        print("line1_out:{},\nline2_out:{},\nline3_out:{}".format(
            line1_out, line2_out, line3_out))
        print("*" * 20)
        print("loss:{}".format(loss))
        print("*" * 20)
        print("grad:\nline1_a:{},\nline1_b:{},\nline2_a:{},\nline2_b:{},\nline3_a:{},\nline3_b:{}".format(
            line1_a.grad, line1_b.grad, line2_a.grad, line2_b.grad, line3_a.grad, line3_b.grad
        ))
        print("*" * 20)
        # 1. gradient of the loss w.r.t. y_pred
        #    with S = sum((output - yt) ** 2) and loss = S ** 2 / 2, dloss/doutput = S * 2 * (output - yt)
        # grad_loss_y_pre = output.detach() - yt
        grad_loss_y_pre = (torch.sum((output - yt) ** 2) * 2 * (output - yt)).detach()
        print(grad_loss_y_pre)
        # backpropagate layer by layer
        grad_loss_line3, grad_loss_line3_a = cal_grad(line2_out, grad_loss_y_pre.unsqueeze(1), line3_a)
        grad_loss_line2, grad_loss_line2_a = cal_grad(line1_out, grad_loss_line3, line2_a)
        grad_loss_line1, grad_loss_line1_a = cal_grad(xt, grad_loss_line2, line1_a)
        print(
            f"grad:line1_a:{grad_loss_line1_a.sum(dim=0)},\nline1:{grad_loss_line1.sum(dim=0)}")
        print(
            f"grad: line2_a:{grad_loss_line2_a.sum(dim=0)},\nline2:{grad_loss_line2.sum(dim=0)}")
        print(
            f"grad: line3_a:{grad_loss_line3_a.sum(dim=0)},\n line3:{grad_loss_line3.sum(dim=0)}")
        break
test_backward()
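For each layer, the per-sample weight gradients summed over the batch (grad_loss_line1_a.sum(dim=0), and likewise for layers 2 and 3) should match the line1_a.grad, line2_a.grad and line3_a.grad values that autograd reports above.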
- Simpler version using PyTorch's built-in modules (nn.Linear layers trained with Adam)
# imports needed for this snippet
from torch import nn
from torch.optim import Adam
# build the dataset
x = torch.randn(100, 2)
noise = torch.randn(100)
# y = 2*x1 + 1*x2 + 3
y = torch.matmul(x, torch.tensor([2, 1], dtype=torch.float32)) + 3
y = y + noise
t_data_set = TensorDataset(x, y)
dl = DataLoader(t_data_set, batch_size=5)
def test_by_torch_model():
    """
    Train the same mapping with PyTorch's built-in layers and optimizer.
    """
    model = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2), nn.Linear(2, 1))
    optim = Adam(model.parameters(), lr=1e-2)
    criterion = nn.MSELoss()
    for epoch in range(50):
        all_loss = 0
        for xt, yt in dl:
            outputs = model(xt)
            optim.zero_grad()
            loss = criterion(outputs.squeeze(), yt)
            loss.backward()
            optim.step()
            all_loss += loss.detach().data
        print(f"epoch:{epoch}, now loss: {all_loss / len(dl)}")
    # plot the fitted values against the data
    y_pred = model(x).squeeze().detach().numpy()
    plt.plot([i for i in range(len(y_pred))],
             y.detach().numpy(), 'go', label='data', alpha=0.3)
    plt.plot([i for i in range(len(y_pred))],
             y_pred, label='predicted', alpha=1)
    plt.legend()
    plt.show()

# test_backward()
test_by_torch_model()