A PyTorch demo that learns addition: the input is two numbers, and the model computes their sum.
- Train the model
$ cat model_add.py
import torch
import torch.nn as nn
import torch.optim as optim

# 0. Define model (input_dim inputs -> 4 hidden -> output_dim output)
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 4)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(4, output_dim)

    def forward(self, x):
        x0 = x
        x1 = self.fc1(x0)
        x2 = self.act1(x1)
        out = self.fc2(x2)
        # Debug output every 10 epochs; reads the module-level variable epoch set by the training loop below.
        if (epoch + 1) % 10 == 0:
            print(f"[{epoch+1}]: output: {out}")
            for name, param in self.named_parameters():
                print(f" {name}: {param.tolist()}")
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)
input_dim = 2
output_dim = 1

# 1. Build model
model = SimpleMLP(input_dim, output_dim)

# 2. Define dummy data
x_train = torch.tensor([[1.0, 2.0]])
y_train = torch.tensor([[3.0]])

# 3. Loss and optimizer
criterion = nn.MSELoss()  # Mean squared error loss
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 4. Train the model
num_epochs = 100
epoch = 0  # module-level variable, also read inside forward() for debug printing
for epoch in range(num_epochs):
    y_pred = model(x_train)            # Predict output (y_pred) by feeding x_train into the model.
    loss = criterion(y_pred, y_train)  # Compute the loss between the model's prediction and the true labels.
    optimizer.zero_grad()              # Reset the gradients from previous steps (required in PyTorch).
    loss.backward()                    # Compute the gradient of the loss w.r.t. model parameters (weights and biases).
    optimizer.step()                   # Update the model's weights based on the computed gradients (this is learning!).

torch.save(model.state_dict(), "model_add.pth")
print("Training complete, saved to model_add.pth")

# 5. Test the model
test_input = torch.tensor([[5.0, 7.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")
After training completes, the model is stored in the local file model_add.pth.
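As a quick sanity check, one can open the saved checkpoint and list the stored tensors. This is a minimal sketch that is not part of the original scripts; it only assumes the model_add.pth file produced above:

import torch

# Load the state_dict saved by model_add.py and list its tensors.
state = torch.load("model_add.pth")
for name, tensor in state.items():
    print(name, tuple(tensor.shape))
# Expected entries: fc1.weight (4, 2), fc1.bias (4,), fc2.weight (1, 4), fc2.bias (1,)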
- Use the model
$ cat model_add_use.py
import torch
import torch.nn as nn
import torch.optim as optim

# 0. Define model (input_dim inputs -> 4 hidden -> output_dim output)
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 4)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(4, output_dim)

    def forward(self, x):
        x0 = x
        x1 = self.fc1(x0)
        x2 = self.act1(x1)
        out = self.fc2(x2)
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)
input_dim = 2
output_dim = 1

# 1. Build model
model = SimpleMLP(input_dim, output_dim)
# 2. Load the trained weights
model.load_state_dict(torch.load("model_add.pth"))

# 5. Test the model
test_input = torch.tensor([[5.0, 6.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")
Running it prints:
Model prediction for input: tensor([[5., 6.]]), output: tensor([[7.3747]], grad_fn=<AddmmBackward0>)
5 + 6 comes out as 7.3747 instead of the expected 11.
The reason is that there is far too little training data, and the model could also be trained for more epochs.
For example, when the training data is expanded to 4 samples:
#x_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
#y_train = torch.tensor([[3.0], [5.0], [7.0], [9.0]])
the prediction reaches 10.5605, which is already more accurate than 7.3747.
Increasing the samples further to 8:
x_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0], [6.0, 7.0], [7.0, 8.0], [8.0, 9.0]])
y_train = torch.tensor([[3.0], [5.0], [7.0], [9.0], [11.0], [13.0], [15.0], [17.0]])
Running again gives 10.6922: an improvement, but not by much. The other lever mentioned above is simply training longer; a sketch of that follows.
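A minimal sketch of that idea, assuming the 8-sample data above and the same 2 -> 4 -> 1 network shape as SimpleMLP (the epoch count of 2000 is an arbitrary illustrative choice, not a tuned value):

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(42)
# Same 2 -> 4 -> 1 shape as SimpleMLP, without the debug prints.
model = nn.Sequential(nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 1))

x_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0],
                        [5.0, 6.0], [6.0, 7.0], [7.0, 8.0], [8.0, 9.0]])
y_train = torch.tensor([[3.0], [5.0], [7.0], [9.0], [11.0], [13.0], [15.0], [17.0]])

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train for 2000 epochs instead of the original 100.
for epoch in range(2000):
    loss = criterion(model(x_train), y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

with torch.no_grad():
    print(model(torch.tensor([[5.0, 6.0]])))  # should land noticeably closer to 11 than 10.6922

Since [5.0, 6.0] is one of the 8 training samples, driving the training loss lower is expected to push this prediction toward its label 11.0.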
- Another test example, a LinearModel, is also recorded here
It does number conversion, i.e. it learns how to map one number to another:
import torch
import torch.nn as nn
import torch.optim as optim

# 0. Define model (input_dim inputs -> output_dim output)
class MyLinearModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MyLinearModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x0 = x
        out = self.linear(x0)
        # Debug output every 10 epochs; reads the module-level variable epoch set by the training loop below.
        if (epoch + 1) % 10 == 0:
            print(f"[{epoch+1}]: output: {out}")
            for name, param in self.named_parameters():
                print(f" {name}: {param.tolist()}")
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)
input_dim = 1
output_dim = 1

# 1. Build model
model = MyLinearModel(input_dim, output_dim)

# 2. Define dummy data: y = 3 * x + 2
x_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y_train = torch.tensor([[5.0], [8.0], [11.0], [14.0]])

# 3. Loss and optimizer
criterion = nn.MSELoss()  # Mean squared error loss
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 4. Train the model
num_epochs = 100
epoch = 0  # module-level variable, also read inside forward() for debug printing
for epoch in range(num_epochs):
    y_pred = model(x_train)            # Predict output (y_pred) by feeding x_train into the model.
    loss = criterion(y_pred, y_train)  # Compute the loss between the model's prediction and the true labels.
    optimizer.zero_grad()              # Reset the gradients from previous steps (required in PyTorch).
    loss.backward()                    # Compute the gradient of the loss w.r.t. model parameters (weight and bias).
    optimizer.step()                   # Update the model's weights based on the computed gradients (this is learning!).

print("Training complete.")

# 5. Test the model
test_input = torch.tensor([[5.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")
Run result:
Model prediction for input: tensor([[5.]]), output: tensor([[17.1906]], grad_fn=<AddmmBackward0>)
This roughly matches the expectation: 3 * 5 + 2 = 17.
The log shows that the learned parameters are close to the target values (weight=3, bias=2):
linear.weight: [[3.092534065246582]]
linear.bias: [1.7279385328292847]
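As a quick arithmetic check (not part of the original script), plugging the logged weight and bias back into y = w * x + b reproduces the prediction above for x = 5:

# Values copied from the training log above.
w = 3.092534065246582   # linear.weight
b = 1.7279385328292847  # linear.bias
print(w * 5.0 + b)      # 17.1906..., matching the model output tensor([[17.1906]])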