Training a minimal demo with PyTorch

Train an addition demo with PyTorch: given two numbers as input, predict their sum.

  1. Train the model
$ cat model_add.py
import torch
import torch.nn as nn
import torch.optim as optim

# 0. Define model (input_dim inputs -> 4 hidden -> output_dim output)
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 4)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(4, output_dim)

    def forward(self, x):
        x0 = x
        x1 = self.fc1(x0)
        x2 = self.act1(x1)
        out = self.fc2(x2)
        # Debug print every 10 epochs; `epoch` is read from module scope,
        # so it must be defined before forward() is called.
        if (epoch + 1) % 10 == 0:
            print(f"[{epoch+1}]: output: {out}")
            for name, param in self.named_parameters():
                print(f"  {name}: {param.tolist()}")
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

input_dim = 2
output_dim = 1
# 1. Build model
model = SimpleMLP(input_dim, output_dim)

# 2. Define dummy data
x_train = torch.tensor([[1.0, 2.0]])
y_train = torch.tensor([[3.0]])

# 3. Loss and optimizer
criterion = nn.MSELoss()  # Mean squared error loss
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 4. Train the model
num_epochs = 100
epoch = 0  # predefine at module scope so the debug print in forward() works
for epoch in range(num_epochs):
    y_pred = model(x_train)             # Predicts output (y_pred) by feeding x_train into the model.
    loss = criterion(y_pred, y_train)   # Computes the loss between the model’s prediction and the true labels.

    optimizer.zero_grad()               # Resets the gradient from previous steps (required in PyTorch).
    loss.backward()                     # Computes the gradient of the loss w.r.t. model parameters (weight and bias).
    optimizer.step()                    # Updates the model’s weights based on the computed gradients (this is learning!).

torch.save(model.state_dict(), "model_add.pth")
print("Training complete, saved to model_add.pth")

# 5. Test the model
test_input = torch.tensor([[5.0, 7.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")

When training completes, the model is stored in the local file model_add.pth.
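
To run it (assuming PyTorch is already installed):

$ python model_add.py

This trains for 100 epochs, prints the debug output every 10 epochs, and writes model_add.pth to the current directory.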

  2. Use the model
$ cat model_add_use.py
import torch
import torch.nn as nn

# 0. Define model (input_dim inputs -> 4 hidden -> output_dim output)
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 4)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(4, output_dim)

    def forward(self, x):
        x0 = x
        x1 = self.fc1(x0)
        x2 = self.act1(x1)
        out = self.fc2(x2)
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

input_dim = 2
output_dim = 1
# 1. Build model and load the trained weights
model = SimpleMLP(input_dim, output_dim)
model.load_state_dict(torch.load("model_add.pth"))
model.eval()  # inference mode; a no-op for this model, but good practice

# 2. Test the model
test_input = torch.tensor([[5.0, 6.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")

Running it, we see:

Model prediction for input: tensor([[5., 6.]]), output: tensor([[7.3747]], grad_fn=<AddmmBackward0>)

The predicted value of 5 + 6 is 7.3747, not the expected 11.

The reason is that the training data is too small, and the model could also be trained for more epochs (a sketch combining both fixes follows the 8-sample result below).

For example, when the training data is expanded to 4 samples:

x_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
y_train = torch.tensor([[3.0], [5.0], [7.0], [9.0]])

the prediction reaches 10.5605, which is closer to the true value than 7.3747.

Increasing the samples further to 8:

x_train = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0], [6.0, 7.0], [7.0, 8.0], [8.0, 9.0]])
y_train = torch.tensor([[3.0], [5.0], [7.0], [9.0], [11.0], [13.0], [15.0], [17.0]])

Running again, the prediction is 10.6922: an improvement, but only a small one.
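
A minimal sketch combining both fixes: train on many random pairs instead of only consecutive integers, and run more epochs. This variant swaps SGD for Adam, which converges more stably on this wider input range; the sample count, learning rate, and epoch count here are illustrative assumptions, not the original author's settings:

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(42)

# Sample 256 random pairs in [0, 10) so the network sees diverse inputs.
x_train = torch.rand(256, 2) * 10.0
y_train = x_train.sum(dim=1, keepdim=True)  # target is the true sum

model = nn.Sequential(nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 1))
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(1000):
    loss = criterion(model(x_train), y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

with torch.no_grad():
    print(model(torch.tensor([[5.0, 6.0]])))  # expected to be close to 11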

  3. A LinearModel test example, also included here

This one does number conversion: mapping one number to another:

import torch
import torch.nn as nn
import torch.optim as optim

# 0. Define model (input_dim inputs -> output_dim output)
class MyLinearModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MyLinearModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x0 = x
        out = self.linear(x0)
        # Debug print every 10 epochs; `epoch` is read from module scope,
        # so it must be defined before forward() is called.
        if (epoch + 1) % 10 == 0:
            print(f"[{epoch+1}]: output: {out}")
            for name, param in self.named_parameters():
                print(f"  {name}: {param.tolist()}")
        return out

TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

input_dim = 1
output_dim = 1
# 1. Build model
model = MyLinearModel(input_dim, output_dim)

# 2. Define dummy data: y = 3 * x + 2
x_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y_train = torch.tensor([[5.0], [8.0], [11.0], [14.0]])

# 3. Loss and optimizer
criterion = nn.MSELoss()  # Mean squared error loss
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 4. Train the model
num_epochs = 100
epoch = 0  # predefine at module scope so the debug print in forward() works
for epoch in range(num_epochs):
    y_pred = model(x_train)             # Predicts output (y_pred) by feeding x_train into the model.
    loss = criterion(y_pred, y_train)   # Computes the loss between the model’s prediction and the true labels.

    optimizer.zero_grad()               # Resets the gradient from previous steps (required in PyTorch).
    loss.backward()                     # Computes the gradient of the loss w.r.t. model parameters (weight and bias).
    optimizer.step()                    # Updates the model’s weights based on the computed gradients (this is learning!).

print("Training complete.")

# 5. Test the model
test_input = torch.tensor([[5.0]])
test_output = model(test_input)
print(f"Model prediction for input: {test_input}, output: {test_output}")

The result:

Model prediction for input: tensor([[5.]]), output: tensor([[17.1906]], grad_fn=<AddmmBackward0>)

This roughly matches the expectation: 3 * 5 + 2 = 17.

The log shows the learned parameters are approximately correct (the target is weight=3, bias=2):

  linear.weight: [[3.092534065246582]]
  linear.bias: [1.7279385328292847]
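
Plugging the learned parameters back in confirms the prediction: 3.0925 * 5 + 1.7279 ≈ 17.19, matching the printed output.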