https://mofanpy.com/tutorials/machine-learning/torch/torch-numpy
PyTorch Neural Network Basics (Part 1)
Some code in the original article raised errors; it has been fixed here and adapted to run in Jupyter.
A quick reference to keep handy:
print("""
输出维度方式:
numpy时,用data.shape
tensor时,用data.size()
""")
输出维度方式:
numpy时,用data.shape
tensor时,用data.size()
import torch
import numpy as np
np_data = np.arange(6).reshape((2, 3))
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
'\nnumpy array:', np_data, # [[0 1 2], [3 4 5]]
'\ntorch tensor:', torch_data, # 0 1 2 \n 3 4 5 [torch.LongTensor of size 2x3]
'\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]
)
numpy array: [[0 1 2]
[3 4 5]]
torch tensor: tensor([[0, 1, 2],
[3, 4, 5]], dtype=torch.int32)
tensor to array: [[0 1 2]
[3 4 5]]
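A detail worth knowing: torch.from_numpy() shares memory with the source numpy array (and tensor.numpy() shares memory with the tensor), so in-place changes on one side show up on the other. A minimal check, using throwaway names a and t:
a = np.arange(3)
t = torch.from_numpy(a)   # t shares a's memory
a[0] = 100                # change the numpy array in place
print(t)                  # tensor([100, 1, 2]) (dtype depends on platform) - visible through t
t2 = torch.tensor(a)      # torch.tensor() copies the data, so t2 is independent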
Math Operations in Torch
# abs: absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data) # convert to a 32-bit float tensor
print(
'\nabs',
'\nnumpy: ', np.abs(data), # [1 2 1 2]
'\ntorch: ', torch.abs(tensor) # [1 2 1 2]
)
# sin: sine function
print(
'\nsin',
'\nnumpy: ', np.sin(data), # [-0.84147098 -0.90929743 0.84147098 0.90929743]
'\ntorch: ', torch.sin(tensor) # [-0.8415 -0.9093 0.8415 0.9093]
)
# mean: average
print(
'\nmean',
'\nnumpy: ', np.mean(data), # 0.0
'\ntorch: ', torch.mean(tensor) # 0.0
)
abs
numpy: [1 2 1 2]
torch: tensor([1., 2., 1., 2.])
sin
numpy: [-0.84147098 -0.90929743 0.84147098 0.90929743]
torch: tensor([-0.8415, -0.9093, 0.8415, 0.9093])
mean
numpy: 0.0
torch: tensor(0.)
# matrix multiplication
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data) # convert to a 32-bit float tensor
# correct method
print(
'\nmatrix multiplication (matmul)',
'\nnumpy: ', np.matmul(data, data), # [[7, 10], [15, 22]]
'\ntorch: ', torch.mm(tensor, tensor) # [[7, 10], [15, 22]]
)
matrix multiplication (matmul)
numpy: [[ 7 10]
[15 22]]
torch: tensor([[ 7., 10.],
[15., 22.]])
tensor1 = torch.Tensor([1, 2, 3, 4])
tensor1.dot(tensor1) # for 1-D tensors, torch computes [1,2,3,4].dot([1,2,3,4]) = 30.0
tensor(30.)
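For reference: `*` multiplies element-wise, torch.mm (or the `@` operator) does matrix multiplication, and torch.dot only accepts 1-D tensors. A minimal sketch using the 2x2 tensor from above:
print(tensor * tensor)      # element-wise: [[1., 4.], [9., 16.]]
print(tensor @ tensor)      # same as torch.mm: [[7., 10.], [15., 22.]]
# torch.dot(tensor, tensor) # would raise an error: torch.dot expects 1-D tensors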
Variables (Variable)
import torch
from torch.autograd import Variable # the Variable module in torch
# first lay the egg (make a tensor)
tensor = torch.FloatTensor([[1,2],[3,4]])
# put the egg into the basket; requires_grad controls whether this Variable joins backpropagation, i.e. whether gradients are computed for it
variable = Variable(tensor, requires_grad=True)
print(tensor)
print(
"""
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable)
print(
"""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
tensor([[1., 2.],
[3., 4.]])
1 2
3 4
[torch.FloatTensor of size 2x2]
tensor([[1., 2.],
[3., 4.]], requires_grad=True)
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
t_out = torch.mean(tensor*tensor) # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)
print(v_out) # 7.5
tensor(7.5000)
tensor(7.5000, grad_fn=<MeanBackward0>)
v_out.backward() # backpropagate the error from v_out
# It's fine if the next two lines are unclear; just know that a Variable is part of the computational graph and can be used to propagate errors.
# v_out = 1/4 * sum(variable*variable) is how v_out is computed in the graph
# so the gradient w.r.t. variable is d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2
print(variable.grad) # the gradient of the original Variable
print('''
0.5000 1.0000
1.5000 2.0000
''')
tensor([[0.5000, 1.0000],
[1.5000, 2.0000]])
0.5000 1.0000
1.5000 2.0000
print(variable) # the Variable form
print("""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable.data) # the tensor form
print("""
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable.data.numpy()) # the numpy form
print("""
[[ 1. 2.]
[ 3. 4.]]
""")
tensor([[1., 2.],
[3., 4.]], requires_grad=True)
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
tensor([[1., 2.],
[3., 4.]])
1 2
3 4
[torch.FloatTensor of size 2x2]
[[1. 2.]
[3. 4.]]
[[ 1. 2.]
[ 3. 4.]]
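Note: since PyTorch 0.4 the Variable API has been merged into Tensor, so the wrapper above is no longer needed; a tensor created with requires_grad=True gives the same result (a minimal sketch):
v = torch.tensor([[1., 2.], [3., 4.]], requires_grad=True)  # no Variable wrapper needed
out = torch.mean(v * v)
out.backward()
print(v.grad)  # tensor([[0.5000, 1.0000], [1.5000, 2.0000]]) - same gradient as above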
Activation Functions
import torch
import torch.nn.functional as F # activation functions live here
from torch.autograd import Variable
# make some fake data to plot
x = torch.linspace(-5, 5, 200) # x data (tensor), shape=(200,)
x = Variable(x)
# generate the different activation functions
x_np = x.data.numpy() # convert to a numpy array for plotting
# a few common activation functions
y_relu = torch.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x, dim=0)  # softmax is special: it outputs class probabilities for classification, so it isn't plotted here
# plot the activation functions
import matplotlib.pyplot as plt # Python's plotting module; tutorial at https://mofanpy.com/tutorials/data-manipulation/plt/
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')
plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')
plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')
plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')
plt.show()
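As the softmax comment above says, F.softmax turns raw scores into class probabilities rather than a curve worth plotting; a quick check (a minimal sketch):
scores = torch.tensor([1.0, 2.0, 3.0])
probs = F.softmax(scores, dim=0)   # pass dim explicitly to avoid the deprecation warning
print(probs)                       # tensor([0.0900, 0.2447, 0.6652])
print(probs.sum())                 # tensor(1.) - the probabilities sum to 1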
Building a Neural Network
Regression (fitting a relationship)
# build the dataset
import torch
import matplotlib.pyplot as plt
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
# plot
plt.scatter(x.data.numpy(), y.data.numpy())
plt.show()
# build the neural network
import torch
import torch.nn.functional as F # activation functions live here
class Net(torch.nn.Module): # inherit from torch's Module
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__() # inherit __init__
        # define the form of each layer
        self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer, linear output
        self.predict = torch.nn.Linear(n_hidden, n_output) # output layer, linear output
    def forward(self, x): # this also serves as Module's forward
        # forward-propagate the input; the network produces the output
        x = torch.relu(self.hidden(x)) # activation(hidden layer's linear output)
        x = self.predict(x) # output value
        return x
net = Net(n_feature=1, n_hidden=10, n_output=1)
print(net) # net's structure
print("""
Net (
(hidden): Linear (1 -> 10)
(predict): Linear (10 -> 1)
)
""")
Net(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(predict): Linear(in_features=10, out_features=1, bias=True)
)
Net (
(hidden): Linear (1 -> 10)
(predict): Linear (10 -> 1)
)
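Before training, the untrained net can already be called to sanity-check shapes (a minimal sketch; torch.zeros(5, 1) stands in for a batch of 5 samples):
print(net(torch.zeros(5, 1)).shape)  # torch.Size([5, 1]): 5 samples in, 5 predictions out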
# train the network
# the optimizer is the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.2) # pass in all of net's parameters and the learning rate
loss_func = torch.nn.MSELoss() # loss between prediction and ground truth (mean squared error)
# visualize the training process
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display
for t in range(200):
    prediction = net(x) # feed training data x to net, get the prediction
    loss = loss_func(prediction, y) # compute the loss between the two
    optimizer.zero_grad() # clear the leftover gradients from the previous step
    loss.backward(retain_graph=True) # backpropagate, compute the updates
    optimizer.step() # apply the updates to net's parameters
    # continuing inside the loop
    if t % 5 == 0:
        # plot and show learning process
        plt.clf()
        plt.title('loss - step=%d' % t)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.5)
        display.clear_output(wait=True) # refresh the plot
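A note on the retain_graph=True flag above: it only matters when backward() is called more than once on the same graph; here each iteration's forward pass builds a fresh graph, so the minimal update step would be (a sketch, equivalent to one loop iteration):
prediction = net(x)              # forward pass rebuilds the graph
loss = loss_func(prediction, y)
optimizer.zero_grad()            # clear old gradients
loss.backward()                  # plain backward is enough here
optimizer.step()                 # apply the update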
Classification
# build a fake dataset of two normally distributed clusters
import torch
import matplotlib.pyplot as plt
# fake data
n_data = torch.ones(100, 2) # base shape of the data
x0 = torch.normal(2*n_data, 1) # class 0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100) # class 0 y data (tensor), shape=(100,)
x1 = torch.normal(-2*n_data, 1) # class 1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100) # class 1 y data (tensor), shape=(100,)
# note: the x, y data must take exactly the form below (torch.cat concatenates the data)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor) # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor) # LongTensor = 64-bit integer
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()
# build the network; same class as the previous example, only the input/output dimensions differ
import torch
class Net(torch.nn.Module): # inherit from torch's Module
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__() # inherit __init__
        self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer, linear output
        self.out = torch.nn.Linear(n_hidden, n_output) # output layer, linear output
    def forward(self, x):
        # forward-propagate the input; the network produces the output
        x = torch.relu(self.hidden(x)) # activation(hidden layer's linear output)
        x = self.out(x) # raw output; this is not yet the prediction, which needs another step
        return x
net = Net(n_feature=2, n_hidden=10, n_output=2) # one output per class
print(net) # net's structure
print("""
Net (
(hidden): Linear (2 -> 10)
(out): Linear (10 -> 2)
)
""")
Net(
(hidden): Linear(in_features=2, out_features=10, bias=True)
(out): Linear(in_features=10, out_features=2, bias=True)
)
Net (
(hidden): Linear (2 -> 10)
(out): Linear (10 -> 2)
)
# train the network
# the optimizer is the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.02) # pass in all of net's parameters and the learning rate
# when computing the loss, note the target is NOT one-hot but a 1-D LongTensor of shape (batch,)
# while the prediction is a 2-D tensor of shape (batch, n_classes)
loss_func = torch.nn.CrossEntropyLoss() # cross-entropy loss
# visualize the training process
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display
import torch.nn.functional as F # activation functions live here
for t in range(50):
    out = net(x) # feed training data x to net, get the raw output
    loss = loss_func(out, y) # compute the loss between the two
    # arguments must be (1. nn output, 2. target); the target label is NOT one-hot
    optimizer.zero_grad() # clear the leftover gradients from the previous step
    loss.backward(retain_graph=True) # backpropagate, compute the updates
    optimizer.step() # apply the updates to net's parameters
    if t % 5 == 0:
        # plot and show learning process
        # plt.cla()
        plt.title('loss - step=%d' % t)
        # the class with the highest probability after softmax is the prediction
        prediction = torch.max(F.softmax(out, dim=1), 1)[1]
        pred_y = prediction.data.numpy().squeeze()
        target_y = y.data.numpy()
        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = sum(pred_y == target_y)/200. # fraction of predictions matching the ground truth
        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.5)
        display.clear_output(wait=True) # refresh the plot
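The accuracy can also be computed without the numpy round-trip, staying in torch (a minimal sketch using the variables from the loop above):
pred = torch.max(F.softmax(out, dim=1), 1)[1]   # predicted class per sample
accuracy = (pred == y).float().mean()           # fraction of correct predictions
print('Accuracy=%.2f' % accuracy.item())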
Building Networks the Quick Way
# net1 uses the building steps from before
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)
    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x
net1 = Net(1, 10, 1) # net1, built the long way
# net2, built the quick way
net2 = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
print(net1) # net1's forward() can be customized, e.g. for an RNN
print(net2) # net2's printout also lists the activation layer; this way is simpler to build
Net(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(predict): Linear(in_features=10, out_features=1, bias=True)
)
Sequential(
(0): Linear(in_features=1, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=1, bias=True)
)
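If you want the Sequential layers to carry names like net1's (hidden, predict), nn.Sequential also accepts an OrderedDict; a minimal sketch (net2_named is an arbitrary name):
from collections import OrderedDict
net2_named = torch.nn.Sequential(OrderedDict([
    ('hidden', torch.nn.Linear(1, 10)),
    ('relu', torch.nn.ReLU()),
    ('predict', torch.nn.Linear(10, 1)),
]))
print(net2_named)  # the printout now shows (hidden), (relu), (predict)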
Saving and Loading Models
import torch
import matplotlib.pyplot as plt
%matplotlib inline
# generate data and model
torch.manual_seed(1) # reproducible
# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
def CreateAndTrainNet1():
    # build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.2) # optimizer
    loss_func = torch.nn.MSELoss() # loss function
    # train
    for t in range(100):
        prediction = net1(x) # forward pass to get the prediction
        loss = loss_func(prediction, y) # compute the loss
        optimizer.zero_grad() # clear gradients
        loss.backward() # backpropagate, compute gradients
        optimizer.step() # apply the updates to the network
    # two ways to save the model
    torch.save(net1, 'net.pkl') # save the entire network
    torch.save(net1.state_dict(), 'net_params.pkl') # save only the network's parameters
    # plot
    # local variables are released when the function returns, so draw the plot before returning
    plt.figure(1, figsize=(10, 3))
    plt.subplot(131)
    plt.title('Net1')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# load the entire network
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)
    plt.subplot(132)
    plt.title('Net2')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# load only the network's parameters
def restore_params():
    # build a fresh net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    # copy the saved parameters into net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)
    plt.subplot(133)
    plt.title('Net3')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# show the results: net1, net2, and net3 are identical
# plot net1
# save net1 (1. the entire network, 2. parameters only)
CreateAndTrainNet1()
# plot net2
# restore the entire network
restore_net()
# plot net3
# restore the parameters into a new network
restore_params()
plt.show()
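Saving the state_dict is generally the safer of the two approaches (loading a whole pickled network requires the original class definitions to be importable). A fuller checkpoint usually bundles the optimizer state as well; a minimal sketch, where model, optimizer, and 'checkpoint.pkl' are placeholder names:
# save model and optimizer state together (model/optimizer are placeholders)
torch.save({'model': model.state_dict(),
            'optim': optimizer.state_dict()}, 'checkpoint.pkl')
# ...later: restore both
checkpoint = torch.load('checkpoint.pkl')
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optim'])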
Batch Training with DataLoader
import torch
import torch.utils.data as Data
torch.manual_seed(1) # reproducible
BATCH_SIZE = 5 # number of samples per training batch
# with BATCH_SIZE = 8: step 0 yields 8 samples; at step 1 fewer than 8 remain, so only the last 2 are yielded
x = torch.linspace(1, 10, 10) # x data (torch tensor)
y = torch.linspace(10, 1, 10) # y data (torch tensor)
# first wrap the data into a Dataset that torch understands
torch_dataset = Data.TensorDataset(x, y)
# put the dataset into a DataLoader
loader = Data.DataLoader(
    dataset=torch_dataset, # torch TensorDataset format
    batch_size=BATCH_SIZE, # mini batch size
    shuffle=True, # whether to shuffle the data (usually a good idea)
    num_workers=2, # subprocesses for loading the data
)
for epoch in range(3): # train over the whole dataset 3 times
    for step, (batch_x, batch_y) in enumerate(loader): # each step the loader yields one mini-batch
        # pretend this is where your training happens...
        # print some data
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())
Epoch: 0 | Step: 0 | batch x: [5. 3. 1. 7. 9.] | batch y: [ 6. 8. 10. 4. 2.]
Epoch: 0 | Step: 1 | batch x: [ 8. 10. 2. 6. 4.] | batch y: [3. 1. 9. 5. 7.]
Epoch: 1 | Step: 0 | batch x: [5. 9. 2. 6. 1.] | batch y: [ 6. 2. 9. 5. 10.]
Epoch: 1 | Step: 1 | batch x: [ 3. 4. 7. 10. 8.] | batch y: [8. 7. 4. 1. 3.]
Epoch: 2 | Step: 0 | batch x: [6. 9. 4. 8. 7.] | batch y: [5. 2. 7. 3. 4.]
Epoch: 2 | Step: 1 | batch x: [10. 3. 2. 1. 5.] | batch y: [ 1. 8. 9. 10. 6.]
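To see the leftover-batch behavior mentioned in the BATCH_SIZE = 8 comment, a minimal sketch (loader8 is an illustrative name; drop_last=True would discard the incomplete final batch instead):
loader8 = Data.DataLoader(dataset=torch_dataset, batch_size=8, shuffle=True)
for step, (bx, by) in enumerate(loader8):
    print('Step:', step, '| batch size:', bx.size(0))  # step 0 -> 8 samples, step 1 -> 2 samples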
Speeding Up Neural Network Training
The following approaches are covered (a quick update-rule sketch follows the list):
- Stochastic Gradient Descent (SGD): train on mini-batches
- Momentum: uses momentum to speed up gradient convergence
- AdaGrad: a per-parameter learning rate that resists updates in wrong directions
- RMSProp: combines the two ideas above
- Adam: combines the three ideas above
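As a rough intuition for the list above: plain SGD moves each weight straight down the current gradient, while Momentum keeps a running velocity so consistent gradients accumulate speed. A schematic sketch of the update rules (w, grad, v, lr, momentum are illustrative names, not the torch internals):
# SGD:      w = w - lr * grad
# Momentum: v = momentum * v - lr * grad
#           w = w + v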
# generate the data
import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt
torch.manual_seed(1) # reproducible
LR = 0.01
BATCH_SIZE = 32
EPOCH = 12
# fake dataset
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))
# plot dataset
plt.scatter(x.numpy(), y.numpy())
plt.show()
# use the data loader introduced in the previous section
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)
# create one neural network for each optimizer
# the default network form
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20) # hidden layer
        self.predict = torch.nn.Linear(20, 1) # output layer
    def forward(self, x):
        x = F.relu(self.hidden(x)) # activation function for hidden layer
        x = self.predict(x) # linear output
        return x
# one net per optimizer
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]
# create a different optimizer to train each network and record its loss
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]
loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []] # record each network's loss during training
# train
for epoch in range(EPOCH):
    print('Epoch: ', epoch)
    for step, (b_x, b_y) in enumerate(loader):
        # let each optimizer update its own network
        for net, opt, l_his in zip(nets, optimizers, losses_his):
            output = net(b_x) # get output for every net
            loss = loss_func(output, b_y) # compute loss for every net
            opt.zero_grad() # clear gradients for next train
            loss.backward() # backpropagation, compute gradients
            opt.step() # apply gradients
            l_his.append(loss.data.numpy()) # loss recorder
Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
# plot the loss curves
labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
for i, l_his in enumerate(losses_his):
    plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()