https://mofanpy.com/tutorials/machine-learning/torch/torch-numpy
PyTorch Neural Network Basics (Part 1)
Some code in the original article raised errors; it has been fixed here and adapted to run in Jupyter.
A quick reference to keep handy:
print("""
输出维度方式:
numpy时,用data.shape
tensor时,用data.size()
""")
输出维度方式:
numpy时,用data.shape
tensor时,用data.size()
import torch
import numpy as np
np_data = np.arange(6).reshape((2, 3))
torch_data = torch.from_numpy(np_data)
tensor2array = torch_data.numpy()
print(
'\nnumpy array:', np_data, # [[0 1 2], [3 4 5]]
'\ntorch tensor:', torch_data, # 0 1 2 \n 3 4 5 [torch.LongTensor of size 2x3]
'\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]
)
numpy array: [[0 1 2]
[3 4 5]]
torch tensor: tensor([[0, 1, 2],
[3, 4, 5]], dtype=torch.int32)
tensor to array: [[0 1 2]
[3 4 5]]
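A detail worth knowing: torch.from_numpy() shares memory with the source numpy array (and tensor.numpy() shares memory with the tensor), so in-place changes on one side show up on the other. A minimal check, using throwaway names a and t:
a = np.arange(3)
t = torch.from_numpy(a)   # t shares a's memory
a[0] = 100                # change the numpy array in place
print(t)                  # tensor([100, 1, 2]) (dtype depends on platform) - visible through t
t2 = torch.tensor(a)      # torch.tensor() copies the data, so t2 is independent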
Math Operations in Torch
# abs: absolute value
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data) # convert to a 32-bit float tensor
print(
'\nabs',
'\nnumpy: ', np.abs(data), # [1 2 1 2]
'\ntorch: ', torch.abs(tensor) # [1 2 1 2]
)
# sin: sine function
print(
'\nsin',
'\nnumpy: ', np.sin(data), # [-0.84147098 -0.90929743 0.84147098 0.90929743]
'\ntorch: ', torch.sin(tensor) # [-0.8415 -0.9093 0.8415 0.9093]
)
# mean: average
print(
'\nmean',
'\nnumpy: ', np.mean(data), # 0.0
'\ntorch: ', torch.mean(tensor) # 0.0
)
abs
numpy: [1 2 1 2]
torch: tensor([1., 2., 1., 2.])
sin
numpy: [-0.84147098 -0.90929743 0.84147098 0.90929743]
torch: tensor([-0.8415, -0.9093, 0.8415, 0.9093])
mean
numpy: 0.0
torch: tensor(0.)
# matrix multiplication
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data) # convert to a 32-bit float tensor
# correct method
print(
'\nmatrix multiplication (matmul)',
'\nnumpy: ', np.matmul(data, data), # [[7, 10], [15, 22]]
'\ntorch: ', torch.mm(tensor, tensor) # [[7, 10], [15, 22]]
)
matrix multiplication (matmul)
numpy: [[ 7 10]
[15 22]]
torch: tensor([[ 7., 10.],
[15., 22.]])
tensor1 = torch.Tensor([1, 2, 3, 4])
tensor1.dot(tensor1) # for 1-D tensors, torch computes [1,2,3,4].dot([1,2,3,4]) = 30.0
tensor(30.)
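For reference: `*` multiplies element-wise, torch.mm (or the `@` operator) does matrix multiplication, and torch.dot only accepts 1-D tensors. A minimal sketch using the 2x2 tensor from above:
print(tensor * tensor)      # element-wise: [[1., 4.], [9., 16.]]
print(tensor @ tensor)      # same as torch.mm: [[7., 10.], [15., 22.]]
# torch.dot(tensor, tensor) # would raise an error: torch.dot expects 1-D tensors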
Variables (Variable)
import torch
from torch.autograd import Variable # the Variable module in torch
# first lay the egg (make a tensor)
tensor = torch.FloatTensor([[1,2],[3,4]])
# put the egg into the basket; requires_grad controls whether this Variable joins backpropagation, i.e. whether gradients are computed for it
variable = Variable(tensor, requires_grad=True)
print(tensor)
print(
"""
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable)
print(
"""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
tensor([[1., 2.],
[3., 4.]])
1 2
3 4
[torch.FloatTensor of size 2x2]
tensor([[1., 2.],
[3., 4.]], requires_grad=True)
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
t_out = torch.mean(tensor*tensor) # x^2
v_out = torch.mean(variable*variable) # x^2
print(t_out)
print(v_out) # 7.5
tensor(7.5000)
tensor(7.5000, grad_fn=<MeanBackward0>)
v_out.backward() # backpropagate the error from v_out
# It's fine if the next two lines are unclear; just know that a Variable is part of the computational graph and can be used to propagate errors.
# v_out = 1/4 * sum(variable*variable) is how v_out is computed in the graph
# so the gradient w.r.t. variable is d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2
print(variable.grad) # the gradient of the original Variable
print('''
0.5000 1.0000
1.5000 2.0000
''')
tensor([[0.5000, 1.0000],
[1.5000, 2.0000]])
0.5000 1.0000
1.5000 2.0000
print(variable) # the Variable form
print("""
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable.data) # the tensor form
print("""
1 2
3 4
[torch.FloatTensor of size 2x2]
""")
print(variable.data.numpy()) # the numpy form
print("""
[[ 1. 2.]
[ 3. 4.]]
""")
tensor([[1., 2.],
[3., 4.]], requires_grad=True)
Variable containing:
1 2
3 4
[torch.FloatTensor of size 2x2]
tensor([[1., 2.],
[3., 4.]])
1 2
3 4
[torch.FloatTensor of size 2x2]
[[1. 2.]
[3. 4.]]
[[ 1. 2.]
[ 3. 4.]]
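Note: since PyTorch 0.4 the Variable API has been merged into Tensor, so the wrapper above is no longer needed; a tensor created with requires_grad=True gives the same result (a minimal sketch):
v = torch.tensor([[1., 2.], [3., 4.]], requires_grad=True)  # no Variable wrapper needed
out = torch.mean(v * v)
out.backward()
print(v.grad)  # tensor([[0.5000, 1.0000], [1.5000, 2.0000]]) - same gradient as above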
Activation Functions
import torch
import torch.nn.functional as F # activation functions live here
from torch.autograd import Variable
# make some fake data to plot
x = torch.linspace(-5, 5, 200) # x data (tensor), shape=(200,)
x = Variable(x)
# generate the different activation functions
x_np = x.data.numpy() # convert to a numpy array for plotting
# a few common activation functions
y_relu = torch.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x, dim=0)  # softmax is special: it outputs class probabilities for classification, so it isn't plotted here
# plot the activation functions
import matplotlib.pyplot as plt # Python's plotting module; tutorial at https://mofanpy.com/tutorials/data-manipulation/plt/
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')
plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')
plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')
plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')
plt.show()
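As the softmax comment above says, F.softmax turns raw scores into class probabilities rather than a curve worth plotting; a quick check (a minimal sketch):
scores = torch.tensor([1.0, 2.0, 3.0])
probs = F.softmax(scores, dim=0)   # pass dim explicitly to avoid the deprecation warning
print(probs)                       # tensor([0.0900, 0.2447, 0.6652])
print(probs.sum())                 # tensor(1.) - the probabilities sum to 1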
Building a Neural Network
Regression (fitting a relationship)
# build the dataset
import torch
import matplotlib.pyplot as plt
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
# plot
plt.scatter(x.data.numpy(), y.data.numpy())
plt.show()
# build the neural network
import torch
import torch.nn.functional as F # activation functions live here
class Net(torch.nn.Module): # inherit from torch's Module
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__() # inherit __init__
        # define the form of each layer
        self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer, linear output
        self.predict = torch.nn.Linear(n_hidden, n_output) # output layer, linear output
    def forward(self, x): # this also serves as Module's forward
        # forward-propagate the input; the network produces the output
        x = torch.relu(self.hidden(x)) # activation(hidden layer's linear output)
        x = self.predict(x) # output value
        return x
net = Net(n_feature=1, n_hidden=10, n_output=1)
print(net) # net's structure
print("""
Net (
(hidden): Linear (1 -> 10)
(predict): Linear (10 -> 1)
)
""")
Net(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(predict): Linear(in_features=10, out_features=1, bias=True)
)
Net (
(hidden): Linear (1 -> 10)
(predict): Linear (10 -> 1)
)
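Before training, the untrained net can already be called to sanity-check shapes (a minimal sketch; torch.zeros(5, 1) stands in for a batch of 5 samples):
print(net(torch.zeros(5, 1)).shape)  # torch.Size([5, 1]): 5 samples in, 5 predictions out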
# train the network
# the optimizer is the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.2) # pass in all of net's parameters and the learning rate
loss_func = torch.nn.MSELoss() # loss between prediction and ground truth (mean squared error)
# visualize the training process
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display
for t in range(200):
    prediction = net(x) # feed training data x to net, get the prediction
    loss = loss_func(prediction, y) # compute the loss between the two
    optimizer.zero_grad() # clear the leftover gradients from the previous step
    loss.backward(retain_graph=True) # backpropagate, compute the updates
    optimizer.step() # apply the updates to net's parameters
    # continuing inside the loop
    if t % 5 == 0:
        # plot and show learning process
        plt.clf()
        plt.title('loss - step=%d' % t)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.5)
        display.clear_output(wait=True) # refresh the plot
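A note on the retain_graph=True flag above: it only matters when backward() is called more than once on the same graph; here each iteration's forward pass builds a fresh graph, so the minimal update step would be (a sketch, equivalent to one loop iteration):
prediction = net(x)              # forward pass rebuilds the graph
loss = loss_func(prediction, y)
optimizer.zero_grad()            # clear old gradients
loss.backward()                  # plain backward is enough here
optimizer.step()                 # apply the update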
Classification
# build a fake dataset of two normally distributed clusters
import torch
import matplotlib.pyplot as plt
# fake data
n_data = torch.ones(100, 2) # base shape of the data
x0 = torch.normal(2*n_data, 1) # class 0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100) # class 0 y data (tensor), shape=(100,)
x1 = torch.normal(-2*n_data, 1) # class 1 x data (tensor), shape=(100, 2)
y1 = torch.ones(100) # class 1 y data (tensor), shape=(100,)
# note: the x, y data must take exactly the form below (torch.cat concatenates the data)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor) # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor) # LongTensor = 64-bit integer
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()
# build the network; same class as the previous example, only the input/output dimensions differ
import torch
class Net(torch.nn.Module): # inherit from torch's Module
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__() # inherit __init__
        self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer, linear output
        self.out = torch.nn.Linear(n_hidden, n_output) # output layer, linear output
    def forward(self, x):
        # forward-propagate the input; the network produces the output
        x = torch.relu(self.hidden(x)) # activation(hidden layer's linear output)
        x = self.out(x) # raw output; this is not yet the prediction, which needs another step
        return x
net = Net(n_feature=2, n_hidden=10, n_output=2) # one output per class
print(net) # net's structure
print("""
Net (
(hidden): Linear (2 -> 10)
(out): Linear (10 -> 2)
)
""")
Net(
(hidden): Linear(in_features=2, out_features=10, bias=True)
(out): Linear(in_features=10, out_features=2, bias=True)
)
Net (
(hidden): Linear (2 -> 10)
(out): Linear (10 -> 2)
)
# train the network
# the optimizer is the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.02) # pass in all of net's parameters and the learning rate
# when computing the loss, note the target is NOT one-hot but a 1-D LongTensor of shape (batch,)
# while the prediction is a 2-D tensor of shape (batch, n_classes)
loss_func = torch.nn.CrossEntropyLoss() # cross-entropy loss
# visualize the training process
import matplotlib.pyplot as plt
%matplotlib inline
from IPython import display
import torch.nn.functional as F # activation functions live here
for t in range(50):
    out = net(x) # feed training data x to net, get the raw output
    loss = loss_func(out, y) # compute the loss between the two
    # arguments must be (1. nn output, 2. target); the target label is NOT one-hot
    optimizer.zero_grad() # clear the leftover gradients from the previous step
    loss.backward(retain_graph=True) # backpropagate, compute the updates
    optimizer.step() # apply the updates to net's parameters
    if t % 5 == 0:
        # plot and show learning process
        # plt.cla()
        plt.title('loss - step=%d' % t)
        # the class with the highest probability after softmax is the prediction
        prediction = torch.max(F.softmax(out, dim=1), 1)[1]
        pred_y = prediction.data.numpy().squeeze()
        target_y = y.data.numpy()
        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = sum(pred_y == target_y)/200. # fraction of predictions matching the ground truth
        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.5)
        display.clear_output(wait=True) # refresh the plot
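The accuracy can also be computed without the numpy round-trip, staying in torch (a minimal sketch using the variables from the loop above):
pred = torch.max(F.softmax(out, dim=1), 1)[1]   # predicted class per sample
accuracy = (pred == y).float().mean()           # fraction of correct predictions
print('Accuracy=%.2f' % accuracy.item())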
Building Networks the Quick Way
# net1 uses the building steps from before
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)
    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x
net1 = Net(1, 10, 1) # net1, built the long way
# net2, built the quick way
net2 = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
print(net1) # net1's forward() can be customized, e.g. for an RNN
print(net2) # net2's printout also lists the activation layer; this way is simpler to build
Net(
(hidden): Linear(in_features=1, out_features=10, bias=True)
(predict): Linear(in_features=10, out_features=1, bias=True)
)
Sequential(
(0): Linear(in_features=1, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=1, bias=True)
)
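If you want the Sequential layers to carry names like net1's (hidden, predict), nn.Sequential also accepts an OrderedDict; a minimal sketch (net2_named is an arbitrary name):
from collections import OrderedDict
net2_named = torch.nn.Sequential(OrderedDict([
    ('hidden', torch.nn.Linear(1, 10)),
    ('relu', torch.nn.ReLU()),
    ('predict', torch.nn.Linear(10, 1)),
]))
print(net2_named)  # the printout now shows (hidden), (relu), (predict)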
Saving and Loading Models
import torch
import matplotlib.pyplot as plt
%matplotlib inline
# generate data and model
torch.manual_seed(1) # reproducible
# fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1) # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()) # noisy y data (tensor), shape=(100, 1)
def CreateAndTrainNet1():
    # build the network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.2) # optimizer
    loss_func = torch.nn.MSELoss() # loss function
    # train
    for t in range(100):
        prediction = net1(x) # forward pass to get the prediction
        loss = loss_func(prediction, y) # compute the loss
        optimizer.zero_grad() # clear gradients
        loss.backward() # backpropagate, compute gradients
        optimizer.step() # apply the updates to the network
    # two ways to save the model
    torch.save(net1, 'net.pkl') # save the entire network
    torch.save(net1.state_dict(), 'net_params.pkl') # save only the network's parameters
    # plot
    # local variables are released when the function returns, so draw the plot before returning
    plt.figure(1, figsize=(10, 3))
    plt.subplot(131)
    plt.title('Net1')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# load the entire network
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)
    plt.subplot(132)
    plt.title('Net2')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# load only the network's parameters
def restore_params():
    # build a fresh net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    # copy the saved parameters into net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)
    plt.subplot(133)
    plt.title('Net3')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
# show the results: net1, net2, and net3 are identical
# plot net1
# save net1 (1. the entire network, 2. parameters only)
CreateAndTrainNet1()
# plot net2
# restore the entire network
restore_net()
# plot net3
# restore the parameters into a new network
restore_params()
plt.show()
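Saving the state_dict is generally the safer of the two approaches (loading a whole pickled network requires the original class definitions to be importable). A fuller checkpoint usually bundles the optimizer state as well; a minimal sketch, where model, optimizer, and 'checkpoint.pkl' are placeholder names:
# save model and optimizer state together (model/optimizer are placeholders)
torch.save({'model': model.state_dict(),
            'optim': optimizer.state_dict()}, 'checkpoint.pkl')
# ...later: restore both
checkpoint = torch.load('checkpoint.pkl')
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optim'])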
Batch Training with DataLoader
import torch
import torch.utils.data as Data
torch.manual_seed(1) # reproducible
BATCH_SIZE = 5 # number of samples per training batch
# with BATCH_SIZE = 8: step 0 yields 8 samples; at step 1 fewer than 8 remain, so only the last 2 are yielded
x = torch.linspace(1, 10, 10) # x data (torch tensor)
y = torch.linspace(10, 1, 10) # y data (torch tensor)
# first wrap the data into a Dataset that torch understands
torch_dataset = Data.TensorDataset(x, y)
# put the dataset into a DataLoader
loader = Data.DataLoader(
    dataset=torch_dataset, # torch TensorDataset format
    batch_size=BATCH_SIZE, # mini batch size
    shuffle=True, # whether to shuffle the data (usually a good idea)
    num_workers=2, # subprocesses for loading the data
)
for epoch in range(3): # train over the whole dataset 3 times
    for step, (batch_x, batch_y) in enumerate(loader): # each step the loader yields one mini-batch
        # pretend this is where your training happens...
        # print some data
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())
Epoch: 0 | Step: 0 | batch x: [5. 3. 1. 7. 9.] | batch y: [ 6. 8. 10. 4. 2.]
Epoch: 0 | Step: 1 | batch x: [ 8. 10. 2. 6. 4.] | batch y: [3. 1. 9. 5. 7.]
Epoch: 1 | Step: 0 | batch x: [5. 9. 2. 6. 1.] | batch y: [ 6. 2. 9. 5. 10.]
Epoch: 1 | Step: 1 | batch x: [ 3. 4. 7. 10. 8.] | batch y: [8. 7. 4. 1. 3.]
Epoch: 2 | Step: 0 | batch x: [6. 9. 4. 8. 7.] | batch y: [5. 2. 7. 3. 4.]
Epoch: 2 | Step: 1 | batch x: [10. 3. 2. 1. 5.] | batch y: [ 1. 8. 9. 10. 6.]
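To see the leftover-batch behavior mentioned in the BATCH_SIZE = 8 comment, a minimal sketch (loader8 is an illustrative name; drop_last=True would discard the incomplete final batch instead):
loader8 = Data.DataLoader(dataset=torch_dataset, batch_size=8, shuffle=True)
for step, (bx, by) in enumerate(loader8):
    print('Step:', step, '| batch size:', bx.size(0))  # step 0 -> 8 samples, step 1 -> 2 samples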
Speeding Up Neural Network Training
The following approaches are covered (a quick update-rule sketch follows the list):
- Stochastic Gradient Descent (SGD): train on mini-batches
- Momentum: uses momentum to speed up gradient convergence
- AdaGrad: a per-parameter learning rate that resists updates in wrong directions
- RMSProp: combines the two ideas above
- Adam: combines the three ideas above
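As a rough intuition for the list above: plain SGD moves each weight straight down the current gradient, while Momentum keeps a running velocity so consistent gradients accumulate speed. A schematic sketch of the update rules (w, grad, v, lr, momentum are illustrative names, not the torch internals):
# SGD:      w = w - lr * grad
# Momentum: v = momentum * v - lr * grad
#           w = w + v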
# generate the data
import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt
torch.manual_seed(1) # reproducible
LR = 0.01
BATCH_SIZE = 32
EPOCH = 12
# fake dataset
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))
# plot dataset
plt.scatter(x.numpy(), y.numpy())
plt.show()
# use the data loader introduced in the previous section
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)
# create one neural network for each optimizer
# the default network form
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20) # hidden layer
        self.predict = torch.nn.Linear(20, 1) # output layer
    def forward(self, x):
        x = F.relu(self.hidden(x)) # activation function for hidden layer
        x = self.predict(x) # linear output
        return x
# one net per optimizer
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]
# create a different optimizer to train each network and record its loss
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]
loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []] # record each network's loss during training
# train
for epoch in range(EPOCH):
    print('Epoch: ', epoch)
    for step, (b_x, b_y) in enumerate(loader):
        # let each optimizer update its own network
        for net, opt, l_his in zip(nets, optimizers, losses_his):
            output = net(b_x) # get output for every net
            loss = loss_func(output, b_y) # compute loss for every net
            opt.zero_grad() # clear gradients for next train
            loss.backward() # backpropagation, compute gradients
            opt.step() # apply gradients
            l_his.append(loss.data.numpy()) # loss recorder
Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
# plot the loss curves
labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
for i, l_his in enumerate(losses_his):
    plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()