1. 各层参数的 shape
import torch
# The weight of a linear layer is a 2-D matrix of shape (out_features, in_features).
# The bias of a linear layer is a 1-D vector whose length equals out_features.
fc1 = torch.nn.Linear(10, 512)
print("fc1.weight.data.size() = ", fc1.weight.data.size())
# Also report the bias shape, which the sample output listed below includes.
print("fc1.bias.data.size() = ", fc1.bias.data.size())
# 输出:
fc1.weight.data.size() = torch.Size([512, 10])
fc1.bias.data.size() = torch.Size([512])
# 卷积核通道数 = 卷积输入数据的通道数
# 卷积核的个数 = 卷积层输出的通道数
import torch
# A 2-D convolution stores its weight as a 4-D tensor laid out as
# (out_channels, in_channels, kernel_height, kernel_width).
# Its bias is a 1-D vector with one entry per output channel.
conv1 = torch.nn.Conv2d(1, 8, (3, 16))
conv1_weight_shape = conv1.weight.data.size()
conv1_bias_shape = conv1.bias.data.size()
print("conv1.weight.data.size() = ", conv1_weight_shape)
print("conv1.bias.data.size() = ", conv1_bias_shape)
# 输出
conv1.weight.data.size() = torch.Size([8, 1, 3, 16])
conv1.bias.data.size() = torch.Size([8])
import torch
# A batch-norm layer keeps two learnable 1-D vectors, weight and bias,
# each holding num_features entries.
bn1 = torch.nn.BatchNorm2d(num_features=64)
bn1_weight_shape = bn1.weight.data.size()
bn1_bias_shape = bn1.bias.data.size()
print("bn1.weight.data.size() = ", bn1_weight_shape)
print("bn1.bias.data.size() = ", bn1_bias_shape)
# 输出
bn1.weight.data.size() = torch.Size([64])
bn1.bias.data.size() = torch.Size([64])
2. 打印整个神经网络各层的参数量
import torch
import torch.nn as nn
# 使用的是torchsummary提供的summary方法
from torchsummary import summary
# 定义的神经网络
class Module(nn.Module):
    """Tiny demo network: conv -> max-pool -> batch-norm -> ReLU -> linear.

    The linear layer expects 640 input features, i.e. 10 channels of 8x8,
    which is what a 3x16x16 input produces after the padded 3x3 convolution
    and the 2x2 max-pool.
    """

    def __init__(self):
        super().__init__()
        # Submodules are registered in this order; parameter listings follow it.
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=10, kernel_size=3, padding=1, bias=False
        )
        self.bn = nn.BatchNorm2d(num_features=10)
        self.maxPooling = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.Linear(in_features=640, out_features=10)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run the input batch through every layer and return the linear output."""
        features = self.conv1(x)
        features = self.maxPooling(features)
        features = self.bn(features)
        features = self.relu(features)
        # Collapse (N, 10, 8, 8) into (N, 640) so the linear layer can consume it.
        batch = features.size(0)
        flat = features.view(batch, -1)
        return self.fc(flat)
if __name__ == '__main__':
    # Print a per-layer table of output shapes and parameter counts for a 3x16x16 input.
    model = Module()
    # The model is left on the CPU, so torchsummary must build its dummy input on the
    # CPU too. Its default device is "cuda", which would feed a CUDA tensor to the
    # CPU model (and fail) on any machine where a GPU is available.
    summary(model, input_size=(3, 16, 16), batch_size=-1, device="cpu")
Layer (type) Output Shape Param #
Conv2d-1 [-1, 10, 16, 16] 270
MaxPool2d-2 [-1, 10, 8, 8] 0
BatchNorm2d-3 [-1, 10, 8, 8] 20
ReLU-4 [-1, 10, 8, 8] 0
Linear-5 [-1, 10] 6,410
Total params: 6,700
Trainable params: 6,700
Non-trainable params: 0
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 0.03
Estimated Total Size (MB): 0.06
3. 自定义神经网络每层的参数
class Module(nn.Module):
    """Demo network whose learnable parameters are set to known, hand-picked values.

    Layers: conv -> max-pool -> batch-norm -> ReLU -> linear. Right after
    construction every parameter holds consecutive numbers (0, 1, 2, ...), which
    makes it easy to see where each value ends up when the parameters are printed.
    """

    def __init__(self):
        super(Module, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=3, padding=1, bias=False)
        self.bn = nn.BatchNorm2d(10)
        self.maxPooling = nn.MaxPool2d(2)
        self.fc = nn.Linear(640, 10)
        self.relu = nn.ReLU(inplace=True)
        # Replace the default random initialisation with the fixed values.
        self._model_param_init()

    def forward(self, x):
        """Run the input batch through every layer and return the linear output."""
        x = self.conv1(x)
        x = self.maxPooling(x)
        x = self.bn(x)
        x = self.relu(x)
        # Flatten (N, 10, 8, 8) to (N, 640); this is the shape a 3x16x16 input yields.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def _model_param_init(self):
        """Fill every learnable parameter with consecutive numbers starting at 0.

        torch.arange with integer arguments yields int64 tensors. Assigning those
        through ``.data`` would turn the parameters into integer tensors, which the
        layers cannot use in forward/backward. Copying in place instead keeps each
        parameter's floating-point dtype, so printed values appear as 0., 1., 2., ...
        """
        with torch.no_grad():
            self.conv1.weight.copy_(torch.arange(270).reshape(10, 3, 3, 3))
            # conv1 is built with bias=False, so there is no conv1.bias to set.
            self.bn.weight.copy_(torch.arange(10))
            self.bn.bias.copy_(torch.arange(10))
            self.fc.weight.copy_(torch.arange(6400).reshape(10, 640))
            self.fc.bias.copy_(torch.arange(10))
if __name__ == '__main__':
    # Build the network, then print each learnable parameter next to its qualified name.
    model = Module()
    for param_name, param_tensor in model.named_parameters():
        print(param_name, ':', param_tensor)
conv1.weight : Parameter containing:
tensor([[[[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8]],
[[ 9, 10, 11],
[ 12, 13, 14],
[ 15, 16, 17]],
[[ 18, 19, 20],
[ 21, 22, 23],
[ 24, 25, 26]]],
[[[ 27, 28, 29],
[ 30, 31, 32],
[ 33, 34, 35]],
[[ 36, 37, 38],
[ 39, 40, 41],
[ 42, 43, 44]],
[[ 45, 46, 47],
[ 48, 49, 50],
[ 51, 52, 53]]],
[[[ 54, 55, 56],
[ 57, 58, 59],
[ 60, 61, 62]],
[[ 63, 64, 65],
[ 66, 67, 68],
[ 69, 70, 71]],
[[ 72, 73, 74],
[ 75, 76, 77],
[ 78, 79, 80]]],
[[[ 81, 82, 83],
[ 84, 85, 86],
[ 87, 88, 89]],
[[ 90, 91, 92],
[ 93, 94, 95],
[ 96, 97, 98]],
[[ 99, 100, 101],
[102, 103, 104],
[105, 106, 107]]],
[[[108, 109, 110],
[111, 112, 113],
[114, 115, 116]],
[[117, 118, 119],
[120, 121, 122],
[123, 124, 125]],
[[126, 127, 128],
[129, 130, 131],
[132, 133, 134]]],
[[[135, 136, 137],
[138, 139, 140],
[141, 142, 143]],
[[144, 145, 146],
[147, 148, 149],
[150, 151, 152]],
[[153, 154, 155],
[156, 157, 158],
[159, 160, 161]]],
[[[162, 163, 164],
[165, 166, 167],
[168, 169, 170]],
[[171, 172, 173],
[174, 175, 176],
[177, 178, 179]],
[[180, 181, 182],
[183, 184, 185],
[186, 187, 188]]],
[[[189, 190, 191],
[192, 193, 194],
[195, 196, 197]],
[[198, 199, 200],
[201, 202, 203],
[204, 205, 206]],
[[207, 208, 209],
[210, 211, 212],
[213, 214, 215]]],
[[[216, 217, 218],
[219, 220, 221],
[222, 223, 224]],
[[225, 226, 227],
[228, 229, 230],
[231, 232, 233]],
[[234, 235, 236],
[237, 238, 239],
[240, 241, 242]]],
[[[243, 244, 245],
[246, 247, 248],
[249, 250, 251]],
[[252, 253, 254],
[255, 256, 257],
[258, 259, 260]],
[[261, 262, 263],
[264, 265, 266],
[267, 268, 269]]]], requires_grad=True)
bn.weight : Parameter containing:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], requires_grad=True)
bn.bias : Parameter containing:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], requires_grad=True)
fc.weight : Parameter containing:
tensor([[ 0, 1, 2, ..., 637, 638, 639],
[ 640, 641, 642, ..., 1277, 1278, 1279],
[1280, 1281, 1282, ..., 1917, 1918, 1919],
...,
[4480, 4481, 4482, ..., 5117, 5118, 5119],
[5120, 5121, 5122, ..., 5757, 5758, 5759],
[5760, 5761, 5762, ..., 6397, 6398, 6399]], requires_grad=True)
fc.bias : Parameter containing:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], requires_grad=True)