我平时常用的分类网络

姓名：毕晓鹏

学号：19021210824

【嵌牛导读】自己平时积累了一些pytorch的训练trick，分享给大家

【嵌牛鼻子】深度学习，分类网络

【嵌牛提问】了解其他的网络吗

【嵌牛正文】

resnet

平时经常用到resnet网络当作backbone,resnet结构清晰，比较方便单独拿出一层来单独操作。

BasicBlock采用的是两个3x3的卷积核

Bottleneck先用1x1卷积核降维，然后3x3卷积核，1x1卷积核升维，所以残差块输入通道和输出通道是不变的。

resnet层数计算：【3，4，6，3】（3+4+6+3）*3 + 2 = 50

3，4，6，3为残差块数量，其中每个块里面含有三层，然后加上分类层和第一层一共50

1x1卷积核作用：

降维：减小参数量，减小了通道数量，后面进行下一步操作，需要的卷积核数量就少了。

升维：实现跨通道的信息组合（降维时同样会组合不同通道的信息）；另外可以在feature map尺寸不变（即不损失分辨率）的前提下大幅增加非线性特性。

resnet网络整体结构

# encoding: utf-8
"""
@author:  liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import math

import torch
from torch import nn


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to build the
            shortcut branch; the input itself is used when it is None.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(conv-bn-relu-conv-bn(x) + shortcut(x))."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions.

    The input has ``inplanes`` channels. The first 1x1 convolution maps it
    to ``planes`` channels, the 3x3 convolution keeps ``planes`` channels,
    and the last 1x1 convolution expands to ``planes * expansion``.

    Args:
        inplanes: number of input channels.
        planes: number of channels inside the bottleneck.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to build the
            shortcut branch; the input itself is used when it is None.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(bottleneck(x) + shortcut(x))."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    """ResNet feature extractor (stem plus four stages, no classifier head).

    Args:
        last_stride: stride used by the first block of ``layer4``.
        block: residual block class; must expose an ``expansion`` attribute.
        layers: number of blocks in each of the four stages.
    """

    # The default for ``layers`` is a tuple so it cannot be mutated and
    # shared between instances.
    def __init__(self, last_stride=1, block=Bottleneck, layers=(3, 4, 6, 3)):
        super().__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3],
                                       stride=last_stride)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage consisting of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when needed, a
        projection shortcut; the remaining blocks use the defaults.
        """
        out_planes = planes * block.expansion
        downsample = None
        # Project the shortcut whenever the spatial size or the channel
        # count of the block output differs from its input.
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        # ``blocks`` is the total number of blocks in this stage.
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and the four stages; return the last feature map."""
        # Stem: 7x7 convolution, 64 channels, stride 2.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # 3x3 max pooling, stride 2.
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x

    def load_param(self, model_path):
        """Copy pretrained weights from ``model_path`` into this model.

        Every checkpoint entry whose key contains ``'fc'`` is skipped.
        A checkpoint key that does not exist in this model raises KeyError.
        """
        # Load onto the CPU so a checkpoint saved from a GPU can be read on
        # any host; copy_ moves the values to each parameter's device.
        param_dict = torch.load(model_path, map_location="cpu")
        print("load pretain success!")
        # state_dict() builds a new mapping on each call, so fetch it once.
        own_state = self.state_dict()
        for key in param_dict:
            if 'fc' in key:
                continue
            own_state[key].copy_(param_dict[key])

    def random_init(self):
        """Re-initialise all Conv2d and BatchNorm2d weights in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


def resnet50(last_stride):
    """Build a ResNet-50 backbone whose ``layer4`` uses ``last_stride``."""
    return ResNet(last_stride=last_stride, block=Bottleneck,
                  layers=[3, 4, 6, 3])

resnext

resnext使用了分组卷积的形式来提高网络的性能

分组卷积效果好的原因：

不同group可以学到不同的信息，可以参考alexnet

代码上只是残差块不同

class Bottleneck(nn.Module):
    """
    ResNeXt bottleneck type C
    """

    expansion = 4

    def __init__(self, inplanes, planes, with_ibn, baseWidth, cardinality, stride=1, downsample=None):
        """ Constructor

        Args:
            inplanes: input channel dimensionality
            planes: output channel dimensionality (before expansion)
            with_ibn: when true, the first normalisation layer is ``IBN``
                instead of ``nn.BatchNorm2d``
            baseWidth: base width. default 4
            cardinality: num of convolution groups. default 32
            stride: conv stride. Replaces pooling layer.
            downsample: optional module applied to the input to build the
                shortcut branch; the input itself is used when it is None.
        """
        super(Bottleneck, self).__init__()

        # Width of one group (D) times the number of groups (C) gives the
        # channel count used inside the bottleneck.
        D = int(math.floor(planes * (baseWidth / 64)))
        C = cardinality
        group_width = D * C
        out_planes = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, stride=1, padding=0, bias=False)
        if with_ibn:
            # NOTE(review): IBN is not defined in this snippet; it must be
            # provided by the surrounding module.
            self.bn1 = IBN(group_width)
        else:
            self.bn1 = nn.BatchNorm2d(group_width)
        # Grouped 3x3 convolution: the only structural difference from the
        # plain ResNet bottleneck.
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=C, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample

    def forward(self, x):
        """Return relu(bottleneck(x) + shortcut(x))."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

senet

senet模块操作

首先是Squeeze操作，我们顺着空间维度来进行特征压缩，将每个二维的特征通道变成一个实数，这个实数某种程度上具有全局的感受野，并且输出的维度和输入的特征通道数相匹配。它表征着在特征通道上响应的全局分布，而且使得靠近输入的层也可以获得全局的感受野，这一点在很多任务中都是非常有用的。

其次是Excitation操作，它是一个类似于循环神经网络中门的机制。通过参数来为每个特征通道生成权重，其中参数被学习用来显式地建模特征通道间的相关性。

最后是一个Reweight的操作，我们将Excitation输出的权重看做是经过特征选择后每个特征通道的重要性，然后通过乘法逐通道加权到先前的特征上，完成在通道维度上对原始特征的重标定。

实现

from torch import nn

class SELayer(nn.Module):
    """Squeeze-and-Excitation layer: rescales each channel of its input.

    Args:
        channel: number of channels of the input feature map.
        reduction: factor by which the hidden width of the gating MLP is
            reduced relative to ``channel``.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        # Keep at least one hidden unit: with channel < reduction the plain
        # quotient is 0, which would build an empty bottleneck whose gate
        # is a constant 0.5 for every channel.
        hidden = max(1, int(channel // reduction))
        # Squeeze: global average pooling to one value per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: bottleneck MLP ending in a sigmoid gate.
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` multiplied channel-wise by its learned gate.

        ``x`` must be 4-D; its first two dimensions are treated as batch
        and channel.
        """
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        # Reweight: the (b, c, 1, 1) gate broadcasts over the spatial dims.
        return x * y

densenet

densenet的特点

特征重用：使用密集链接，使用了不同层次的特征，一般分类网络只使用最高层次的特征，densenet保留了低维度的特征，对特征的利用率比较高。

参数少，因为每一层输出的通道数很少

占用显存高，因为做前向传播时，不但需要当前层特征，还需要之前的特征。

res2net

网络改变了原来串行的方式：先在通道维度上进行拆分，再分组提取不同尺度的特征，最后融合特征，从而提高网络的复杂度和表达能力。上图表示在一个block中有4个尺度。

图中的横向箭头是加操作，融合不同感受野的特征图。

残差块实现代码为：

class Bottle2neck(nn.Module):
    """Res2Net bottleneck: the 3x3 stage is split into ``scale`` channel groups."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
        """ Constructor

        Args:
            inplanes: input channel dimensionality
            planes: output channel dimensionality (before expansion)
            stride: conv stride. Replaces pooling layer.
            downsample: None when stride = 1
            baseWidth: basic width of conv3x3
            scale: number of scale.
            stype: 'normal': normal set. 'stage': first block of a new stage.
        """
        super(Bottle2neck, self).__init__()

        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)

        # The last channel group bypasses the 3x3 convolutions, so only
        # scale - 1 of them are needed (a single one when scale == 1).
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if stype == 'stage':
            # Pools the bypassed group so its spatial size follows the
            # strided convolutions.
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)

        self.convs = nn.ModuleList(
            nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False)
            for _ in range(self.nums)
        )
        self.bns = nn.ModuleList(nn.BatchNorm2d(width) for _ in range(self.nums))

        self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        """Return relu(multi-scale bottleneck(x) + shortcut(x))."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        # Split the channels into groups of ``width``.
        spx = torch.split(out, self.width, 1)
        pieces = []
        sp = None
        # zip stops after the ``nums`` conv/bn pairs, leaving the last
        # group of ``spx`` for the bypass branch below.
        for i, (conv, bn, chunk) in enumerate(zip(self.convs, self.bns, spx)):
            if i == 0 or self.stype == 'stage':
                sp = chunk
            else:
                # Feed the previous group's output into this group.
                sp = sp + chunk
            sp = self.relu(bn(conv(sp)))
            pieces.append(sp)

        if self.scale != 1:
            if self.stype == 'normal':
                pieces.append(spx[self.nums])
            elif self.stype == 'stage':
                pieces.append(self.pool(spx[self.nums]))

        # Concatenate once instead of growing the tensor inside the loop.
        out = pieces[0] if len(pieces) == 1 else torch.cat(pieces, 1)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

resnest

ResNeSt 的全称是：Split-Attention Networks，也就是特别引入了Split-Attention模块。

论文借鉴了

GoogleNet 采用了Multi-path机制，其中每个网络块均由不同的卷积kernels组成。

ResNeXt在ResNet bottle模块中采用组卷积，将multi-path结构转换为统一操作。

SE-Net 通过自适应地重新校准通道特征响应来引入通道注意力（channel-attention）机制。

SK-Net 通过两个网络分支引入特征图注意力（feature-map attention）。

ResNeSt 和 SE-Net、SK-Net 的对应图示如下：

其中上图中都包含的 Split Attention模块如下图所示：

参考：