Max Pooling Forward Pass
import numpy as np


def max_pooling_forward(z, pooling, strides=(2, 2), padding=(0, 0)):
    """
    Max pooling forward pass.
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :param pooling: pooling window size (k1, k2)
    :param strides: strides
    :param padding: zero padding
    :return: pooled output, shape (N, C, out_h, out_w)
    """
    N, C, H, W = z.shape
    # zero-pad the input
    padding_z = np.pad(z, ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
                       'constant', constant_values=0)
    # output height and width
    out_h = (H + 2 * padding[0] - pooling[0]) // strides[0] + 1
    out_w = (W + 2 * padding[1] - pooling[1]) // strides[1] + 1
    pool_z = np.zeros((N, C, out_h, out_w))

    for n in np.arange(N):
        for c in np.arange(C):
            for i in np.arange(out_h):
                for j in np.arange(out_w):
                    # maximum over each pooling window
                    pool_z[n, c, i, j] = np.max(padding_z[n, c,
                                                strides[0] * i:strides[0] * i + pooling[0],
                                                strides[1] * j:strides[1] * j + pooling[1]])
    return pool_z
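As a quick illustrative sanity check (not part of the original code): a 4x4 feature map pooled with a 2x2 window and the default stride of 2 shrinks to 2x2, keeping the maximum of each window.

z = np.arange(16, dtype=np.float64).reshape(1, 1, 4, 4)
print(max_pooling_forward(z, (2, 2)))
# [[[[ 5.  7.]
#    [13. 15.]]]]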
Max Pooling Backward Pass
def max_pooling_backward(next_dz, z, pooling, strides=(2, 2), padding=(0, 0)):
    """
    Max pooling backward pass.
    :param next_dz: gradient of the loss with respect to the max pooling output, shape (N, C, out_h, out_w)
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :param pooling: pooling window size (k1, k2)
    :param strides: strides
    :param padding: zero padding
    :return: gradient of the loss with respect to z
    """
    N, C, H, W = z.shape
    _, _, out_h, out_w = next_dz.shape
    # zero-pad the input
    padding_z = np.pad(z, ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
                       'constant', constant_values=0)
    # gradient with respect to the padded input
    padding_dz = np.zeros_like(padding_z)

    for n in np.arange(N):
        for c in np.arange(C):
            for i in np.arange(out_h):
                for j in np.arange(out_w):
                    # locate the maximum inside the window and route the gradient to it
                    flat_idx = np.argmax(padding_z[n, c,
                                         strides[0] * i:strides[0] * i + pooling[0],
                                         strides[1] * j:strides[1] * j + pooling[1]])
                    h_idx = strides[0] * i + flat_idx // pooling[1]
                    w_idx = strides[1] * j + flat_idx % pooling[1]
                    padding_dz[n, c, h_idx, w_idx] += next_dz[n, c, i, j]
    # strip the zero padding before returning
    return _remove_padding(padding_dz, padding)
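max_pooling_backward (and avg_pooling_backward below) rely on a helper _remove_padding that is not defined in this excerpt. A minimal sketch, assuming padding is the same (p1, p2) tuple passed to the forward pass:

def _remove_padding(z, padding):
    """Strip the zero padding previously added on the H and W axes."""
    if padding[0] > 0 and padding[1] > 0:
        return z[:, :, padding[0]:-padding[0], padding[1]:-padding[1]]
    elif padding[0] > 0:
        return z[:, :, padding[0]:-padding[0], :]
    elif padding[1] > 0:
        return z[:, :, :, padding[1]:-padding[1]]
    return z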
Average Pooling Forward Pass
def avg_pooling_forward(z, pooling, strides=(2, 2), padding=(0, 0)):
    """
    Average pooling forward pass.
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :param pooling: pooling window size (k1, k2)
    :param strides: strides
    :param padding: zero padding
    :return: pooled output, shape (N, C, out_h, out_w)
    """
    N, C, H, W = z.shape
    # zero-pad the input
    padding_z = np.pad(z, ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
                       'constant', constant_values=0)
    # output height and width
    out_h = (H + 2 * padding[0] - pooling[0]) // strides[0] + 1
    out_w = (W + 2 * padding[1] - pooling[1]) // strides[1] + 1
    pool_z = np.zeros((N, C, out_h, out_w))

    for n in np.arange(N):
        for c in np.arange(C):
            for i in np.arange(out_h):
                for j in np.arange(out_w):
                    # mean over each pooling window
                    pool_z[n, c, i, j] = np.mean(padding_z[n, c,
                                                 strides[0] * i:strides[0] * i + pooling[0],
                                                 strides[1] * j:strides[1] * j + pooling[1]])
    return pool_z
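For contrast with the max pooling example above (illustrative): the same 4x4 input, averaged per window.

z = np.arange(16, dtype=np.float64).reshape(1, 1, 4, 4)
print(avg_pooling_forward(z, (2, 2)))
# [[[[ 2.5  4.5]
#    [10.5 12.5]]]]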
Average Pooling Backward Pass
def avg_pooling_backward(next_dz, z, pooling, strides=(2, 2), padding=(0, 0)):
    """
    Average pooling backward pass.
    :param next_dz: gradient of the loss with respect to the average pooling output, shape (N, C, out_h, out_w)
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :param pooling: pooling window size (k1, k2)
    :param strides: strides
    :param padding: zero padding
    :return: gradient of the loss with respect to z
    """
    N, C, H, W = z.shape
    _, _, out_h, out_w = next_dz.shape
    # zero-pad the input
    padding_z = np.pad(z, ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
                       'constant', constant_values=0)
    # gradient with respect to the padded input
    padding_dz = np.zeros_like(padding_z)

    for n in np.arange(N):
        for c in np.arange(C):
            for i in np.arange(out_h):
                for j in np.arange(out_w):
                    # split the gradient evenly among the window's inputs
                    padding_dz[n, c,
                               strides[0] * i:strides[0] * i + pooling[0],
                               strides[1] * j:strides[1] * j + pooling[1]] += next_dz[n, c, i, j] / (pooling[0] * pooling[1])
    # strip the zero padding before returning
    return _remove_padding(padding_dz, padding)
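As an illustrative check (not part of the original), the analytic gradient for the loss L = sum(avg_pooling_forward(z)) can be compared against central finite differences:

np.random.seed(0)
z = np.random.randn(1, 1, 4, 4)
dz = avg_pooling_backward(np.ones((1, 1, 2, 2)), z, (2, 2))

eps = 1e-6
num_dz = np.zeros_like(z)
for idx in np.ndindex(z.shape):
    z_pos, z_neg = z.copy(), z.copy()
    z_pos[idx] += eps
    z_neg[idx] -= eps
    num_dz[idx] = (avg_pooling_forward(z_pos, (2, 2)).sum() -
                   avg_pooling_forward(z_neg, (2, 2)).sum()) / (2 * eps)

print(np.allclose(dz, num_dz, atol=1e-5))  # expect True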
Global Max Pooling Forward Pass
def global_max_pooling_forward(z):
    """
    Global max pooling forward pass.
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :return: output of shape (N, C)
    """
    return np.max(z, axis=(2, 3))
Global Max Pooling Backward Pass
def global_max_pooling_backward(next_dz, z):
    """
    Global max pooling backward pass.
    :param next_dz: gradient of the loss with respect to the global max pooling output, shape (N, C)
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :return: gradient of the loss with respect to z
    """
    N, C, H, W = z.shape
    dz = np.zeros_like(z)
    for n in np.arange(N):
        for c in np.arange(C):
            # locate the maximum of the feature map and route the gradient to it
            idx = np.argmax(z[n, c, :, :])
            h_idx = idx // W
            w_idx = idx % W
            dz[n, c, h_idx, w_idx] = next_dz[n, c]
    return dz
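A small illustrative check (input values assumed for the example): the entire gradient is routed to the single argmax location.

z = np.array([[[[1., 3.],
                [2., 0.]]]])                      # max is at (0, 1)
print(global_max_pooling_backward(np.array([[5.]]), z))
# [[[[0. 5.]
#    [0. 0.]]]]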
Global Average Pooling Forward Pass
def global_avg_pooling_forward(z):
    """
    Global average pooling forward pass.
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :return: output of shape (N, C)
    """
    return np.mean(z, axis=(2, 3))
Global Average Pooling Backward Pass
def global_avg_pooling_backward(next_dz, z):
    """
    Global average pooling backward pass.
    :param next_dz: gradient of the loss with respect to the global average pooling output, shape (N, C)
    :param z: input feature maps, shape (N, C, H, W); N is the batch size, C the number of channels
    :return: gradient of the loss with respect to z
    """
    N, C, H, W = z.shape
    dz = np.zeros_like(z)
    for n in np.arange(N):
        for c in np.arange(C):
            # split the gradient evenly across all H * W positions
            dz[n, c, :, :] += next_dz[n, c] / (H * W)
    return dz
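And the corresponding illustrative check for the average case: every position receives 1/(H*W) of the incoming gradient.

z = np.zeros((1, 1, 2, 2))
print(global_avg_pooling_backward(np.array([[8.]]), z))
# [[[[2. 2.]
#    [2. 2.]]]]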
Max Pooling and Average Pooling Layers
class PoolingLayer(Layer):
    """A parent class of MaxPooling2D and AveragePooling2D.

    `Layer`, `image_to_column` and `column_to_image` are provided by the
    surrounding library; `self.input_shape` is set by the framework before use.
    """
    def __init__(self, pool_shape=(2, 2), stride=1, padding=0):
        self.pool_shape = pool_shape
        self.stride = stride
        self.padding = padding
        self.trainable = True

    def forward_pass(self, X, training=True):
        self.layer_input = X
        batch_size, channels, height, width = X.shape
        _, out_height, out_width = self.output_shape()
        # treat every channel as a separate single-channel image,
        # then lay each pooling window out as one column
        X = X.reshape(batch_size * channels, 1, height, width)
        X_col = image_to_column(X, self.pool_shape, self.stride, self.padding)
        # MaxPool or AveragePool specific method
        output = self._pool_forward(X_col)
        output = output.reshape(out_height, out_width, batch_size, channels)
        output = output.transpose(2, 3, 0, 1)
        return output

    def backward_pass(self, accum_grad):
        batch_size, _, _, _ = accum_grad.shape
        channels, height, width = self.input_shape
        accum_grad = accum_grad.transpose(2, 3, 0, 1).ravel()
        # MaxPool or AveragePool specific method
        accum_grad_col = self._pool_backward(accum_grad)
        # scatter the column gradients back into image layout
        accum_grad = column_to_image(accum_grad_col,
                                     (batch_size * channels, 1, height, width),
                                     self.pool_shape, self.stride, 0)
        accum_grad = accum_grad.reshape((batch_size,) + self.input_shape)
        return accum_grad

    def output_shape(self):
        channels, height, width = self.input_shape
        out_height = (height - self.pool_shape[0]) / self.stride + 1
        out_width = (width - self.pool_shape[1]) / self.stride + 1
        # the pooling windows must tile the input exactly
        assert out_height % 1 == 0
        assert out_width % 1 == 0
        return channels, int(out_height), int(out_width)


class MaxPooling2D(PoolingLayer):
    def _pool_forward(self, X_col):
        # remember which row (position inside the window) held the maximum
        arg_max = np.argmax(X_col, axis=0).flatten()
        output = X_col[arg_max, range(arg_max.size)]
        self.cache = arg_max
        return output

    def _pool_backward(self, accum_grad):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
        arg_max = self.cache
        # route each gradient to the cached argmax position
        accum_grad_col[arg_max, range(accum_grad.size)] = accum_grad
        return accum_grad_col


class AveragePooling2D(PoolingLayer):
    def _pool_forward(self, X_col):
        output = np.mean(X_col, axis=0)
        return output

    def _pool_backward(self, accum_grad):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
        # split each gradient evenly over the window's positions
        accum_grad_col[:, range(accum_grad.size)] = 1. / accum_grad_col.shape[0] * accum_grad
        return accum_grad_col
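The classes above delegate window extraction to image_to_column / column_to_image, which are not shown here. The sketch below is a self-contained illustration of the im2col idea they rely on (the loop-based extraction is an assumption for illustration, not the library's implementation): once each pooling window becomes a column, max and average pooling each collapse to a single reduction over axis 0.

import numpy as np

x = np.arange(16.0).reshape(1, 1, 4, 4)
N, C, H, W = x.shape
k, s = 2, 2  # 2x2 window, stride 2

# one column per pooling window
cols = np.array([
    x[:, :, i * s:i * s + k, j * s:j * s + k].reshape(N * C, -1).T
    for i in range(H // s) for j in range(W // s)
])                                                  # (num_windows, k*k, N*C)
cols = cols.transpose(1, 0, 2).reshape(k * k, -1)   # (k*k, num_windows*N*C)

print(np.max(cols, axis=0))   # [ 5.  7. 13. 15.] -> max pooling output
print(np.mean(cols, axis=0))  # [ 2.5  4.5 10.5 12.5] -> average pooling output

This column layout is why _pool_forward only needs an argmax or mean over axis 0, and why _pool_backward can scatter gradients back by column index.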