- First, add `self.reg = tf_contrib.layers.l2_regularizer(1e-10)` (in the full code below the scale comes from `config.weight_decay`), and then pass `regularizer=self.reg` when calling `tf.get_variable`.
- Next, when building the graph, use the `tf.GraphKeys.REGULARIZATION_LOSSES` collection to fetch all the values that need to be regularized and sum them up: `loss_reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`. This `loss_reg` is the regularization term of the total loss (see the sketch after the two snippets below).
- If you do not want regularization, just set `config.weight_decay` to a tiny coefficient such as 1e-10; it cannot be set to 0.0, or an error is raised (at a scale of 0 the regularizer is disabled, nothing is added to the collection, and `tf.add_n` fails on an empty list).
- A regularization coefficient of 1e-3 is usually a good choice.
- An interesting observation: if the regularization term is subtracted instead of added, i.e. `L_model + L_reg` becomes `L_model - L_reg`, the optimizer loss runs off in the negative direction and keeps growing in magnitude, yet the model still trains (accuracy drops from 0.947 before to 0.914 now). The non-regularization part of the loss still converges toward 0, so the optimization target is effectively still to drive the original loss as close to 0 as possible. However, if you then wrap the negative value in `tf.abs`, training breaks down completely: there is no longer a consistent direction for the parameter updates, the original loss stays stuck around 2.3 and cannot be optimized further, and accuracy is very low. The takeaway is that a regularized objective must still drive the original loss toward 0, and the original loss must stay positive. As for why flipping the plus to a minus still optimizes: both terms aim to make the total loss smaller; the former does so by pushing itself toward 0, while the latter does so by making the total more and more negative, so optimization can still proceed.
tf.get_variable(
    name='filter',
    shape=kernel_shape,
    initializer=self.weight_init,
    regularizer=self.reg
)
x = tf.layers.dense(x, units,
                    kernel_initializer=self.weight_init,
                    bias_initializer=self.bias_init,
                    kernel_regularizer=self.reg)
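
A minimal sketch of how the regularization term is then folded into the total loss at graph-build time; the names `loss_model` and `total_loss` are mine, and `label_onehot` / `logits` are assumed to come from the `build()` method shown below:

loss_model = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tf.cast(label_onehot, tf.float32), logits=logits))
# sum of every per-variable L2 term registered through regularizer=self.reg
loss_reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss_model + loss_reg
# flipping the sign (loss_model - loss_reg) still trains, only worse (0.947 -> 0.914);
# taking tf.abs of the negative total, on the other hand, breaks training, as noted above.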
import tensorflow as tf
import tensorflow.contrib as tf_contrib

from common import config


class Model():
    def __init__(self):
        # set the initializer of conv_weight and conv_bias
        self.weight_init = tf_contrib.layers.variance_scaling_initializer(
            factor=1.0, mode='FAN_IN', uniform=False)
        self.bias_init = tf.zeros_initializer()
        # L2 regularizer shared by every weight that should be decayed
        self.reg = tf_contrib.layers.l2_regularizer(config.weight_decay)

    def _conv_layer(self, name, inp, kernel_shape, stride, padding='SAME',
                    is_training=False):
        with tf.variable_scope(name) as scope:
            conv_filter = tf.get_variable(name='filter', shape=kernel_shape,
                                          initializer=self.weight_init,
                                          regularizer=self.reg)
            conv_bias = tf.get_variable(name='bias', shape=kernel_shape[-1],
                                        initializer=self.bias_init)
            x = tf.nn.conv2d(inp, conv_filter, strides=[1, stride, stride, 1],
                             padding=padding, data_format='NHWC')
            x = tf.nn.bias_add(x, conv_bias, data_format='NHWC')
            x = tf.layers.batch_normalization(x, axis=3, training=is_training)
            x = tf.nn.relu(x)
        return x

    def _pool_layer(self, name, inp, ksize, stride, padding='SAME', mode='MAX'):
        assert mode in ['MAX', 'AVG'], 'the mode of pool must be MAX or AVG'
        if mode == 'MAX':
            x = tf.nn.max_pool(inp, ksize=[1, ksize, ksize, 1],
                               strides=[1, stride, stride, 1],
                               padding=padding, name=name, data_format='NHWC')
        elif mode == 'AVG':
            x = tf.nn.avg_pool(inp, ksize=[1, ksize, ksize, 1],
                               strides=[1, stride, stride, 1],
                               padding=padding, name=name, data_format='NHWC')
        return x

    def _fc_layer(self, name, inp, units, dropout=0.5):
        with tf.variable_scope(name) as scope:
            shape = inp.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(inp, [-1, dim])  # flatten
            if dropout > 0:
                x = tf.nn.dropout(x, keep_prob=dropout, name='dropout')
            x = tf.layers.dense(x, units, kernel_initializer=self.weight_init,
                                bias_initializer=self.bias_init,
                                kernel_regularizer=self.reg)
        return x

    #def _softmax_layer(self, name, inp):
    #    x = tf.nn.softmax(inp, name=name)
    #    return x

    def build(self):
        data = tf.placeholder(tf.float32,
                              shape=(None,) + config.image_shape + (config.nr_channel,),
                              name='data')
        label = tf.placeholder(tf.int32, shape=(None,), name='label')
        # convert the format of label to one-hot
        label_onehot = tf.one_hot(label, config.nr_class, dtype=tf.int32)
        # a setting for bn
        is_training = tf.placeholder(tf.bool, name='is_training')

        # conv1
        x = self._conv_layer(name='conv1', inp=data,
                             kernel_shape=[3, 3, config.nr_channel, 16], stride=1,
                             is_training=is_training)  # Nx32x32x16
        x = self._pool_layer(name='pool1', inp=x, ksize=2, stride=2, mode='MAX')  # Nx16x16x16
        # conv2
        x = self._conv_layer(name='conv21', inp=x, kernel_shape=[3, 3, 16, 32],
                             stride=1, is_training=is_training)
        x = self._conv_layer(name='conv22', inp=x, kernel_shape=[3, 3, 32, 32],
                             stride=1, is_training=is_training)
        x = self._pool_layer(name='pool2', inp=x, ksize=2, stride=2, mode='MAX')  # Nx8x8x32
        # conv3
        x = self._conv_layer(name='conv31', inp=x, kernel_shape=[3, 3, 32, 64],
                             stride=1, is_training=is_training)
        x = self._conv_layer(name='conv32', inp=x, kernel_shape=[3, 3, 64, 64],
                             stride=1, is_training=is_training)
        x = self._pool_layer(name='pool3', inp=x, ksize=2, stride=2, mode='MAX')  # Nx4x4x64
        # conv4
        x = self._conv_layer(name='conv41', inp=x, kernel_shape=[3, 3, 64, 128],
                             stride=1, is_training=is_training)
        x = self._conv_layer(name='conv42', inp=x, kernel_shape=[3, 3, 128, 128],
                             stride=1, is_training=is_training)
        x = self._pool_layer(name='pool4', inp=x, ksize=4, stride=4, mode='AVG')  # Nx1x1x128
        # fc1
        logits = self._fc_layer(name='fc1', inp=x, units=config.nr_class, dropout=0)

        placeholders = {
            'data': data,
            'label': label,
            'is_training': is_training,
        }
        return placeholders, label_onehot, logits
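
For completeness, here is a minimal sketch of how the graph above might be wired into a training step. The Adam optimizer, the 1e-3 learning rate, and the zero-filled dummy batch are assumptions for illustration, not part of the original code; the control dependency on `tf.GraphKeys.UPDATE_OPS` is needed because the model uses `tf.layers.batch_normalization`.

import numpy as np

model = Model()
placeholders, label_onehot, logits = model.build()

# original loss + summed L2 terms, as described in the notes above
loss_model = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tf.cast(label_onehot, tf.float32), logits=logits))
loss_reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss_model + loss_reg

# batch_normalization keeps its moving-average updates in UPDATE_OPS,
# so they have to run together with the training op
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)  # assumed optimizer/lr

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # dummy batch, only to show the feed_dict layout
    batch_data = np.zeros((8,) + config.image_shape + (config.nr_channel,), np.float32)
    batch_label = np.zeros((8,), np.int32)
    _, loss_value = sess.run([train_op, total_loss],
                             feed_dict={placeholders['data']: batch_data,
                                        placeholders['label']: batch_label,
                                        placeholders['is_training']: True})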