参考:
https://affinelayer.com/pix2pix/
https://github.com/phillipi/pix2pix
无supervisor情况下tensorflow训练过程如下:
variables
...
ops
...
summary_op
...
merge_all_summaries
saver
init_op
with tf.Session() as sess:
writer = tf.train.SummaryWriter()
sess.run(init_op)
saver.restore()
for ...:
train
merged_summary = sess.run(merge_all_summaries)
writer.add_summary(merged_summary,i)
saver.save
参数
# Command-line options; the parsed namespace `a` is read by the functions below
# (e.g. a.separable_conv, a.ngf).
parser = argparse.ArgumentParser()
parser.add_argument(
    "--mode",
    required=True,
    choices=["train", "test", "export"],
)
parser.add_argument(
    "--separable_conv",
    action="store_true",
    help="use separable convolutions in the generator",
)
parser.add_argument(
    "--ngf",
    type=int,
    default=64,
    help="number of generator filters in first conv layer",
)
a = parser.parse_args()
add_argument的常用参数:type、default、required、help、action(action="store_true"表示命令行中出现该选项时存储为True,否则为False)。
lab空间: L分量用于表示像素的亮度,取值范围是[0,100],表示从纯黑到纯白;a表示从红色到绿色的范围,取值范围是[127,-128];b表示从黄色到蓝色的范围,取值范围是[127,-128]。
Discriminator卷积层
def discrim_conv(batch_input, out_channels, stride):
    """Apply one 4x4 discriminator convolution with explicit 1-pixel padding.

    Axes 1 and 2 (height and width) are padded by one element on each side in
    "CONSTANT" mode, then a "valid" convolution with a square `stride` is run.
    Kernels are initialised from a normal distribution (mean 0, stddev 0.02).
    """
    # One [before, after] pair per axis: batch, height, width, channels.
    spatial_padding = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(batch_input, spatial_padding, mode="CONSTANT")
    return tf.layers.conv2d(
        padded,
        out_channels,
        kernel_size=4,
        strides=(stride, stride),
        padding="valid",
        kernel_initializer=tf.random_normal_initializer(0, 0.02),
    )
tf.pad的paddings中四个数组依次对应input的四个维度:[batch, in_height, in_width, in_channels],每个数组的两个数分别表示在该维度前、后填充的数量。
tf.random_normal_initializer(m, d)表示正态分布随机初始化,m为均值,d为标准差。
Generator卷积层
def gen_conv(batch_input, out_channels):
    """Generator convolution: 4x4 kernel, stride 2, "same" padding.

    [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels]

    A separable convolution is used when `a.separable_conv` is set; otherwise
    a regular convolution. Weights start from a normal(0, 0.02) initializer.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    if not a.separable_conv:
        return tf.layers.conv2d(
            batch_input,
            out_channels,
            kernel_size=4,
            strides=(2, 2),
            padding="same",
            kernel_initializer=weight_init,
        )
    return tf.layers.separable_conv2d(
        batch_input,
        out_channels,
        kernel_size=4,
        strides=(2, 2),
        padding="same",
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init,
    )
padding="same"时,输出的宽高变为ceil(height/stride)、ceil(width/stride)。
separable_conv2d可参考:
但depthwise_initializer以后depthwise filter的个数是多少还不清楚。
Generator反卷积层
def gen_deconv(batch_input, out_channels):
    """Generator upsampling layer.

    [batch, in_height, in_width, in_channels] => [batch, out_height, out_width, out_channels]

    With `a.separable_conv` set, the input is resized to twice its height and
    width (nearest neighbour) and passed through a stride-1 separable
    convolution; otherwise a stride-2 transposed convolution is applied.
    """
    weight_init = tf.random_normal_initializer(0, 0.02)
    if not a.separable_conv:
        return tf.layers.conv2d_transpose(
            batch_input,
            out_channels,
            kernel_size=4,
            strides=(2, 2),
            padding="same",
            kernel_initializer=weight_init,
        )

    # Unpacking four values also requires the input to be rank 4.
    _, height, width, _ = batch_input.shape
    upsampled = tf.image.resize_images(
        batch_input,
        [height * 2, width * 2],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )
    return tf.layers.separable_conv2d(
        upsampled,
        out_channels,
        kernel_size=4,
        strides=(1, 1),
        padding="same",
        depthwise_initializer=weight_init,
        pointwise_initializer=weight_init,
    )
leaky ReLU
def lrelu(x, a):
    """Leaky ReLU computed without a branch; `a` is the slope for negative inputs.

    The result is the sum of two terms:
        leak:   a*x/2 - a*abs(x)/2
        linear: x/2 + abs(x)/2
    which regroups to 0.5*(1 + a)*x + 0.5*(1 - a)*abs(x).
    """
    with tf.name_scope("lrelu"):
        # x is used twice below; without this identity the block looks like
        # it has 2 inputs on the graph.
        x = tf.identity(x)
        signed_term = (0.5 * (1 + a)) * x
        magnitude_term = (0.5 * (1 - a)) * tf.abs(x)
        return signed_term + magnitude_term
identity的作用还不清楚。
Batch Normalization
def batchnorm(inputs):
    """Batch-normalise `inputs` along axis 3, always with training=True.

    Gamma is initialised from a normal distribution with mean 1.0 and
    stddev 0.02.
    """
    gamma_init = tf.random_normal_initializer(1.0, 0.02)
    return tf.layers.batch_normalization(
        inputs,
        axis=3,
        epsilon=1e-5,
        momentum=0.1,
        training=True,
        gamma_initializer=gamma_init,
    )
其中momentum是训练时用moving average估计训练集均值和方差时所用的动量(衰减系数)。
关于axis的理解可以参考:
载入图像
Examples = collections.namedtuple(
    "Examples",
    "paths, inputs, targets, count, steps_per_epoch",
)


def load_examples():
    """Build the queue-based image input pipeline and return an `Examples` tuple.

    The bare `...` lines mark code omitted from this excerpt; `input_paths`,
    `input_images` and `target_images` are presumably defined in those omitted
    parts (not visible here).
    """
    ...
    with tf.name_scope("load_images"):
        # Paths are shuffled only in train mode.
        path_queue = tf.train.string_input_producer(
            input_paths,
            shuffle=a.mode == "train",
        )
        reader = tf.WholeFileReader()
        decode = tf.image.decode_jpeg
        paths, contents = reader.read(path_queue)
        raw_input = decode(contents)
        raw_input = tf.image.convert_image_dtype(raw_input, dtype=tf.float32)

        assertion = tf.assert_equal(
            tf.shape(raw_input)[2],
            3,
            message="image does not have 3 channels",
        )
        # Ops created inside control_dependencies run only after the listed
        # ops (here: the channel-count assertion) have executed.
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)
    ...
    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images],
        batch_size=a.batch_size,
    )
    steps_per_epoch = int(math.ceil(len(input_paths) / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        count=len(input_paths),
        steps_per_epoch=steps_per_epoch,
    )
整个过程的解释可以参考:
- 使用tf.train.string_input_producer函数把我们需要的全部文件打包为一个tf内部的queue类型,之后tf开文件就从这个queue中取目录了。
- 搞一个reader,不同reader对应不同的文件结构。
- 用reader的read方法,这个方法需要一个IO类型的参数,就是我们上边string_input_producer输出的那个queue了,reader从这个queue中取一个文件目录,然后打开它进行一次读取。
- 我们就要用tf.train.batch或者tf.train.shuffle_batch这个函数把一个一个小样本的tensor打包成一个高一维度的样本batch,这些函数的输入是单个样本,输出就是4D的样本batch了,其内部原理似乎是创建了一个queue,然后不断调用你的单样本tensor获得样本,直到queue里边有足够的样本,然后一次返回一堆样本,组成样本batch。
以上是常用的标准流程。
Generator
def create_generator(generator_inputs, generator_outputs_channels):
    """Build the encoder-decoder generator with skip connections.

    Eight stride-2 encoder layers are followed by eight upsampling decoder
    layers. Every decoder layer except the first also receives the output of
    the encoder layer with the same number (concatenated on axis 3). The final
    layer ends in tanh and has `generator_outputs_channels` channels.
    The shape comments assume 256x256 inputs.
    """
    layers = []

    # encoder_1: [batch, 256, 256, in_channels] => [batch, 128, 128, ngf]
    # (ngf: number of generator filters); no lrelu or batchnorm on this layer.
    with tf.variable_scope("encoder_1"):
        layers.append(gen_conv(generator_inputs, a.ngf))

    encoder_channels = [
        a.ngf * 2,  # encoder_2: [batch, 128, 128, ngf] => [batch, 64, 64, ngf * 2]
        a.ngf * 4,  # encoder_3: [batch, 64, 64, ngf * 2] => [batch, 32, 32, ngf * 4]
        a.ngf * 8,  # encoder_4: [batch, 32, 32, ngf * 4] => [batch, 16, 16, ngf * 8]
        a.ngf * 8,  # encoder_5: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8]
        a.ngf * 8,  # encoder_6: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8]
        a.ngf * 8,  # encoder_7: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8]
        a.ngf * 8,  # encoder_8: [batch, 2, 2, ngf * 8] => [batch, 1, 1, ngf * 8]
    ]

    # `layers` holds exactly one entry here, so scope numbering starts at 2.
    for layer_number, out_channels in enumerate(encoder_channels, start=2):
        with tf.variable_scope("encoder_%d" % layer_number):
            activated = lrelu(layers[-1], 0.2)
            # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels]
            downsampled = gen_conv(activated, out_channels)
            layers.append(batchnorm(downsampled))

    # (output channels, dropout rate) per decoder layer
    decoder_specs = [
        (a.ngf * 8, 0.5),  # decoder_8: [batch, 1, 1, ngf * 8] => [batch, 2, 2, ngf * 8 * 2]
        (a.ngf * 8, 0.5),  # decoder_7: [batch, 2, 2, ngf * 8 * 2] => [batch, 4, 4, ngf * 8 * 2]
        (a.ngf * 8, 0.5),  # decoder_6: [batch, 4, 4, ngf * 8 * 2] => [batch, 8, 8, ngf * 8 * 2]
        (a.ngf * 8, 0.0),  # decoder_5: [batch, 8, 8, ngf * 8 * 2] => [batch, 16, 16, ngf * 8 * 2]
        (a.ngf * 4, 0.0),  # decoder_4: [batch, 16, 16, ngf * 8 * 2] => [batch, 32, 32, ngf * 4 * 2]
        (a.ngf * 2, 0.0),  # decoder_3: [batch, 32, 32, ngf * 4 * 2] => [batch, 64, 64, ngf * 2 * 2]
        (a.ngf, 0.0),  # decoder_2: [batch, 64, 64, ngf * 2 * 2] => [batch, 128, 128, ngf * 2]
    ]

    num_encoder_layers = len(layers)
    for decoder_layer, (out_channels, dropout) in enumerate(decoder_specs):
        skip_layer = num_encoder_layers - decoder_layer - 1
        with tf.variable_scope("decoder_%d" % (skip_layer + 1)):
            decoder_input = layers[-1]
            if decoder_layer != 0:
                # The first decoder layer has no skip connection because it is
                # fed directly by the skip layer; all others concatenate it.
                decoder_input = tf.concat([decoder_input, layers[skip_layer]], axis=3)

            activated = tf.nn.relu(decoder_input)
            # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels]
            output = batchnorm(gen_deconv(activated, out_channels))
            if dropout > 0.0:
                output = tf.nn.dropout(output, keep_prob=1 - dropout)
            layers.append(output)

    # decoder_1: [batch, 128, 128, ngf * 2] => [batch, 256, 256, generator_outputs_channels]
    with tf.variable_scope("decoder_1"):
        decoder_input = tf.concat([layers[-1], layers[0]], axis=3)
        activated = tf.nn.relu(decoder_input)
        upsampled = gen_deconv(activated, generator_outputs_channels)
        return tf.tanh(upsampled)
Discriminator
def create_discriminator(discrim_inputs, discrim_targets):
    """Build the convolutional discriminator over an (input, target) pair.

    The two images are concatenated on axis 3 and passed through five
    `discrim_conv` layers; the last one has a single output channel followed
    by a sigmoid. The shape comments assume 256x256 images.
    """
    n_layers = 3
    layers = []

    # 2x [batch, height, width, in_channels] => [batch, height, width, in_channels * 2]
    paired = tf.concat([discrim_inputs, discrim_targets], axis=3)

    # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf]
    with tf.variable_scope("layer_1"):
        first = discrim_conv(paired, a.ndf, stride=2)
        layers.append(lrelu(first, 0.2))

    # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2]
    # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4]
    # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8]
    final_index = n_layers - 1
    for i in range(n_layers):
        with tf.variable_scope("layer_%d" % (len(layers) + 1)):
            out_channels = a.ndf * min(2 ** (i + 1), 8)
            # Every layer in this loop halves the resolution except the last,
            # which uses stride 1.
            stride = 2 if i != final_index else 1
            convolved = discrim_conv(layers[-1], out_channels, stride=stride)
            activated = lrelu(batchnorm(convolved), 0.2)
            layers.append(activated)

    # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1]
    with tf.variable_scope("layer_%d" % (len(layers) + 1)):
        scores = discrim_conv(layers[-1], out_channels=1, stride=1)
        return tf.sigmoid(scores)
Model
Model = collections.namedtuple(
    "Model",
    "outputs, predict_real, predict_fake, discrim_loss, discrim_grads_and_vars, gen_loss_GAN, gen_loss_L1, gen_grads_and_vars, train",
)


def create_model(inputs, targets):
    """Assemble generator, discriminator, losses and the combined train op.

    The generator output gets as many channels as `targets`. The discriminator
    is instantiated twice over shared variables: once on (inputs, targets) and
    once on (inputs, outputs). The losses reported in the returned `Model` are
    exponential moving averages (decay 0.99) of the raw losses.
    """
    with tf.variable_scope("generator"):
        outputs = create_generator(inputs, int(targets.get_shape()[-1]))

    # Two copies of the discriminator, one for real pairs and one for fake
    # pairs; reuse=True on the second makes them share the same variables.
    with tf.name_scope("real_discriminator"), tf.variable_scope("discriminator"):
        # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
        predict_real = create_discriminator(inputs, targets)

    with tf.name_scope("fake_discriminator"), tf.variable_scope("discriminator", reuse=True):
        # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
        predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope("discriminator_loss"):
        # Minimizing -tf.log pushes its argument towards 1, so this drives
        # predict_real => 1 and predict_fake => 0.
        real_term = tf.log(predict_real + EPS)
        fake_term = tf.log(1 - predict_fake + EPS)
        discrim_loss = tf.reduce_mean(-(real_term + fake_term))

    with tf.name_scope("generator_loss"):
        # predict_fake => 1
        # abs(targets - outputs) => 0
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [
            v for v in tf.trainable_variables() if v.name.startswith("discriminator")
        ]
        discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        discrim_grads_and_vars = discrim_optim.compute_gradients(
            discrim_loss, var_list=discrim_tvars
        )
        discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    # The generator step is built under a control dependency on the
    # discriminator step, so the discriminator update runs first.
    with tf.name_scope("generator_train"), tf.control_dependencies([discrim_train]):
        gen_tvars = [
            v for v in tf.trainable_variables() if v.name.startswith("generator")
        ]
        gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        gen_grads_and_vars = gen_optim.compute_gradients(gen_loss, var_list=gen_tvars)
        gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1])

    global_step = tf.train.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    # One op that refreshes the loss averages, bumps global_step and runs the
    # generator step (which in turn depends on the discriminator step).
    train_op = tf.group(update_losses, incr_global_step, gen_train)

    return Model(
        outputs=outputs,
        predict_real=predict_real,
        predict_fake=predict_fake,
        discrim_loss=ema.average(discrim_loss),
        discrim_grads_and_vars=discrim_grads_and_vars,
        gen_loss_GAN=ema.average(gen_loss_GAN),
        gen_loss_L1=ema.average(gen_loss_L1),
        gen_grads_and_vars=gen_grads_and_vars,
        train=train_op,
    )
AdamOptimizer:
ExponentialMovingAverage:
Variables & Ops
def main():
    """Seed the RNGs, build the input pipeline and model, and prepare the
    PNG-encoded image tensors used for display."""
    # Pick a random seed when none was given, then seed TensorFlow, NumPy and
    # the stdlib RNG with the same value.
    if a.seed is None:
        a.seed = random.randint(0, 2**31 - 1)
    tf.set_random_seed(a.seed)
    np.random.seed(a.seed)
    random.seed(a.seed)

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    examples = load_examples()
    print("examples count = %d" % examples.count)

    # inputs and targets are [batch_size, height, width, channels]
    model = create_model(examples.inputs, examples.targets)

    inputs = deprocess(examples.inputs)
    targets = deprocess(examples.targets)
    outputs = deprocess(model.outputs)

    def convert(image):
        """Resize to the configured aspect ratio if needed, then cast to uint8."""
        if a.aspect_ratio != 1.0:
            # upscale to correct aspect ratio
            scaled_width = int(round(CROP_SIZE * a.aspect_ratio))
            image = tf.image.resize_images(
                image,
                size=[CROP_SIZE, scaled_width],
                method=tf.image.ResizeMethod.BICUBIC,
            )
        return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True)

    # reverse any processing on images so they can be written to disk or
    # displayed to user
    with tf.name_scope("convert_inputs"):
        converted_inputs = convert(inputs)

    with tf.name_scope("convert_targets"):
        converted_targets = convert(targets)

    with tf.name_scope("convert_outputs"):
        converted_outputs = convert(outputs)

    with tf.name_scope("encode_images"):
        # Each batch is PNG-encoded image by image via map_fn.
        display_fetches = {
            "paths": examples.paths,
            "inputs": tf.map_fn(
                tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"
            ),
            "targets": tf.map_fn(
                tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"
            ),
            "outputs": tf.map_fn(
                tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"
            ),
        }
Summaries
# Image, scalar and histogram summaries (these statements continue main()).
image_summaries = (
    ("inputs_summary", "inputs", converted_inputs),
    ("targets_summary", "targets", converted_targets),
    ("outputs_summary", "outputs", converted_outputs),
)
for summary_scope, summary_tag, summary_images in image_summaries:
    with tf.name_scope(summary_scope):
        tf.summary.image(summary_tag, summary_images)

# The discriminator predictions are converted to uint8 inside their own
# name scopes before being logged as images.
with tf.name_scope("predict_real_summary"):
    real_as_image = tf.image.convert_image_dtype(model.predict_real, dtype=tf.uint8)
    tf.summary.image("predict_real", real_as_image)

with tf.name_scope("predict_fake_summary"):
    fake_as_image = tf.image.convert_image_dtype(model.predict_fake, dtype=tf.uint8)
    tf.summary.image("predict_fake", fake_as_image)

scalar_summaries = (
    ("discriminator_loss", model.discrim_loss),
    ("generator_loss_GAN", model.gen_loss_GAN),
    ("generator_loss_L1", model.gen_loss_L1),
)
for summary_tag, summary_value in scalar_summaries:
    tf.summary.scalar(summary_tag, summary_value)

# One histogram per trainable variable ...
for var in tf.trainable_variables():
    tf.summary.histogram(var.op.name + "/values", var)

# ... and one per gradient of both the discriminator and the generator.
for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
    tf.summary.histogram(var.op.name + "/gradients", grad)
Session
# Session setup and the test / train loops (these statements continue main()).
with tf.name_scope("parameter_count"):
    variable_sizes = [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]
    parameter_count = tf.reduce_sum(variable_sizes)

saver = tf.train.Saver(max_to_keep=1)

# The Supervisor only gets a log directory when traces or summaries are enabled.
if a.trace_freq > 0 or a.summary_freq > 0:
    logdir = a.output_dir
else:
    logdir = None
# saver=None: checkpoints are written and restored with `saver` below instead.
sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)

with sv.managed_session() as sess:
    print("parameter_count =", sess.run(parameter_count))

    if a.checkpoint is not None:
        print("loading model from checkpoint")
        checkpoint = tf.train.latest_checkpoint(a.checkpoint)
        saver.restore(sess, checkpoint)

    # Step budget: an explicit max_steps overrides max_epochs, which overrides
    # the 2**32 default.
    max_steps = 2**32
    if a.max_epochs is not None:
        max_steps = examples.steps_per_epoch * a.max_epochs
    if a.max_steps is not None:
        max_steps = a.max_steps

    if a.mode == "test":
        # testing
        # at most, process the test data once
        start = time.time()
        max_steps = min(examples.steps_per_epoch, max_steps)
        for step in range(max_steps):
            results = sess.run(display_fetches)
            filesets = save_images(results)
            for fileset in filesets:
                print("evaluated image", fileset["name"])
            index_path = append_index(filesets)
        print("wrote index at", index_path)
        print("rate", (time.time() - start) / max_steps)
    else:
        # training
        start = time.time()
        for step in range(max_steps):
            def should(freq):
                """True every `freq` steps and on the last step; False unless freq > 0."""
                if not freq > 0:
                    return False
                return (step + 1) % freq == 0 or step == max_steps - 1

            # Decide once per step which optional actions are due.
            trace_due = should(a.trace_freq)
            progress_due = should(a.progress_freq)
            summary_due = should(a.summary_freq)
            display_due = should(a.display_freq)

            options = None
            run_metadata = None
            if trace_due:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # The train op and global step are always fetched; the rest only
            # when due.
            fetches = {
                "train": model.train,
                "global_step": sv.global_step,
            }
            if progress_due:
                fetches["discrim_loss"] = model.discrim_loss
                fetches["gen_loss_GAN"] = model.gen_loss_GAN
                fetches["gen_loss_L1"] = model.gen_loss_L1
            if summary_due:
                fetches["summary"] = sv.summary_op
            if display_due:
                fetches["display"] = display_fetches

            results = sess.run(fetches, options=options, run_metadata=run_metadata)
            global_step_value = results["global_step"]

            if summary_due:
                print("recording summary")
                sv.summary_writer.add_summary(results["summary"], global_step_value)

            if display_due:
                print("saving display images")
                filesets = save_images(results["display"], step=global_step_value)
                append_index(filesets, step=True)

            if trace_due:
                print("recording trace")
                sv.summary_writer.add_run_metadata(run_metadata, "step_%d" % global_step_value)

            if progress_due:
                # global_step will have the correct step count if we resume
                # from a checkpoint
                train_epoch = math.ceil(global_step_value / examples.steps_per_epoch)
                train_step = (global_step_value - 1) % examples.steps_per_epoch + 1
                rate = (step + 1) * a.batch_size / (time.time() - start)
                remaining = (max_steps - step) * a.batch_size / rate
                print("progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60))
                print("discrim_loss", results["discrim_loss"])
                print("gen_loss_GAN", results["gen_loss_GAN"])
                print("gen_loss_L1", results["gen_loss_L1"])

            if should(a.save_freq):
                print("saving model")
                saver.save(sess, os.path.join(a.output_dir, "model"), global_step=sv.global_step)

            if sv.should_stop():
                break
结果
还算不错,但仍没有文章里那么清晰。