Cats vs. Dogs Dataset
We train AlexNet on the Kaggle cats-vs-dogs dataset, which contains 25,000 training images and 12,500 test images across the two classes, cat and dog.
Code Overview
1. alexnet.py: defines the AlexNet network
2. datagenerator.py: preprocesses the dataset and defines the input pipeline
3. validate_image.py: runs inference on test images
4. main.py: main entry point; trains the network on the training set (labels are parsed from the Kaggle file names, e.g. cat.0.jpg / dog.0.jpg, as sketched below)
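For reference, here is a minimal sketch of the label-from-filename mapping that main.py relies on (the helper name is hypothetical; only the substring check mirrors the actual code):

file_name_of_class = ['cat', 'dog']  # label 0 = cat, label 1 = dog

def label_from_filename(file_name):
    # Return the index of the first class name contained in the file name.
    for i, cls in enumerate(file_name_of_class):
        if cls in file_name:
            return i
    raise ValueError('unknown class: ' + file_name)

print(label_from_filename('cat.0.jpg'))      # 0
print(label_from_filename('dog.12499.jpg'))  # 1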
alexnet.py
import tensorflow as tf


def alexnet(x, keep_prob, num_classes):
    """AlexNet with the original two-group convolutions; expects 227x227x3 input."""
    # conv1: 11x11 conv, 96 filters, stride 4
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(x, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)
    # lrn1: local response normalization
    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(conv1, alpha=1e-4, beta=0.75, depth_radius=2, bias=2.0)
    # pool1: 3x3 max pooling, stride 2
    with tf.name_scope('pool1') as scope:
        pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    # conv2: 5x5 conv, 256 filters, computed in two groups as in the original paper
    with tf.name_scope('conv2') as scope:
        pool1_groups = tf.split(axis=3, value=pool1, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([5, 5, 48, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(pool1_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(pool1_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv2 = tf.nn.relu(bias, name=scope)
    # lrn2
    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(conv2, alpha=1e-4, beta=0.75, depth_radius=2, bias=2.0)
    # pool2
    with tf.name_scope('pool2') as scope:
        pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    # conv3: 3x3 conv, 384 filters (no grouping)
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)
    # conv4: 3x3 conv, 384 filters, two groups
    with tf.name_scope('conv4') as scope:
        conv3_groups = tf.split(axis=3, value=conv3, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv3_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv3_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv4 = tf.nn.relu(bias, name=scope)
    # conv5: 3x3 conv, 256 filters, two groups
    with tf.name_scope('conv5') as scope:
        conv4_groups = tf.split(axis=3, value=conv4, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 256], dtype=tf.float32, stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv4_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv4_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv5 = tf.nn.relu(bias, name=scope)
    # pool5
    with tf.name_scope('pool5') as scope:
        pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    # flatten: 6x6x256 feature maps -> 9216-dim vector
    with tf.name_scope('flattened6') as scope:
        flattened = tf.reshape(pool5, shape=[-1, 6 * 6 * 256])
    # fc6
    with tf.name_scope('fc6') as scope:
        weights = tf.Variable(tf.truncated_normal([6 * 6 * 256, 4096], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(flattened, weights, biases)
        fc6 = tf.nn.relu(bias)
    with tf.name_scope('dropout6') as scope:
        dropout6 = tf.nn.dropout(fc6, keep_prob)
    # fc7
    with tf.name_scope('fc7') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, 4096], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32), trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(dropout6, weights, biases)
        fc7 = tf.nn.relu(bias)
    with tf.name_scope('dropout7') as scope:
        dropout7 = tf.nn.dropout(fc7, keep_prob)
    # fc8: class logits
    with tf.name_scope('fc8') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, num_classes], dtype=tf.float32, stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[num_classes], dtype=tf.float32), trainable=True, name='biases')
        fc8 = tf.nn.xw_plus_b(dropout7, weights, biases)
    return fc8
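A quick way to sanity-check the graph is to build it once and inspect the logits shape; a minimal sketch, assuming TensorFlow 1.x:

import tensorflow as tf
from alexnet import alexnet

# Build the graph with a 227x227x3 placeholder and check the output shape.
x = tf.placeholder(tf.float32, [None, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)
logits = alexnet(x, keep_prob, num_classes=2)
print(logits.shape)  # expected: (?, 2)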
datagenerator.py
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor

# Per-channel mean used to center the images (the usual VGG/ImageNet mean, RGB order).
VGG_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)


# Wraps lists of image paths and labels into a batched tf.data input pipeline.
class ImageDataGenerator(object):
    def __init__(self, images, labels, batch_size, num_classes, image_format='jpg', shuffle=True):
        self.img_paths = images
        self.labels = labels
        self.data_size = len(self.labels)
        self.num_classes = num_classes
        self.image_format = image_format
        if shuffle:
            self._shuffle_lists()
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)
        data = tf.data.Dataset.from_tensor_slices((self.img_paths, self.labels))
        data = data.map(self._parse_function_train)
        data = data.batch(batch_size)
        self.data = data

    def _shuffle_lists(self):
        # Shuffle paths and labels together with a single permutation.
        path = self.img_paths
        labels = self.labels
        permutation = np.random.permutation(self.data_size)
        self.img_paths = []
        self.labels = []
        for i in permutation:
            self.img_paths.append(path[i])
            self.labels.append(labels[i])

    def _parse_function_train(self, filename, label):
        one_hot = tf.one_hot(label, self.num_classes)
        img_string = tf.read_file(filename)
        if self.image_format == 'jpg':
            img_decoded = tf.image.decode_jpeg(img_string, channels=3)
        elif self.image_format == 'png':
            img_decoded = tf.image.decode_png(img_string, channels=3)
        else:
            raise ValueError("Unsupported image format: {}".format(self.image_format))
        img_resized = tf.image.resize_images(img_decoded, [227, 227])
        img_centered = tf.subtract(img_resized, VGG_MEAN)
        # Flip RGB -> BGR after mean subtraction.
        img_bgr = img_centered[:, :, ::-1]
        return img_bgr, one_hot
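A minimal usage sketch, assuming TF 1.x and a couple of local jpg files (the file names here are hypothetical placeholders):

import tensorflow as tf
from datagenerator import ImageDataGenerator

# Point these at real files before running.
gen = ImageDataGenerator(images=['cat.0.jpg', 'dog.0.jpg'],
                         labels=[0, 1],
                         batch_size=2,
                         num_classes=2)
images, one_hot_labels = gen.data.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    imgs, labs = sess.run([images, one_hot_labels])
    print(imgs.shape, labs)  # (2, 227, 227, 3) and the one-hot labels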
main.py
import os
import glob
from datetime import datetime

import numpy as np
import tensorflow as tf
from tensorflow.data import Iterator

from alexnet import alexnet
from datagenerator import ImageDataGenerator
# from VGG16_model import vgg16  # optional VGG16 variant; its usage below is commented out


def main():
    # Hyperparameters
    learning_rate = 1e-3
    num_epochs = 1
    train_batch_size = 8
    dropout_rate = 0.5
    num_classes = 2
    # format_size = [120, 120]
    display_step = 20

    filewriter_path = './tsboard/'
    checkpoint_path = './checkpoints/'
    file_name_of_class = ['cat', 'dog']
    image_format = 'jpg'
    train_dataset_paths = '/home/dataset/kaggle/train/'

    # Build the lists of training image paths and labels; the label is inferred
    # from the file name (Kaggle names the files cat.*.jpg / dog.*.jpg).
    train_image_paths = glob.glob(train_dataset_paths + '*.' + image_format)
    train_labels = []
    print("train_image_length:", len(train_image_paths))
    for image_path in train_image_paths:
        image_file_name = image_path.split('/')[-1]
        for i in range(num_classes):
            if file_name_of_class[i] in image_file_name:
                train_labels.append(i)
                break

    # Build the input pipeline
    train_data = ImageDataGenerator(
        images=train_image_paths,
        labels=train_labels,
        batch_size=train_batch_size,
        num_classes=num_classes,
        image_format=image_format,
        shuffle=True)

    # Define the iterator
    print(train_data.data.output_types, train_data.data.output_shapes)
    with tf.name_scope('input'):
        train_iterator = Iterator.from_structure(train_data.data.output_types, train_data.data.output_shapes)
        training_initializer = train_iterator.make_initializer(train_data.data)
        train_next_batch = train_iterator.get_next()

    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    # Build the AlexNet graph
    logits = alexnet(x, keep_prob, num_classes)
    # VGG variant:
    # x = tf.image.resize_images(x, format_size)
    # logits = vgg16(x, num_classes, isTrain=True, keep_prob=0.6)

    with tf.name_scope('loss'):
        loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

    train_prediction = tf.nn.softmax(logits)
    init = tf.global_variables_initializer()

    # TensorBoard summaries (disabled)
    # tf.summary.scalar('loss', loss_op)
    # merged_summary = tf.summary.merge_all()
    # writer = tf.summary.FileWriter(filewriter_path)

    saver = tf.train.Saver()
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    train_batches_per_epoch = int(np.floor(train_data.data_size / train_batch_size))
    print(train_data.data_size)
    print(train_batches_per_epoch)

    with tf.Session() as sess:
        sess.run(init)
        # writer.add_graph(sess.graph)
        print("{}: start training...".format(datetime.now()))
        print("{}: open TensorBoard with --logdir {}".format(datetime.now(), filewriter_path))
        for epoch in range(num_epochs):
            sess.run(training_initializer)
            print("{}: epoch number: {} start".format(datetime.now(), epoch + 1))
            # Only the first 500 batches of each epoch are used here; replace
            # 500 with train_batches_per_epoch to train on the full set.
            for step in range(500):
                img_batch, label_batch = sess.run(train_next_batch)
                loss, _, predictions = sess.run(
                    [loss_op, train_op, train_prediction],
                    feed_dict={x: img_batch, y: label_batch, keep_prob: dropout_rate})
                if step % display_step == 0:
                    print("{}: loss = {}".format(datetime.now(), loss))
                    print("accuracy = {}".format(accuracy(predictions, label_batch)))
                    # s = sess.run(merged_summary, feed_dict={x: img_batch, y: label_batch, keep_prob: 1.})
                    # writer.add_summary(s, epoch * train_batches_per_epoch + step)
            # Save a checkpoint after each epoch
            print("{}: saving checkpoint of model...".format(datetime.now()))
            checkpoint_name = os.path.join(checkpoint_path, 'model_epoch' + str(epoch + 1) + '.ckpt')
            save_path = saver.save(sess, checkpoint_name)
            print("{}: epoch number: {} end".format(datetime.now(), epoch + 1))


def accuracy(predictions, labels):
    # Percentage of samples whose argmax prediction matches the one-hot label.
    return 100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0]


if __name__ == '__main__':
    main()
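As a quick sanity check on the accuracy helper, here is a toy example (the prediction values are invented purely for illustration):

import numpy as np

# accuracy() from main.py: percent of argmax matches against one-hot labels.
preds = np.array([[0.9, 0.1],
                  [0.2, 0.8],
                  [0.6, 0.4]])
labels = np.array([[1, 0],
                   [0, 1],
                   [0, 1]])
# Rows 0 and 1 match, row 2 does not -> 2/3 correct.
print(100.0 * np.sum(np.argmax(preds, 1) == np.argmax(labels, 1)) / preds.shape[0])  # 66.66...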
validate_image.py
import tensorflow as tf
import matplotlib.pyplot as plt

from alexnet import alexnet
# from VGG16_model import vgg16  # optional VGG16 variant; its usage below is commented out

class_name = ['cat', 'dog']


def test_image(path_image, num_class):
    img_string = tf.read_file(path_image)
    # The Kaggle test images are JPEGs.
    img_decoded = tf.image.decode_jpeg(img_string, channels=3)
    # AlexNet's fully connected layers assume a 227x227 input (see alexnet.py).
    img_resized = tf.image.resize_images(img_decoded, [227, 227])
    # Apply the same preprocessing as training (see datagenerator.py):
    # mean-center, then flip RGB -> BGR.
    VGG_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)
    img_centered = tf.subtract(img_resized, VGG_MEAN)
    img_bgr = img_centered[:, :, ::-1]
    img_input = tf.reshape(img_bgr, shape=[1, 227, 227, 3])
    fc8 = alexnet(img_input, 1.0, num_class)  # keep_prob = 1.0 at test time
    # vgg = vgg16(img_input, num_class, return_all=True)
    # score = tf.nn.softmax(vgg[-3])
    score = tf.nn.softmax(fc8)
    print("score:", score)
    prediction = tf.argmax(score, 1)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, './checkpoints/model_epoch1.ckpt')
        # print(sess.run(vgg))
        # print("score", sess.run(score))
        prob = sess.run(prediction)[0]
        plt.imshow(img_decoded.eval())
        plt.title("class: " + class_name[prob])
        plt.show()


if __name__ == '__main__':
    test_image("/home/dataset/kaggle/test1/3572.jpg", num_class=2)
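Note that test_image builds a fresh copy of the graph on every call, so scoring several images in one process requires resetting the default graph between calls. A minimal sketch, reusing the test directory from the example above:

import glob
import tensorflow as tf
from validate_image import test_image

for path in glob.glob('/home/dataset/kaggle/test1/*.jpg')[:5]:
    tf.reset_default_graph()  # avoid duplicate-variable errors across calls
    test_image(path, num_class=2)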