猫狗数据集地址链接:https://pan.baidu.com/s/1l6slkXqBrk87aiybcEkJ_Q?pwd=39me
提取码:39me
本文采用CNN进行猫狗识别
一、导入包
我使用的 TensorFlow 和 Keras 版本均为 2.7.0
import os
import zipfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import regularizers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
二、获取标签
根据文件夹下的图片名称获取标签,并用 DataFrame 保存
# Build a DataFrame mapping every training image file name to its label.
# The label is the file-name prefix before the first '.', e.g. the sample
# file 'dog.1283.jpg' is labelled 'dog'.
path = 'train/'  # images downloaded from the shared Baidu Cloud link, placed in the working directory
# os.listdir returns entries in arbitrary order; sorting keeps the later
# seeded train/val/test split reproducible across machines.
# Only files whose prefix is a known label are kept, so stray files are not
# silently labelled as 'dog'.
filenames = sorted(
    name for name in os.listdir(path)
    if name.split('.')[0] in ('cat', 'dog')
)
# Show the first few file names as a sanity check
print(filenames[:5])
label = [name.split('.')[0] for name in filenames]
df = pd.DataFrame({
    'name': filenames,
    'label': label
})
# Show how many images each label has
print(df['label'].value_counts())
# Preview a sample image
# load_img(path+'dog.1283.jpg')
三、建立模型
这里是卷积和池化过程
# CNN for binary cat/dog classification: five convolution + max-pooling
# stages followed by a dense head with a single sigmoid output.
model = models.Sequential([
    # Convolutions extract local image features; input images are 256*256*3
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    # Max pooling greatly reduces the output size while keeping the essential information
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head: flatten the feature maps, one hidden layer, sigmoid output
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics='acc')
四、拆分训练集、验证集和测试集
# Stratified split: 80% for training, then the remaining 20% is halved into
# test and validation sets. The fixed random_state keeps the split repeatable.
train, test_val = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=17)
test, val = train_test_split(
    test_val, test_size=0.5, stratify=test_val['label'], random_state=17)

# Report the size of each subset
print('train size:', len(train),
      '\nvalidation size:', len(val),
      '\ntest size:', len(test),
      )
# Report the label distribution of each subset
print('train labels:\n', train['label'].value_counts(),
      '\n\nvalidataion labels:\n', val['label'].value_counts(),
      '\n\ntest labels:\n', test['label'].value_counts(),
      sep='')

# Options shared by the training and validation generators: file names come
# from the 'name' column, binary labels from the 'label' column.
flow_options = dict(
    directory=path,
    x_col='name',
    y_col='label',
    class_mode='binary',
    seed=17,
)

# Pixel values are rescaled from [0, 255] to [0, 1]
train_gen = ImageDataGenerator(rescale=1./255)
train_data = train_gen.flow_from_dataframe(train, **flow_options)

val_gen = ImageDataGenerator(rescale=1./255)
val_data = val_gen.flow_from_dataframe(val, **flow_options)
五、开始训练
# Train the model, validating on val_data after every epoch.
history = model.fit(train_data,
                    validation_data=val_data,
                    epochs=10
                    )
# Plot the training and validation loss curves
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.figure(figsize=(15, 8))
plt.plot(loss, label='Train loss')
plt.plot(val_loss, '--', label='Val loss')
plt.title('Training and validation loss')
# One tick per recorded epoch, so the axis stays correct if `epochs` changes
plt.xticks(np.arange(len(loss)))
plt.yticks(np.arange(0, 0.7, 0.05))
plt.grid()
plt.legend()
# Needed to display the figure when running as a plain script
plt.show()
六、测试
# Predict on the held-out test set (stands in for new, unseen data)
test_gen = ImageDataGenerator(rescale=1./255)
test_data = test_gen.flow_from_dataframe(test,
                                         directory=path,
                                         x_col='name',
                                         y_col='label',
                                         class_mode='binary',
                                         # Keep dataframe order: with the default shuffle=True
                                         # the predictions would not line up with
                                         # test_data.classes below.
                                         shuffle=False,
                                         seed=17
                                         )
# 0 means cat, 1 means dog
test_pred = model.predict(test_data)
pred_label = test_pred > 0.5
true_label = test_data.classes
# Report loss and accuracy on the test set (not the validation set)
model.evaluate(test_data)
# Predict on a single image (the first row of the test set)
test_gen = ImageDataGenerator(rescale=1./255)
test_data = test_gen.flow_from_dataframe(test[0:1],
                                         directory=path,
                                         x_col='name',
                                         y_col='label',
                                         # With a single row the class index cannot be
                                         # inferred from the data (the only label present
                                         # would get index 0), so fix the mapping explicitly.
                                         classes=['cat', 'dog'],
                                         class_mode='binary',
                                         # Keep order so the prediction matches test_data.classes
                                         shuffle=False,
                                         seed=17
                                         )
# 0 means cat, 1 means dog
test_pred = model.predict(test_data)
pred_label = test_pred > 0.5
true_label = test_data.classes
# Report loss and accuracy for this single image
model.evaluate(test_data)
完整代码如下
import os
import zipfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import regularizers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
# Build a DataFrame mapping every training image file name to its label.
# The label is the file-name prefix before the first '.', e.g. the sample
# file 'dog.1283.jpg' is labelled 'dog'.
path = 'train/'  # images downloaded from the shared Baidu Cloud link, placed in the working directory
# os.listdir returns entries in arbitrary order; sorting keeps the later
# seeded train/val/test split reproducible across machines.
# Only files whose prefix is a known label are kept, so stray files are not
# silently labelled as 'dog'.
filenames = sorted(
    name for name in os.listdir(path)
    if name.split('.')[0] in ('cat', 'dog')
)
# Show the first few file names as a sanity check
print(filenames[:5])
label = [name.split('.')[0] for name in filenames]
df = pd.DataFrame({
    'name': filenames,
    'label': label
})
# Show how many images each label has
print(df['label'].value_counts())
# Preview a sample image
# load_img(path+'dog.1283.jpg')
# CNN for binary cat/dog classification: five convolution + max-pooling
# stages followed by a dense head with a single sigmoid output.
model = models.Sequential()
# Convolutions extract local image features; input images are 256*256*3
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
# Max pooling greatly reduces the output size while keeping the essential information
model.add(layers.MaxPooling2D((2, 2)))
# Remaining stages share the same layout and differ only in filter count
for n_filters in (64, 64, 128, 128):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
# Classifier head: flatten the feature maps, one hidden layer, sigmoid output
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics='acc')
# Stratified split: 80% for training, then the remaining 20% is halved into
# test and validation sets. The fixed random_state keeps the split repeatable.
train, test_val = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=17)
test, val = train_test_split(
    test_val, test_size=0.5, stratify=test_val['label'], random_state=17)

# Report the size of each subset
print('train size:', len(train),
      '\nvalidation size:', len(val),
      '\ntest size:', len(test),
      )
# Report the label distribution of each subset
print('train labels:\n', train['label'].value_counts(),
      '\n\nvalidataion labels:\n', val['label'].value_counts(),
      '\n\ntest labels:\n', test['label'].value_counts(),
      sep='')

# Options shared by the training and validation generators: file names come
# from the 'name' column, binary labels from the 'label' column.
flow_options = dict(
    directory=path,
    x_col='name',
    y_col='label',
    class_mode='binary',
    seed=17,
)

# Pixel values are rescaled from [0, 255] to [0, 1]
train_gen = ImageDataGenerator(rescale=1./255)
train_data = train_gen.flow_from_dataframe(train, **flow_options)

val_gen = ImageDataGenerator(rescale=1./255)
val_data = val_gen.flow_from_dataframe(val, **flow_options)
# Train the model, validating on val_data after every epoch.
history = model.fit(train_data,
                    validation_data=val_data,
                    epochs=10
                    )
# Plot the training and validation loss curves
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.figure(figsize=(15, 8))
plt.plot(loss, label='Train loss')
plt.plot(val_loss, '--', label='Val loss')
plt.title('Training and validation loss')
# One tick per recorded epoch, so the axis stays correct if `epochs` changes
plt.xticks(np.arange(len(loss)))
plt.yticks(np.arange(0, 0.7, 0.05))
plt.grid()
plt.legend()
# Needed to display the figure when running as a plain script
plt.show()
# Predict on the held-out test set (stands in for new, unseen data)
test_gen = ImageDataGenerator(rescale=1./255)
test_data = test_gen.flow_from_dataframe(test,
                                         directory=path,
                                         x_col='name',
                                         y_col='label',
                                         class_mode='binary',
                                         # Keep dataframe order: with the default shuffle=True
                                         # the predictions would not line up with
                                         # test_data.classes below.
                                         shuffle=False,
                                         seed=17
                                         )
# 0 means cat, 1 means dog
test_pred = model.predict(test_data)
pred_label = test_pred > 0.5
true_label = test_data.classes
# Report loss and accuracy on the test set (not the validation set)
model.evaluate(test_data)
# Predict on a single image (the first row of the test set)
test_gen = ImageDataGenerator(rescale=1./255)
test_data = test_gen.flow_from_dataframe(test[0:1],
                                         directory=path,
                                         x_col='name',
                                         y_col='label',
                                         # With a single row the class index cannot be
                                         # inferred from the data (the only label present
                                         # would get index 0), so fix the mapping explicitly.
                                         classes=['cat', 'dog'],
                                         class_mode='binary',
                                         # Keep order so the prediction matches test_data.classes
                                         shuffle=False,
                                         seed=17
                                         )
# 0 means cat, 1 means dog
test_pred = model.predict(test_data)
pred_label = test_pred > 0.5
true_label = test_data.classes
# Report loss and accuracy for this single image
model.evaluate(test_data)