之前写的学习笔记1用两种方法进行预测,今天发现有点不对。下面进行分析总结:
先来看看Classifier的源代码
#!/usr/bin/env python
"""
Classifier is an image classifier specialization of Net.
"""
import numpy as np
import caffe
class Classifier(caffe.Net):
"""
Classifier extends Net for image class prediction
by scaling, center cropping, or oversampling.
Parameters
----------
image_dims : dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
# configure pre-processing
in_ = self.inputs[0]
self.transformer = caffe.io.Transformer(
{in_: self.blobs[in_].data.shape})
self.transformer.set_transpose(in_, (2, 0, 1))
if mean is not None:
self.transformer.set_mean(in_, mean)
if input_scale is not None:
self.transformer.set_input_scale(in_, input_scale)
if raw_scale is not None:
self.transformer.set_raw_scale(in_, raw_scale)
if channel_swap is not None:
self.transformer.set_channel_swap(in_, channel_swap)
self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
if not image_dims:
image_dims = self.crop_dims
self.image_dims = image_dims
def predict(self, inputs, oversample=True):
"""
Predict classification probabilities of inputs.
Parameters
----------
inputs : iterable of (H x W x K) input ndarrays.
oversample : boolean
average predictions across center, corners, and mirrors
when True (default). Center-only prediction when False.
Returns
-------
predictions: (N x C) ndarray of class probabilities for N images and C
classes.
"""
# Scale to standardize input dimensions.
input_ = np.zeros((len(inputs),
self.image_dims[0],
self.image_dims[1],
inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
input_[ix] = caffe.io.resize_image(in_, self.image_dims)
if oversample:
# Generate center, corner, and mirrored crops.
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
crop = crop.astype(int)
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]
# Classify
caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]]
# For oversampling, average predictions across crops.
if oversample:
predictions = predictions.reshape((len(predictions) / 10, 10, -1))
predictions = predictions.mean(1)
return predictions
其中
def __init__(self, model_file, pretrained_file, image_dims=None,
mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
说明Classifier调用了caffe.Net。之前所说用两种方法其实是错误的,应该是将Classifier中的参数用代码重新初始化。也就是说,你以后用这个class会很方便,直接省去图片的初始化。往后我将统一使用Classifier!
再看caffe.io.load_image()
def load_image(filename, color=True):
"""
Load an image converting from grayscale or alpha as needed.
Parameters
----------
filename : string
color : boolean
flag for color format. True (default) loads as RGB while False
loads as intensity (if image is already grayscale).
Returns
-------
image : an image with type np.float32 in range [0, 1]
of size (H x W x 3) in RGB or
of size (H x W x 1) in grayscale.
"""
看出,caffe.io.load_image()是可以读灰度图的!!!将学习笔记3中的predict_plus.py中改成input_image=caffe.io.load_image(img,color=False)
实验成功!!!
注意:
Predict classification probabilities of inputs.
Parameters
----------
inputs : iterable of (H x W x K) input ndarrays.
oversample : boolean
average predictions across center, corners, and mirrors
when True (default). Center-only prediction when False.
Returns
-------
predictions: (N x C) ndarray of class probabilities for N images and C
predict函数中的input是一幅图片(W,H,K),后面的那个布尔值代表的是是否需要对图片crop,也就是1幅图片是否需要转化到10张图片,四个角+一个中心,水平翻转之后再来一次,总计10张。否则我们仅仅需要中心的那张就可以。返回的是一张N*C的numpy.ndarry。
对代码总结一下,如果不使用Classifier,则
import numpy as np
import sys,os
# 设置当前的工作环境在caffe下
caffe_root = '/home/xxx/caffe/'
# 我们也把caffe/python也添加到当前环境
sys.path.insert(0, caffe_root + 'python')
import caffe
os.chdir(caffe_root)#更换工作目录
# 设置网络结构
net_file=caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
# 添加训练之后的参数
caffe_model=caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
# 均值文件
mean_file=caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'
# 这里对任何一个程序都是通用的,就是处理图片
# 把上面添加的两个变量都作为参数构造一个Net
net = caffe.Net(net_file,caffe_model,caffe.TEST)
# 得到data的形状,这里的图片是默认matplotlib底层加载的
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
# matplotlib加载的image是像素[0-1],图片的数据格式[weight,high,channels],RGB
# caffe加载的图片需要的是[0-255]像素,数据格式[channels,weight,high],BGR,那么就需要转换
# channel 放到前面
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
# 图片像素放大到[0-255]
transformer.set_raw_scale('data', 255)
# RGB-->BGR 转换
transformer.set_channel_swap('data', (2,1,0))
# 这里才是加载图片
im=caffe.io.load_image(caffe_root+'examples/images/cat.jpg')
# 用上面的transformer.preprocess来处理刚刚加载图片
net.blobs['data'].data[...] = transformer.preprocess('data',im)
#注意,网络开始向前传播啦
out = net.forward()
# 最终的结果: 当前这个图片的属于哪个物体的概率(列表表示)
output_prob = output['prob'][0]
# 找出最大的那个概率
print 'predicted class is:', output_prob.argmax()
# 也可以找出前五名的概率
top_inds = output_prob.argsort()[::-1][:5]
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])
# 最后加载数据集进行验证
imagenet_labels_filename = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\t')
top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]
for i in np.arange(top_k.size):
print top_k[i], labels[top_k[i]]
对于使用Classifier,则
# coding=utf-8
import os
import numpy as np
from matplotlib import pyplot as plt
import cv2
import shutil
import time
#因为RGB和BGR需要调换一下才能显示
def showimage(im):
if im.ndim == 3:
im = im[:, :, ::-1]
plt.set_cmap('jet')
plt.imshow(im)
plt.show()
#特征可视化显示,padval用于调整亮度
def vis_square(data, padsize=1, padval=0):
data -= data.min()
data /= data.max()
#因为我们要把某一层的特征图都显示到一个figure上,因此需要计算每个图片占用figure多少比例,以及绘制的位置
n = int(np.ceil(np.sqrt(data.shape[0])))
padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))
# tile the filters into an image
data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
showimage(data)
#设置caffe源码所在的路径
caffe_root = '../../../caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')
import caffe
#加载均值文件
mean_filename='./imagenet_mean.binaryproto'
proto_data = open(mean_filename, "rb").read()
a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
mean = caffe.io.blobproto_to_array(a)[0]
#创建网络,并加载已经训练好的模型文件
gender_net_pretrained='./caffenet_train_iter_1500.caffemodel'
gender_net_model_file='./deploy_gender.prototxt'
gender_net = caffe.Classifier(gender_net_model_file, gender_net_pretrained,mean=mean,
channel_swap=(2,1,0),#RGB-->BGR
raw_scale=255,#把图片归一化到0~1之间
image_dims=(256, 256))#设置输入图片的大小
#预测分类及其可特征视化
gender_list=['Male','Female']
input_image = caffe.io.load_image('1.jpg')#读取图片
prediction_gender=gender_net.predict([input_image])#预测图片性别
#打印我们训练每一层的参数形状
print 'params:'
for k, v in gender_net.params.items():
print 'weight:'
print (k, v[0].data.shape)#在每一层的参数blob中,caffe用vector存储了两个blob变量,用v[0]表示weight
print 'b:'
print (k, v[1].data.shape)#用v[1]表示偏置参数
#conv1滤波器可视化
filters = gender_net.params['conv1'][0].data
vis_square(filters.transpose(0, 2, 3, 1))
#conv2滤波器可视化
'''''filters = gender_net.params['conv2'][0].data
vis_square(filters[:48].reshape(48**2, 5, 5))'''
#特征图
print 'feature maps:'
for k, v in gender_net.blobs.items():
print (k, v.data.shape);
feat = gender_net.blobs[k].data[0,0:4]#显示名字为k的网络层,第一张图片所生成的4张feature maps
vis_square(feat, padval=1)
#显示原图片,以及分类预测结果
str_gender=gender_list[prediction_gender[0].argmax()]
print str_gender
plt.imshow(input_image)
plt.title(str_gender)
plt.show()
blob格式为[num,ch,w,h]
使用opencv的imread()读取图像,必须转为浮点型以保存细节
图像储存顺序:BGR
cv2.imread(img,cv2.IMREAD_COLOR).astype(np.float32)
输出维度为[W,H,CH]
cv2.imread(img,cv2.IMREAD_GRAYSCALE).astype(np.float32)
输出维度为[W,H]
因此需要
input_image = input_image[:, :, np.newaxis]
变为[W,H,1]