This example uses the model trained in the previous section to perform mask detection.
The previous section is available here:
https://www.jianshu.com/p/0ac62d3750b1
First, OpenCV's DNN face detector (faceNet) is used to locate face positions in each frame; the face regions are then cropped out and fed into MobileNetV2 for mask detection.
The model file and prototxt file for the OpenCV face detector can be downloaded from the following addresses:
model: https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/res10_300x300_ssd_iter_140000.caffemodel?ref_type=heads
prototxt:
https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/deploy.prototxt.txt?ref_type=heads
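Once downloaded, the two files can be loaded with OpenCV's DNN module. Here is a minimal sketch, assuming both files are saved in the working directory (the full listing below does the equivalent with cv2.dnn.readNet):

import cv2

# load the Caffe-based SSD face detector (prototxt + weights downloaded above)
faceNet = cv2.dnn.readNetFromCaffe('deploy.prototxt.txt',
                                   'res10_300x300_ssd_iter_140000.caffemodel')
print('face detector loaded, {} layers'.format(len(faceNet.getLayerNames())))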
The test video can be downloaded from the same repository. You could also feed in camera data instead, but since this example runs in a Docker environment, where connecting to the host machine's camera is cumbersome, a video file is used as the input stream (a camera-based sketch appears after the main listing).
The code is as follows; the notebook is available at:
https://gitlab.com/zhuge20100104/cpp_practice/-/blob/master/simple_learn/deep_learning/19_training_neural_network_with_keras2/19.%20Training%20Neural%20Network%20with%20Keras%202.ipynb?ref_type=heads
# 1. Run detection with the model trained in the previous section.
# We also need an OpenCV face-detector model to locate the face positions.
# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import imutils
import time
import cv2
import os
def detect_and_predict_mask(frame, faceNet, maskNet):
    # grab the dimensions of the frame and then construct a blob from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))
    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)
    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []
    # loop over the detections:
    # detections holds N rows of 7 values each; index 2 of each row is the
    # confidence, and indices 3..6 are startX, startY, endX, endY expressed
    # as fractions of the frame size.
    # Keep the faces with confidence > 0.5, crop out each face region,
    # and then run the mask prediction on the crops.
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is greater than
        # the minimum confidence
        if confidence > 0.5:
            # compute the (x, y) coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype('int')
            # ensure the bounding box falls within the dimensions of the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))
            # extract the face ROI, resize it to 224 x 224, and preprocess it
            # for the face mask detector model (frames from imageio are already
            # RGB, so no BGR-to-RGB conversion is needed here)
            face = frame[startY:endY, startX:endX]
            # face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)
            # add the face and bounding box to their respective lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))
    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference, we make batch predictions on all faces
        # at the same time rather than one-by-one predictions in the loop above
        faces = np.array(faces, dtype='float32')
        preds = maskNet.predict(faces, batch_size=32)
    # return a 2-tuple of the face locations and their corresponding predictions
    return (locs, preds)
# load our serialized face detector model from disk
protoTxtPath = r'./deploy.prototxt.txt'
weightsPath = r'./res10_300x300_ssd_iter_140000.caffemodel'
faceNet = cv2.dnn.readNet(protoTxtPath, weightsPath)
# load the face mask detector model from disk
maskNet = load_model('./mask_detector.keras')
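# Optional sanity check on a single image before processing the video.
# A minimal sketch: 'test.jpg' is a hypothetical file name, not part of
# the original example.
test_img = cv2.imread('test.jpg')
if test_img is not None:
    # convert BGR (OpenCV) to RGB to match the imageio frames used below
    test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
    test_img = imutils.resize(test_img, width=400)
    locs, preds = detect_and_predict_mask(test_img, faceNet, maskNet)
    print('faces found: {}'.format(len(locs)))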
# Start the video, run detection with the model, and draw the results onto each frame
import imageio
from datetime import datetime
import matplotlib.pyplot as plt
input_video = 'masks'
video_reader = imageio.get_reader('{}.mp4'.format(input_video))
video_writer = imageio.get_writer('{}_annotated.mp4'.format(input_video), fps=10)
t0 = datetime.now()
n_frames = 0
for frame in video_reader:
    n_frames += 1
    # grab the frame from the video and resize it to have a maximum width of 400 pixels
    frame = imutils.resize(frame, width=400)
    # detect faces in the frame and determine if they are wearing a face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
    # loop over the detected face locations and their corresponding predictions
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred
        # determine the class label and color we'll use to draw the bounding
        # box and text (imageio frames are RGB, so red is (255, 0, 0))
        label = 'Mask' if mask > withoutMask else 'No Mask'
        color = (0, 255, 0) if label == 'Mask' else (255, 0, 0)
        # include the probability in the label
        label = '{}: {:.2f}%'.format(label, max(mask, withoutMask) * 100)
        cv2.putText(frame, label, (startX, startY + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
    video_writer.append_data(frame)
fps = n_frames / (datetime.now() - t0).total_seconds()
print('Frames processed: {}, speed: {:.2f} fps'.format(n_frames, fps))
video_reader.close()
video_writer.close()
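To read from a camera instead of the video file, as mentioned earlier, the same loop can be driven by cv2.VideoCapture. A minimal sketch, assuming camera index 0 is visible to the environment (which typically requires extra device mapping under Docker):

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # OpenCV captures BGR frames; convert to RGB to match the pipeline above
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = imutils.resize(frame, width=400)
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
    # ... draw the boxes and labels exactly as in the video loop above ...
cap.release()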
The result looks like this: