Source
Industrial action recognition: tips for collecting a fall-recognition dataset (工业界行为识别:摔倒识别数据集收集技巧)
- Uses the keypoint-classification fall-detection model from 手写AI (a GCN classifier performed worse than a plain fully-connected network).
- The dataset format was modified.
- Keypoints come from the YOLOv11 pose (keypoint detection) model.
- The dataset itself comes from 手写AI.
- Training on 3 classes gave low accuracy, so it was reduced to two classes only: standing and fallen ("stand" and "lying" in the code below).
- In experiments, misclassification was high for top-down shots and non-standard standing poses; presumably this could be improved by enriching the dataset.
- Recognition under occlusion was also poor.
- Removing the keypoint confidence scores predicted by YOLOv11 from the training data gave results comparable to training with the scores (a minimal sketch of this variant follows this list).
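A minimal sketch of the score-free variant (a hypothetical helper, not the original code): drop the confidence column before building the input tensor, and size the classifier's first Linear layer for 2 features per keypoint instead of 3.
import numpy as np

# Hypothetical helper: `keypoints` is a (13, 3) array of [x, y, score] rows,
# as produced by the dataset script below; keep only the coordinates.
def drop_scores(keypoints):
    return np.asarray(keypoints, dtype=float)[:, :2]  # shape (13, 2)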
Dataset creation
- Use the YOLOv11 pose model to detect the keypoints of the person in the frame; YOLOv11 is trained on the COCO keypoint dataset, which defines 17 keypoints.
- Save the keypoint coordinates and confidence scores detected by the model to a CSV file.
- For the head, only the nose keypoint is kept.
- Data preprocessing (normalization): map the keypoint coordinates from full-image coordinates to coordinates relative to the detected person box. Divide the x coordinate by the box width and the y coordinate by the box height, then subtract 0.5 from each, so the coordinates are centered on the person (a small worked example follows this list).
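A quick numeric illustration of this normalization (made-up numbers, not taken from the dataset):
# Person box: top-left corner (350, 100), width 100, height 300.
# A nose detected at pixel (400, 160) is normalized to:
x, y, w, h = 350, 100, 100, 300
nose_x = (400 - x) / w - 0.5   # (400 - 350) / 100 - 0.5 =  0.0
nose_y = (160 - y) / h - 0.5   # (160 - 100) / 300 - 0.5 = -0.3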
import os
import csv
import random
from pydantic import BaseModel
from ultralytics import YOLO
header = [
    'label', 'width', 'height', 'path',
    # nose
    'nose_x', 'nose_y', 'nose_score',
    # left shoulder
    'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_score',
    # right shoulder
    'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_score',
    # left elbow
    'left_elbow_x', 'left_elbow_y', 'left_elbow_score',
    # right elbow
    'right_elbow_x', 'right_elbow_y', 'right_elbow_score',
    # left wrist
    'left_wrist_x', 'left_wrist_y', 'left_wrist_score',
    # right wrist
    'right_wrist_x', 'right_wrist_y', 'right_wrist_score',
    # left hip
    'left_hip_x', 'left_hip_y', 'left_hip_score',
    # right hip
    'right_hip_x', 'right_hip_y', 'right_hip_score',
    # left knee
    'left_knee_x', 'left_knee_y', 'left_knee_score',
    # right knee
    'right_knee_x', 'right_knee_y', 'right_knee_score',
    # left ankle
    'left_ankle_x', 'left_ankle_y', 'left_ankle_score',
    # right ankle
    'right_ankle_x', 'right_ankle_y', 'right_ankle_score'
]
class GetKeypoint(BaseModel):
    NOSE: int = 0
    LEFT_EYE: int = 1
    RIGHT_EYE: int = 2
    LEFT_EAR: int = 3
    RIGHT_EAR: int = 4
    LEFT_SHOULDER: int = 5
    RIGHT_SHOULDER: int = 6
    LEFT_ELBOW: int = 7
    RIGHT_ELBOW: int = 8
    LEFT_WRIST: int = 9
    RIGHT_WRIST: int = 10
    LEFT_HIP: int = 11
    RIGHT_HIP: int = 12
    LEFT_KNEE: int = 13
    RIGHT_KNEE: int = 14
    LEFT_ANKLE: int = 15
    RIGHT_ANKLE: int = 16
get_keypoint = GetKeypoint()
def extract_keypoint(keypoint):
    # nose
    nose_x, nose_y, nose_score = keypoint[get_keypoint.NOSE]
    # eye
    left_eye_x, left_eye_y, left_eye_score = keypoint[get_keypoint.LEFT_EYE]
    right_eye_x, right_eye_y, right_eye_score = keypoint[get_keypoint.RIGHT_EYE]
    # ear
    left_ear_x, left_ear_y, left_ear_score = keypoint[get_keypoint.LEFT_EAR]
    right_ear_x, right_ear_y, right_ear_score = keypoint[get_keypoint.RIGHT_EAR]
    # shoulder
    left_shoulder_x, left_shoulder_y, left_shoulder_score = keypoint[get_keypoint.LEFT_SHOULDER]
    right_shoulder_x, right_shoulder_y, right_shoulder_score = keypoint[get_keypoint.RIGHT_SHOULDER]
    # elbow
    left_elbow_x, left_elbow_y, left_elbow_score = keypoint[get_keypoint.LEFT_ELBOW]
    right_elbow_x, right_elbow_y, right_elbow_score = keypoint[get_keypoint.RIGHT_ELBOW]
    # wrist
    left_wrist_x, left_wrist_y, left_wrist_score = keypoint[get_keypoint.LEFT_WRIST]
    right_wrist_x, right_wrist_y, right_wrist_score = keypoint[get_keypoint.RIGHT_WRIST]
    # hip
    left_hip_x, left_hip_y, left_hip_score = keypoint[get_keypoint.LEFT_HIP]
    right_hip_x, right_hip_y, right_hip_score = keypoint[get_keypoint.RIGHT_HIP]
    # knee
    left_knee_x, left_knee_y, left_knee_score = keypoint[get_keypoint.LEFT_KNEE]
    right_knee_x, right_knee_y, right_knee_score = keypoint[get_keypoint.RIGHT_KNEE]
    # ankle
    left_ankle_x, left_ankle_y, left_ankle_score = keypoint[get_keypoint.LEFT_ANKLE]
    right_ankle_x, right_ankle_y, right_ankle_score = keypoint[get_keypoint.RIGHT_ANKLE]
    return [
        nose_x, nose_y, nose_score,
        left_shoulder_x, left_shoulder_y, left_shoulder_score,
        right_shoulder_x, right_shoulder_y, right_shoulder_score,
        left_elbow_x, left_elbow_y, left_elbow_score,
        right_elbow_x, right_elbow_y, right_elbow_score,
        left_wrist_x, left_wrist_y, left_wrist_score,
        right_wrist_x, right_wrist_y, right_wrist_score,
        left_hip_x, left_hip_y, left_hip_score,
        right_hip_x, right_hip_y, right_hip_score,
        left_knee_x, left_knee_y, left_knee_score,
        right_knee_x, right_knee_y, right_knee_score,
        left_ankle_x, left_ankle_y, left_ankle_score,
        right_ankle_x, right_ankle_y, right_ankle_score
    ]
# It is best if each image contains only one person; if several people are
# detected, only the one with the largest box area is kept.
def detect(root_path, poses=["lying", "stand"]):
    model = YOLO("yolo11s-pose.pt")
    dataset_csv = []
    for pose in poses:
        image_path = os.path.join(root_path, pose)
        for im in os.listdir(image_path):
            image = os.path.join(image_path, im)
            # Predict with the model; one Results object per image
            result = model(image)[0]
            if len(result.boxes) == 0:
                continue
            # keep the person with the largest box area
            xywh = result.boxes.xywh.cpu().numpy()
            idx = int((xywh[:, 2] * xywh[:, 3]).argmax())
            x1, y1, x2, y2 = result.boxes.xyxy.cpu().numpy()[idx].tolist()
            x, y, w, h = x1, y1, x2 - x1, y2 - y1
            mykeypoints = result.keypoints.data.cpu().numpy()[idx].tolist()
            # normalize: box-relative coordinates centered on the person
            n_keypoints = [[(kp[0] - x) / w - 0.5, (kp[1] - y) / h - 0.5, kp[2]]
                           if kp[0] > 0 and kp[1] > 0 else kp[:3]
                           for kp in mykeypoints]
            n_keypoints = extract_keypoint(n_keypoints)
            # prepend label, box size and image path to match the CSV header
            n_keypoints.insert(0, pose)
            n_keypoints.insert(1, w)
            n_keypoints.insert(2, h)
            n_keypoints.insert(3, image)
            dataset_csv.append(n_keypoints)
    return dataset_csv
if __name__ == "__main__":
    root_path = "images/"
    dataset_csv = detect(root_path)
    random.shuffle(dataset_csv)
    # 80/20 train/validation split
    percent = 0.8
    index = int(len(dataset_csv) * percent)
    with open('train.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        # write the header
        writer.writerow(header)
        # write the training rows
        writer.writerows(dataset_csv[:index])
    with open('val.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        # write the header
        writer.writerow(header)
        # write the validation rows
        writer.writerows(dataset_csv[index:])
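After running the script there should be a train.csv and a val.csv with 4 + 13 × 3 = 43 columns each. A quick, optional sanity check (not part of the original pipeline) might look like this:
import pandas as pd

df = pd.read_csv("train.csv")
print(df.shape)                    # (num_samples, 43)
print(df["label"].value_counts())  # samples per class: "lying" vs "stand"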
Model training
import os
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import cv2
import numpy as np
import pandas as pd
class Model(nn.Module):
    def __init__(self, key_points_size, nfeature, nclass):
        super().__init__()
        # a plain fully-connected classifier over the flattened keypoints
        self.fc1 = nn.Linear(key_points_size * nfeature, 512)
        self.fc2 = nn.Linear(512, nclass)
    def forward(self, x):
        # flatten (batch, key_points, features) to (batch, key_points * features)
        x = x.view(-1, int(x.size(1) * x.size(2)))
        x = F.relu(self.fc1(x))
        x = F.dropout(x, 0.7, training=self.training)
        return self.fc2(x)
# categories : {"lying" : 0, "stand" : 1}
class Dataset:
    def __init__(self, csv_path, augment, categories):
        df = pd.read_csv(csv_path)
        self.augment = augment
        self.items = np.array(df)
        self.categories = categories
    def __getitem__(self, index):
        # columns 0-3 are label, width, height, path; the rest are keypoints
        x = self.items[index][4:]
        y = self.items[index][0]
        y = self.categories[y]
        x = np.array([x.copy()]).reshape(-1, 3).astype(float)  # (13, 3): x, y, score
        if self.augment:
            # jitter the normalized coordinates by up to +/- 0.1
            if np.random.random() < 0.8:
                value = np.random.random((len(x), 2)) * 0.2 - 0.1
                x[:, :2] += value
            # randomly zero out entries to mimic missing / occluded keypoints
            if np.random.random() < 0.8:
                mask = np.random.binomial(1, size=x.shape, p=0.2).astype(bool)
                x[mask] = 0
        return torch.FloatTensor(x), torch.tensor(y)
    def __len__(self):
        return len(self.items)
def test(model, test_loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        for X, Y in test_loader:
            p = model(X)
            plabel = p.argmax(dim=1)
            correct += (plabel == Y).sum()
        accuracy = correct.item() / len(test_loader.dataset) * 100
    return accuracy
categories = {"lying" : 0, "stand" : 1}
train_set = Dataset("train.csv", True, categories)
train_loader = DataLoader(train_set, batch_size=256, pin_memory=True, shuffle=True, num_workers=0)
# no augmentation and no shuffling for validation
val_set = Dataset("val.csv", False, categories)
val_loader = DataLoader(val_set, batch_size=256, pin_memory=True, shuffle=False, num_workers=0)
nepochs = 200
key_points_size = 13   # nose + shoulders, elbows, wrists, hips, knees, ankles
feature_size = 3       # x, y, score per keypoint
classes_num = 2
model = Model(key_points_size, feature_size, classes_num)
# per-class loss weights: 0.5 for "lying" (index 0), 1.0 for "stand" (index 1)
loss_fn = nn.CrossEntropyLoss(weight=torch.FloatTensor([0.5, 1]))
optim = torch.optim.AdamW(model.parameters(), 1e-2)
best_accuracy = 0
for epoch in range(nepochs):
    model.train()
    for batch_index, (X, Y) in enumerate(train_loader):
        p = model(X)
        loss = loss_fn(p, Y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    # evaluate every 5 epochs and keep the best checkpoint
    if epoch % 5 == 0:
        acc = test(model, val_loader)
        print(f"Accuracy: {acc}, loss = {loss.item()}")
        if acc > best_accuracy:
            best_accuracy = acc
            torch.save(model.state_dict(), "best.pth")
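For completeness, a rough inference sketch under the same assumptions (yolo11s-pose.pt for pose estimation, extract_keypoint and Model from the scripts above, best.pth from training); the box-relative normalization must match the one used when building the dataset, and the image path is a placeholder. This is an illustration, not the author's deployment code.
import torch
from ultralytics import YOLO

def classify_pose(image_path, pose_model, cls_model, id_to_name={0: "lying", 1: "stand"}):
    result = pose_model(image_path)[0]
    if len(result.boxes) == 0:
        return None                                   # no person detected
    # keep the largest detected person, as in the dataset script
    xywh = result.boxes.xywh.cpu().numpy()
    idx = int((xywh[:, 2] * xywh[:, 3]).argmax())
    x1, y1, x2, y2 = result.boxes.xyxy.cpu().numpy()[idx].tolist()
    x, y, w, h = x1, y1, x2 - x1, y2 - y1
    kps = result.keypoints.data.cpu().numpy()[idx].tolist()
    # same box-relative, person-centered normalization as in detect()
    kps = [[(kp[0] - x) / w - 0.5, (kp[1] - y) / h - 0.5, kp[2]]
           if kp[0] > 0 and kp[1] > 0 else kp[:3] for kp in kps]
    feats = extract_keypoint(kps)                     # 13 keypoints * 3 values
    X = torch.FloatTensor(feats).view(1, 13, 3)
    cls_model.eval()
    with torch.no_grad():
        pred = cls_model(X).argmax(dim=1).item()
    return id_to_name[pred]

pose_model = YOLO("yolo11s-pose.pt")
cls_model = Model(key_points_size=13, nfeature=3, nclass=2)
cls_model.load_state_dict(torch.load("best.pth"))
print(classify_pose("test.jpg", pose_model, cls_model))  # hypothetical test image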