Problem statement: annotation multiplicity (the same object pair carrying several different relation labels) affects model training for visual relationship detection in several ways: increased data noise, inconsistent predictions, and a long-tail distribution (fine-grained relations appear rarely in the dataset, making them hard to learn). For example, the pair (person, horse) may be annotated with both "ride" and "on". The consequences include misclassification, overfitting, and poor interpretability.
How to mitigate annotation multiplicity: relation deduplication, relation merging, multi-label learning (sketched below), and hierarchical relation modeling (predict a coarse-grained relation first, then refine it into a fine-grained one).
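Of these, multi-label learning is the most direct fix: instead of forcing one answer per object pair, every annotated relation is treated as a valid target. A minimal sketch using PyTorch's BCEWithLogitsLoss; the feature dimension and relation count here are illustrative assumptions, not from the original:

import torch
import torch.nn as nn

num_relations = 7                           # assumed total number of relation labels
classifier = nn.Linear(128, num_relations)  # assumed 128-d object-pair features

x = torch.randn(4, 128)                     # a batch of pair features
# Multi-hot targets: a pair may carry several relations at once,
# e.g. (person, horse) annotated with both "ride" and "on"
targets = torch.zeros(4, num_relations)
targets[0, [0, 3]] = 1.0                    # sample 0 carries relation labels 0 and 3

logits = classifier(x)
loss = nn.BCEWithLogitsLoss()(logits, targets)  # one independent sigmoid per relation
loss.backward()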
Hierarchical relation modeling proceeds in three steps:
(1) Build a hierarchy over the relations (defined manually or learned automatically, so that every relation maps to a coarse-grained category and a fine-grained label).
(2) Two-stage model training: in the first stage, train a classifier to predict the coarse-grained relation; then, conditioned on the first stage's predictions, train a second classifier to predict the fine-grained relation.
(3) Combine with a hierarchical loss, such as hierarchical cross-entropy, hierarchical consistency loss, or hierarchical distance loss; the losses implemented below take the form L_total = L_coarse + L_fine + λ · L_hier.
Case study: hierarchical relation modeling
(1) Build the hierarchical relation structure:
# Define the relation hierarchy
hierarchy = {
    "action": ["ride", "push", "hold"],
    "location": ["on", "beside"],
    "attribute": ["has", "is"]
}

# Map relations to indices
coarse_to_idx = {coarse: idx for idx, coarse in enumerate(hierarchy.keys())}
# Note: fine indices are local to each coarse group ("ride" -> 0 within
# "action", "on" -> 0 within "location"), matching the per-group fine
# classifiers defined below
fine_to_idx = {fine: idx for coarse, fines in hierarchy.items() for idx, fine in enumerate(fines)}

# Example: map a relation pair to indices
coarse_label = "action"
fine_label = "ride"
coarse_idx = coarse_to_idx[coarse_label]  # coarse-grained index
fine_idx = fine_to_idx[fine_label]        # fine-grained index (within "action")
(2) Two-stage model training
Stage 1: coarse-grained classification
import torch
import torch.nn as nn
import torch.optim as optim

# Coarse-grained classification model: a single linear head over pair features
class CoarseClassifier(nn.Module):
    def __init__(self, input_dim, coarse_classes):
        super(CoarseClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, coarse_classes)

    def forward(self, x):
        return self.fc(x)

# Train the coarse-grained classifier; the shared dataset defined below
# yields (x, coarse_label, fine_label), so the fine label is ignored here
def train_coarse_classifier(model, dataloader, epochs=10, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        for x, coarse_label, _ in dataloader:
            optimizer.zero_grad()
            coarse_logits = model(x)
            loss = criterion(coarse_logits, coarse_label)
            loss.backward()
            optimizer.step()
Stage 2: fine-grained classification
# Fine-grained classification model, one instance per coarse category
class FineClassifier(nn.Module):
    def __init__(self, input_dim, fine_classes):
        super(FineClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, fine_classes)

    def forward(self, x):
        return self.fc(x)

# Train the fine-grained classifiers
def train_fine_classifier(coarse_model, fine_models, dataloader, epochs=10, lr=0.001):
    optimizers = {coarse: optim.Adam(model.parameters(), lr=lr) for coarse, model in fine_models.items()}
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        total_loss, num_updates = 0.0, 0
        for x, coarse_label, fine_label in dataloader:
            # Stage 1: predict the coarse category
            with torch.no_grad():
                coarse_logits = coarse_model(x)
                coarse_pred = torch.argmax(coarse_logits, dim=-1)
            # Stage 2: route each sample to the fine classifier of its coarse category.
            # Only keep samples whose coarse prediction matches the ground truth,
            # since fine labels are indexed within the true coarse group
            for i, coarse in enumerate(hierarchy.keys()):
                mask = (coarse_pred == i) & (coarse_label == i)
                if mask.sum() > 0:  # any samples routed to this category?
                    x_fine = x[mask]
                    fine_label_fine = fine_label[mask]
                    optimizer = optimizers[coarse]
                    model = fine_models[coarse]
                    optimizer.zero_grad()
                    fine_logits = model(x_fine)
                    loss = criterion(fine_logits, fine_label_fine)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    num_updates += 1
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / max(num_updates, 1)}")
# Example: instantiate the coarse and fine classifiers
input_dim = 128  # matches the 128-d example features below
coarse_model = CoarseClassifier(input_dim, len(hierarchy))
fine_models = {
    coarse: FineClassifier(input_dim, len(fines))
    for coarse, fines in hierarchy.items()
}
# dataloader_coarse and dataloader_fine are constructed in the next snippet,
# which also runs the two training stages
Example data loader
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
# Example data: fine labels are indexed within their coarse group
data = [
    (torch.randn(128), 0, 0),  # (x, coarse_label="action", fine_label="ride")
    (torch.randn(128), 1, 1),  # (x, coarse_label="location", fine_label="beside")
    (torch.randn(128), 2, 0),  # (x, coarse_label="attribute", fine_label="has")
    # more data ...
]
dataset = CustomDataset(data)
dataloader_coarse = DataLoader(dataset, batch_size=2, shuffle=True)
dataloader_fine = DataLoader(dataset, batch_size=2, shuffle=True)

# Train stage 1 first, then stage 2 on top of its predictions
train_coarse_classifier(coarse_model, dataloader_coarse)
train_fine_classifier(coarse_model, fine_models, dataloader_fine)
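Once both stages are trained, inference chains them: predict the coarse category, route the sample to that category's fine classifier, and decode the local index back to a relation name. A minimal sketch under the setup above; predict_relation is a hypothetical helper, not part of the original:

def predict_relation(x, coarse_model, fine_models, hierarchy):
    coarse_names = list(hierarchy.keys())
    with torch.no_grad():
        # Stage 1: pick the coarse category
        coarse_idx = torch.argmax(coarse_model(x), dim=-1).item()
        coarse = coarse_names[coarse_idx]
        # Stage 2: pick the fine relation within that category
        fine_idx = torch.argmax(fine_models[coarse](x), dim=-1).item()
    return coarse, hierarchy[coarse][fine_idx]

# e.g. predict_relation(torch.randn(1, 128), coarse_model, fine_models, hierarchy)
# might return ("action", "ride")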
(3) Combining hierarchical losses
# Consistency loss: fine probabilities, aggregated to the coarse level via the
# fine_to_coarse assignment matrix, should agree with the coarse distribution.
# This assumes a single fine classifier over all fine classes (global indices)
def hierarchical_consistency_loss(coarse_logits, fine_logits, coarse_label, fine_label,
                                  fine_to_coarse, lambda_=0.1):
    ce_loss = nn.CrossEntropyLoss()
    kl_loss = nn.KLDivLoss(reduction="batchmean")
    # Cross-entropy terms at both levels
    coarse_loss = ce_loss(coarse_logits, coarse_label)
    fine_loss = ce_loss(fine_logits, fine_label)
    # Consistency term (KL divergence over the coarse classes)
    coarse_probs = torch.softmax(coarse_logits, dim=-1)
    fine_probs = torch.softmax(fine_logits, dim=-1)
    aggregated = fine_probs @ fine_to_coarse  # sum fine probs within each coarse group
    consistency_loss = kl_loss(aggregated.clamp_min(1e-8).log(), coarse_probs)
    return coarse_loss + fine_loss + lambda_ * consistency_loss
# Distance loss: penalize the squared distance between the coarse distribution
# and the fine distribution aggregated to the coarse level via fine_to_coarse
def hierarchical_distance_loss(coarse_logits, fine_logits, coarse_label, fine_label,
                               fine_to_coarse, lambda_=0.1):
    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()
    # Cross-entropy terms at both levels
    coarse_loss = ce_loss(coarse_logits, coarse_label)
    fine_loss = ce_loss(fine_logits, fine_label)
    # Hierarchical distance term (MSE over the coarse classes)
    coarse_probs = torch.softmax(coarse_logits, dim=-1)
    fine_probs = torch.softmax(fine_logits, dim=-1)
    distance_loss = mse_loss(fine_probs @ fine_to_coarse, coarse_probs)
    return coarse_loss + fine_loss + lambda_ * distance_loss
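Both loss functions expect a fine-to-coarse assignment matrix, and assume the fine classifier scores all fine classes with global indices (unlike the per-group classifiers in the case study). A sketch of how that matrix could be built from hierarchy, followed by a hypothetical call:

def build_fine_to_coarse(hierarchy):
    # M[i, j] = 1 if global fine class i belongs to coarse class j,
    # so fine_probs @ M sums fine probabilities within each coarse group
    num_fine = sum(len(fines) for fines in hierarchy.values())
    M = torch.zeros(num_fine, len(hierarchy))
    i = 0
    for j, fines in enumerate(hierarchy.values()):
        for _ in fines:
            M[i, j] = 1.0
            i += 1
    return M

fine_to_coarse = build_fine_to_coarse(hierarchy)  # shape (7, 3) for this example
# loss = hierarchical_consistency_loss(coarse_logits, fine_logits,
#                                      coarse_label, fine_label, fine_to_coarse)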