On Annotation Multiplicity in Scene Graph Generation

Problem definition: annotation multiplicity, where the same object pair carries multiple different relation labels, affects model training for visual relationship detection in several ways: it adds label noise, makes predictions unstable, and aggravates the long-tail distribution problem (fine-grained relations appear rarely in the dataset, so the model struggles to learn them). The result can be misclassification, overfitting, and models that are hard to interpret.
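
To make this concrete, here is a hypothetical Visual-Genome-style fragment in which one subject-object pair carries three overlapping predicate labels (the pair and labels are illustrative, not taken from a real dataset):

from collections import defaultdict

# Hypothetical annotations: the same (subject, object) pair carries
# several different, partially redundant predicate labels
annotations = [
    {"subject": "man", "predicate": "ride",  "object": "horse"},
    {"subject": "man", "predicate": "on",    "object": "horse"},
    {"subject": "man", "predicate": "above", "object": "horse"},
]

# Group labels by pair to expose the multiplicity
labels_per_pair = defaultdict(set)
for ann in annotations:
    labels_per_pair[(ann["subject"], ann["object"])].add(ann["predicate"])

print(dict(labels_per_pair))  # {('man', 'horse'): {'ride', 'on', 'above'}}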

How to mitigate annotation multiplicity: relation deduplication, relation merging, multi-label learning, and hierarchical relation modeling (predict a coarse-grained relation first, then refine it into a fine-grained one); a minimal multi-label sketch follows below.
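
Of these, multi-label learning keeps all labels instead of collapsing them: each object pair gets a multi-hot target over the predicate vocabulary. A minimal sketch with PyTorch's BCEWithLogitsLoss (the relation list and the 128-dim pair feature are illustrative assumptions):

import torch
import torch.nn as nn

relations = ["ride", "push", "hold", "on", "beside", "has", "is"]
rel_to_idx = {r: i for i, r in enumerate(relations)}

# Multi-hot target: ("man", "horse") is labeled both "ride" and "on"
target = torch.zeros(len(relations))
target[rel_to_idx["ride"]] = 1.0
target[rel_to_idx["on"]] = 1.0

# One sigmoid output per relation instead of a single softmax class
model = nn.Linear(128, len(relations))  # 128-dim pair feature (assumed)
x = torch.randn(1, 128)
loss = nn.BCEWithLogitsLoss()(model(x), target.unsqueeze(0))
loss.backward()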

Hierarchical relation modeling proceeds in 3 steps:
(1) Build a relation hierarchy (defined manually or learned automatically) so that every fine-grained relation maps to a coarse-grained parent.
(2) Two-stage model training: the first stage trains a classifier to predict the coarse-grained relation; conditioned on the first stage's prediction, a second classifier is trained to predict the fine-grained relation.
(3) Combine hierarchical losses, such as a hierarchical cross-entropy loss, a hierarchical consistency loss, or a hierarchical distance loss (a minimal sketch of the first follows this list; the other two are implemented in the case study).
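
The hierarchical cross-entropy named in step (3) is, in its simplest form, just a weighted sum of cross-entropies at the two levels; a minimal sketch, with the weight alpha as an assumed hyperparameter:

import torch.nn as nn

# Hierarchical cross-entropy: weighted sum of the cross-entropies at
# the coarse and fine levels
def hierarchical_cross_entropy(coarse_logits, fine_logits, coarse_label, fine_label, alpha=0.5):
    ce = nn.CrossEntropyLoss()
    return ce(coarse_logits, coarse_label) + alpha * ce(fine_logits, fine_label)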

Case study: hierarchical relation modeling
(1) Building the relation hierarchy:

# Define the relation hierarchy
hierarchy = {
    "action": ["ride", "push", "hold"],
    "location": ["on", "beside"],
    "attribute": ["has", "is"]
}

# Map relations to indices. Note that fine_to_idx assigns indices
# *within each coarse group* (e.g. "ride" -> 0 and "on" -> 0), which
# matches the per-group fine classifiers used in stage two below.
coarse_to_idx = {coarse: idx for idx, coarse in enumerate(hierarchy.keys())}
fine_to_idx = {fine: idx for coarse, fines in hierarchy.items() for idx, fine in enumerate(fines)}

# Example: mapping one relation pair to indices
coarse_label = "action"
fine_label = "ride"
coarse_idx = coarse_to_idx[coarse_label]  # coarse-grained index
fine_idx = fine_to_idx[fine_label]        # fine-grained index within "action"

(2) Two-stage model training
Stage one: coarse-grained classification

import torch
import torch.nn as nn
import torch.optim as optim

# Coarse-grained classification model
class CoarseClassifier(nn.Module):
    def __init__(self, input_dim, coarse_classes):
        super(CoarseClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, coarse_classes)

    def forward(self, x):
        return self.fc(x)

# Train the coarse-grained classifier. The example dataloader below
# yields (x, coarse_label, fine_label) triples, so the fine label is
# unpacked and ignored here.
def train_coarse_classifier(model, dataloader, epochs=10, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        for x, coarse_label, _ in dataloader:
            optimizer.zero_grad()
            coarse_logits = model(x)
            loss = criterion(coarse_logits, coarse_label)
            loss.backward()
            optimizer.step()
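
A minimal usage sketch, assuming the 128-dimensional features and the dataloader_coarse loader from the dataloader example further down:

input_dim = 128  # matches the 128-dim features in the example data below
coarse_model = CoarseClassifier(input_dim, len(hierarchy))
train_coarse_classifier(coarse_model, dataloader_coarse)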

Stage two: fine-grained classification

# Fine-grained classification model (one per coarse class)
class FineClassifier(nn.Module):
    def __init__(self, input_dim, fine_classes):
        super(FineClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, fine_classes)

    def forward(self, x):
        return self.fc(x)

# Train the fine-grained classifiers
def train_fine_classifier(coarse_model, fine_models, dataloader, epochs=10, lr=0.001):
    optimizers = {coarse: optim.Adam(model.parameters(), lr=lr) for coarse, model in fine_models.items()}
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        total_loss, num_batches = 0.0, 0
        for x, coarse_label, fine_label in dataloader:
            # Stage one: predict the coarse class
            with torch.no_grad():
                coarse_logits = coarse_model(x)
                coarse_pred = torch.argmax(coarse_logits, dim=-1)

            # Stage two: route each sample to the fine classifier of its
            # coarse class. The prediction must also match the ground
            # truth, because fine labels are indexed within their true
            # coarse group; routing by a wrong prediction would feed
            # invalid targets to CrossEntropyLoss.
            for i, coarse in enumerate(hierarchy.keys()):
                mask = (coarse_pred == i) & (coarse_label == i)
                if mask.sum() > 0:  # any samples of this coarse class?
                    x_fine = x[mask]
                    fine_label_fine = fine_label[mask]

                    optimizer = optimizers[coarse]
                    model = fine_models[coarse]

                    optimizer.zero_grad()
                    fine_logits = model(x_fine)
                    loss = criterion(fine_logits, fine_label_fine)
                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()
                    num_batches += 1
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / max(num_batches, 1):.4f}")

# Example: initialize the fine-grained classifiers and train them
# (input_dim and coarse_model come from the stage-one sketch above)
fine_models = {
    coarse: FineClassifier(input_dim, len(fines))
    for coarse, fines in hierarchy.items()
}

# dataloader_fine returns (x, coarse_label, fine_label) triples;
# it is defined in the example below
train_fine_classifier(coarse_model, fine_models, dataloader_fine)
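
At inference time the routing does use the stage-one prediction: predict the coarse class, then let that group's fine classifier pick the relation. A minimal sketch, reusing coarse_model, fine_models, and the hierarchy defined above:

# Two-stage inference: the coarse prediction routes to a fine classifier
idx_to_coarse = {i: c for c, i in coarse_to_idx.items()}

def predict_relation(x):
    with torch.no_grad():
        coarse = idx_to_coarse[coarse_model(x).argmax(dim=-1).item()]
        fine_idx = fine_models[coarse](x).argmax(dim=-1).item()
    return coarse, hierarchy[coarse][fine_idx]

print(predict_relation(torch.randn(1, 128)))  # e.g. ('action', 'ride')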

Example dataloader

from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Example data: coarse labels index hierarchy.keys(); fine labels are
# indexed within the corresponding coarse group
data = [
    (torch.randn(128), 0, 0),  # (x, coarse_label, fine_label) -> action / ride
    (torch.randn(128), 1, 1),  # location / beside
    (torch.randn(128), 2, 0),  # attribute / has
    # more data...
]

dataset = CustomDataset(data)
dataloader_coarse = DataLoader(dataset, batch_size=2, shuffle=True)
dataloader_fine = DataLoader(dataset, batch_size=2, shuffle=True)  

(3) Combining hierarchical losses
The sum of the coarse and fine cross-entropy terms already behaves like a hierarchical cross-entropy; the two functions below add a consistency term and a distance term on top. Both assume that fine_logits come from a single classifier over all fine classes (unlike the per-group classifiers above), so the fine distribution can be aggregated up to the coarse level via a fine_to_coarse index mapping, built after the definitions.

def hierarchical_consistency_loss(coarse_logits, fine_logits, coarse_label, fine_label, fine_to_coarse, lambda_=0.1):
    ce_loss = nn.CrossEntropyLoss()
    kl_loss = nn.KLDivLoss(reduction='batchmean')

    # Cross-entropy terms at both levels
    coarse_loss = ce_loss(coarse_logits, coarse_label)
    fine_loss = ce_loss(fine_logits, fine_label)

    # Consistency term (KL divergence). The fine distribution is first
    # aggregated up to the coarse classes so both distributions live over
    # the same space; KLDivLoss expects log-probabilities as its input.
    coarse_probs = torch.softmax(coarse_logits, dim=-1)
    fine_probs = torch.softmax(fine_logits, dim=-1)
    agg_probs = fine_probs.new_zeros(fine_probs.size(0), coarse_logits.size(-1))
    agg_probs.index_add_(1, fine_to_coarse, fine_probs)
    consistency_loss = kl_loss(agg_probs.clamp_min(1e-8).log(), coarse_probs)

    return coarse_loss + fine_loss + lambda_ * consistency_loss

def hierarchical_distance_loss(coarse_logits, fine_logits, coarse_label, fine_label, fine_to_coarse, lambda_=0.1):
    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()

    # Cross-entropy terms at both levels
    coarse_loss = ce_loss(coarse_logits, coarse_label)
    fine_loss = ce_loss(fine_logits, fine_label)

    # Hierarchical distance term: MSE between the coarse distribution and
    # the fine distribution aggregated up to the coarse classes
    coarse_probs = torch.softmax(coarse_logits, dim=-1)
    fine_probs = torch.softmax(fine_logits, dim=-1)
    agg_probs = fine_probs.new_zeros(fine_probs.size(0), coarse_logits.size(-1))
    agg_probs.index_add_(1, fine_to_coarse, fine_probs)
    distance_loss = mse_loss(agg_probs, coarse_probs)

    return coarse_loss + fine_loss + lambda_ * distance_loss
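
A sketch of how the flat fine ordering and the fine_to_coarse index mapping can be derived from the hierarchy, with dummy logits and labels (illustrative only) to exercise the loss:

# Flat ordering of all fine classes, and a tensor mapping each fine
# index to the index of its coarse parent
all_fines = [f for fines in hierarchy.values() for f in fines]
fine_to_coarse = torch.tensor(
    [coarse_to_idx[c] for c, fines in hierarchy.items() for _ in fines]
)  # here: [0, 0, 0, 1, 1, 2, 2]

# Dummy logits/labels just to exercise the losses
coarse_logits = torch.randn(4, len(hierarchy))
fine_logits = torch.randn(4, len(all_fines))
coarse_y = torch.randint(0, len(hierarchy), (4,))
fine_y = torch.randint(0, len(all_fines), (4,))

loss = hierarchical_consistency_loss(coarse_logits, fine_logits, coarse_y, fine_y, fine_to_coarse)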