label smooth

标签平滑：
Label Smoothing（标签平滑）是一个经典的正则化方法，机器学习的样本中通常会存在少量错误标签，这些错误标签会影响到预测的效果。标签平滑采用如下思路解决这个问题：在训练时即假设标签可能存在错误，避免“过分”相信训练样本的标签。当目标函数为交叉熵时，这一思想有非常简单的实现：

# coding: UTF-8
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class CELoss(nn.Module):
    ''' Cross Entropy Loss with label smoothing '''
    def __init__(self, label_smooth=None, class_num=2):
        super().__init__()
        self.label_smooth = label_smooth
        self.class_num = class_num
 
    def forward(self, pred, target):
        ''' 
        Args:
            pred: prediction of model output    [N, M]
            target: ground truth of sampler [N]
        '''
        eps = 1e-12
        
        if self.label_smooth is not None:
            # cross entropy loss with label smoothing
            logprobs = F.log_softmax(pred, dim=1)   # softmax + log
            target = F.one_hot(target, self.class_num)  # 转换成one-hot
            
            # label smoothing
            # 实现 1
            # target = (1.0-self.label_smooth)*target + self.label_smooth/self.class_num    
            # 实现 2
            # implement 2
            target = torch.clamp(target.float(), min=self.label_smooth/(self.class_num-1), max=1.0-self.label_smooth)
            loss = -1*torch.sum(target*logprobs, 1)
        else:
            # standard cross entropy loss
            loss = -1.*pred.gather(1, target.unsqueeze(-1)) + torch.log(torch.exp(pred+eps).sum(dim=1))
        return loss.mean()

在训练过程中调用：

from CELoss import CELoss
loss2 = CELoss(label_smooth=0.05, class_num=2)  # 标签平滑
with torch.no_grad():
    for texts, labels in data_iter:
        outputs = model(texts) # [batch_size, num_class=2]
        loss = F.cross_entropy(outputs, labels)
        # loss=loss2(outputs, labels)

label smooth

推荐阅读更多精彩内容