1. TensorFlow version
# Binary classification
import tensorflow as tf
from tensorflow.keras import backend as K

def binary_focal_loss(gamma=2, alpha=0.25):
    alpha = tf.constant(alpha, dtype=tf.float32)
    gamma = tf.constant(gamma, dtype=tf.float32)

    def focal_loss_sigmoid(y_true, y_pred):
        labels = tf.cast(y_true, tf.float32)
        # Clip predictions so that log() never sees exactly 0 or 1
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # As written here, alpha weights the negative-class term and (1 - alpha) the positive one
        L = -labels * (1 - alpha) * ((1 - y_pred) ** gamma) * K.log(y_pred) \
            - (1 - labels) * alpha * (y_pred ** gamma) * K.log(1 - y_pred)
        return L

    return focal_loss_sigmoid
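A minimal usage sketch with tf.keras follows; the toy model and input shape are hypothetical and only show where the returned loss function plugs in:

# Hypothetical binary classifier, for illustration only
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(10,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer="adam",
              loss=binary_focal_loss(gamma=2, alpha=0.25),
              metrics=["accuracy"])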
2. PyTorch version
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=1, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.size_average = size_average
        self.epsilon = 0.000001  # small constant used to keep pt away from 0 and 1

    def forward(self, logits, labels):
        """
        Calculates the focal loss.
        logits: batch_size * labels_length * seq_length
        labels: batch_size * seq_length
        """
        # Flatten any extra trailing dimensions so the shapes match the
        # documented (batch, labels_length, seq_length) / (batch, seq_length) layout.
        if labels.dim() > 2:
            labels = labels.contiguous().view(labels.size(0), labels.size(1), -1)
            labels = labels.transpose(1, 2)
            labels = labels.contiguous().view(-1, labels.size(2)).squeeze()
        if logits.dim() > 3:
            logits = logits.contiguous().view(logits.size(0), logits.size(1), logits.size(2), -1)
            logits = logits.transpose(2, 3)
            logits = logits.contiguous().view(-1, logits.size(1), logits.size(3)).squeeze()

        assert logits.size(0) == labels.size(0)
        assert logits.size(2) == labels.size(1)

        batch_size = logits.size(0)
        labels_length = logits.size(1)
        seq_length = logits.size(2)

        # Turn the integer labels into a one-hot tensor of shape
        # (batch_size, labels_length, seq_length).
        new_label = labels.long().unsqueeze(1)
        label_onehot = torch.zeros(
            batch_size, labels_length, seq_length, device=logits.device
        ).scatter_(1, new_label, 1)
        # Focal loss: FL = -alpha * (1 - pt)^gamma * log(pt), where pt is the
        # softmax probability of the true class at each position.
        log_p = F.log_softmax(logits, dim=1)
        log_pt = (label_onehot * log_p).sum(dim=1)   # (batch_size, seq_length)
        pt = log_pt.exp().clamp(self.epsilon, 1.0 - self.epsilon)
        fl = -self.alpha * (1 - pt) ** self.gamma * log_pt
        if self.size_average:
            return fl.mean()
        else:
            return fl.sum()
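A quick smoke test with random tensors, assuming 5 classes over a sequence of length 10 (all shapes and values below are illustrative, not from the original):

# Illustrative shapes: batch of 4, 5 classes, sequence length 10
logits = torch.randn(4, 5, 10, requires_grad=True)
labels = torch.randint(0, 5, (4, 10))

criterion = FocalLoss(gamma=2, alpha=1, size_average=True)
loss = criterion(logits, labels)   # scalar because size_average=True
loss.backward()
print(loss.item())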