NER 命名实体识别:实体级(entity-level)评估的精确率、召回率和 F1 值

 

# Sample predicted tag sequence, space-separated. Both "0" (zero) and "O"
# appear in it as the outside tag.
pre = (
    "0 0 B_SONG I_SONG I_SONG 0 B_SONG I_SONG I_SONG 0 0 "
    "B_SINGER I_SINGER I_SINGER 0 O O O "
    "B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"
)
# Sample reference tag sequence in the same format.
true = (
    "0 0 B_SONG I_SONG I_SONG 0 0 0 0 0 0 "
    "B_SINGER I_SINGER I_SINGER 0 O O O "
    "B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"
)

# (begin tag, inside tag) pair for every entity type that is evaluated.
tags = [
    ("B_SONG", "I_SONG"),
    ("B_SINGER", "I_SINGER"),
    ("B_ALBUM", "I_ALBUM"),
    ("B_TAG", "I_TAG"),
]


def _find_tag(labels, B_label="B_SONG", I_label="I_SONG"):
    """Locate every entity span of one type in a BIO-style tag sequence.

    A span opens at ``B_label`` and extends over the ``I_label`` tags that
    immediately follow it. It is closed by any other tag, by a new
    ``B_label``, or by the end of the sequence, so single-token entities and
    entities that end the sequence are reported as well. ``I_label`` tags
    that do not continue an open span are ignored.

    :param labels: tag sequence, either a list of tags or one
        whitespace-separated string
    :param B_label: tag that marks the first token of an entity
    :param I_label: tag that marks a continuation token of an entity
    :return: list of ``(start_index, length)`` tuples in order of appearance
    """
    if isinstance(labels, str):
        labels = labels.strip().split()

    result = []
    start = None  # index where the currently open span began, if any
    for idx, label in enumerate(labels):
        if label == I_label and start is not None:
            # Still inside the open span.
            continue
        # Every tag other than a continuing I tag closes the open span, so
        # "O", its "0" alias, and tags of other entity types all terminate it.
        if start is not None:
            result.append((start, idx - start))
            start = None
        if label == B_label:
            start = idx
    if start is not None:
        # The sequence ended while a span was still open.
        result.append((start, len(labels) - start))
    return result


def find_all_tag(labels):
    """Collect the entity spans of every tag pair listed in ``tags``.

    :param labels: tag sequence, handed to ``_find_tag`` unchanged
    :return: dict whose keys are the second ``_``-separated field of each
        begin tag (e.g. ``"SONG"`` for ``"B_SONG"``) and whose values are the
        span lists returned by ``_find_tag`` for that tag pair
    """
    return {
        begin.split("_")[1]: _find_tag(labels, B_label=begin, I_label=inside)
        for begin, inside in tags
    }


def precision(pre_labels, true_labels):
    """Entity-level precision: share of predicted entities that are correct.

    A predicted entity counts as correct only when the reference sequence
    contains an entity of the same type with the same start and length, so a
    prediction that covers only part of a longer reference entity is wrong.

    :param pre_labels: predicted tags, a list or a whitespace-separated string
    :param true_labels: reference tags, in the same format as ``pre_labels``
    :return: correct predicted entities divided by all predicted entities,
        or 0.0 when no entity was predicted
    """
    if isinstance(pre_labels, str):
        pre_labels = pre_labels.strip().split()
    if isinstance(true_labels, str):
        true_labels = true_labels.strip().split()
    # "0" (zero) is accepted as an alias for the outside tag "O".
    pre_labels = ["O" if label == "0" else label for label in pre_labels]
    true_labels = ["O" if label == "0" else label for label in true_labels]

    predicted = {
        (name, start, length)
        for name, spans in find_all_tag(pre_labels).items()
        for start, length in spans
    }
    if not predicted:
        # Nothing was predicted, so there is nothing to divide by.
        return 0.0
    reference = {
        (name, start, length)
        for name, spans in find_all_tag(true_labels).items()
        for start, length in spans
    }
    return len(predicted & reference) / len(predicted)




def recall(pre_labels, true_labels):
    """Entity-level recall: share of reference entities that were predicted.

    A reference entity counts as found only when the predicted sequence
    contains an entity of the same type with the same start and length, so a
    prediction that runs past the end of the reference entity does not count.

    :param pre_labels: predicted tags, a list or a whitespace-separated string
    :param true_labels: reference tags, in the same format as ``pre_labels``
    :return: reference entities that were predicted exactly divided by all
        reference entities, or 0.0 when the reference has no entity
    """
    if isinstance(pre_labels, str):
        pre_labels = pre_labels.strip().split()
    if isinstance(true_labels, str):
        true_labels = true_labels.strip().split()
    # "0" (zero) is accepted as an alias for the outside tag "O".
    pre_labels = ["O" if label == "0" else label for label in pre_labels]
    true_labels = ["O" if label == "0" else label for label in true_labels]

    reference = {
        (name, start, length)
        for name, spans in find_all_tag(true_labels).items()
        for start, length in spans
    }
    if not reference:
        # No reference entities, so there is nothing to divide by.
        return 0.0
    predicted = {
        (name, start, length)
        for name, spans in find_all_tag(pre_labels).items()
        for start, length in spans
    }
    return len(reference & predicted) / len(reference)


def f1_score(precision, recall):
    """Return the F1 score, the harmonic mean of precision and recall.

    :param precision: precision value
    :param recall: recall value
    :return: ``2 * precision * recall / (precision + recall)``, or 0.0 when
        ``precision + recall`` is zero
    """
    total = precision + recall
    if total == 0:
        # Both metrics are zero; avoid dividing by zero.
        return 0.0
    return (2 * precision * recall) / total

if __name__ == '__main__':
    # Store the results under their own names so that the metric functions
    # ``precision`` and ``recall`` are not rebound to floats.
    precision_value = precision(pre, true)
    recall_value = recall(pre, true)
    f1 = f1_score(precision_value, recall_value)
    print(precision_value)
    print(recall_value)
    print(f1)

  

上一篇:Spring Framework中常见的事务传播陷阱(译文)


下一篇:coco音乐站总体设计