# 正向最大匹配法 (Forward Maximum Matching word segmentation)

class MM(object):
    """Forward maximum matching (FMM) word segmenter.

    The segmenter scans the text left to right. At each position it tries
    the longest candidate first and shrinks it one character at a time
    until the candidate is found in the dictionary.

    Attributes are plain instance fields:
      * ``dictionary`` -- set of known words, one per non-blank line of the
        dictionary file.
      * ``maximum`` -- length of the longest word in ``dictionary``; it
        bounds the candidate size tried at each position.
    """

    def __init__(self, dic_path):
        """Load the word dictionary from a UTF-8 text file.

        Args:
            dic_path: Path to a file holding one word per line. Surrounding
                whitespace is stripped and blank lines are ignored.

        Raises:
            OSError: If the file cannot be opened.
        """
        self.dictionary = set()
        self.maximum = 0
        with open(dic_path, 'r', encoding='utf-8') as f:
            for line in f:
                word = line.strip()
                if not word:
                    continue
                self.dictionary.add(word)
                # Track the longest entry so cut() knows where to start.
                self.maximum = max(self.maximum, len(word))

    def cut(self, text):
        """Segment ``text`` into dictionary words, scanning left to right.

        Args:
            text: The string to segment.

        Returns:
            A list of the matched words in the order they occur in ``text``.
            A character at which no dictionary word starts is skipped and
            does not appear in the result.
        """
        result = []
        pos = 0
        length = len(text)
        while pos < length:
            # Never try a candidate longer than what is left of the text.
            longest = min(self.maximum, length - pos)
            for size in range(longest, 0, -1):
                piece = text[pos:pos + size]
                if piece in self.dictionary:
                    result.append(piece)
                    pos += size
                    break
            else:
                # No word starts here: skip this character and move on.
                pos += 1
        return result
def main():
    """Segment a sample sentence and print the number of words found."""
    dic_path = r'C:\Users\ljy\Desktop\learning-nlp-master\chapter-3\data\imm_dic.utf8'
    segmenter = MM(dic_path)
    sentence = "南京市长江大桥"
    words = segmenter.cut(sentence)
    print(len(words))
# Run the demo only when executed as a script, so that importing this
# module does not trigger the dictionary load and segmentation.
if __name__ == "__main__":
    main()

 

上一篇:Python 数据类型字典(Dictionary)


下一篇:36进制转换10进制