# 加载词向量 (load word vectors)

import numpy as np
from numpy import dtype, fromstring, float32 as REAL
# Example input file: fname = 'glove.refine.txt'
class WordLoader(object):
    """Load pre-trained word vectors from a word2vec-style file.

    Both supported formats start with a header line ``"<vocab_size> <vec_size>"``.

    * Text format: one entry per line, ``word v1 v2 ... vN`` separated by
      single spaces.
    * Binary format: each entry is the word, a single space, then
      ``vec_size`` raw float32 values; entries may be separated by a newline.
    """

    def load_word_vector(self, fname, binary=None):
        """Read the word vectors stored in ``fname``.

        Args:
            fname: Path of the vector file.
            binary: ``True`` for the binary format, ``False`` for the text
                format. When ``None`` the format is inferred from the file
                extension (``.txt`` -> text, ``.bin`` -> binary).

        Returns:
            A dict mapping each word to its vector. Text files yield a list
            of float32 values per word; binary files yield a float32 numpy
            array per word.

        Raises:
            NotImplementedError: ``binary`` is ``None`` and the extension is
                neither ``.txt`` nor ``.bin``.
            ValueError: the header line is not two integers.
        """
        if binary is None:
            if fname.endswith('.txt'):
                binary = False
            elif fname.endswith('.bin'):
                binary = True
            else:
                raise NotImplementedError('Cannot infer binary from %s' % (fname))

        # Binary payloads must be read as bytes: text mode would try to decode
        # the raw float data and corrupt (or reject) it.
        with open(fname, 'rb' if binary else 'r') as fin:
            header = fin.readline()
            # The header is parsed in both modes so a malformed file fails
            # loudly up front; only the binary reader needs the sizes.
            vocab_size, vec_size = map(int, header.split())
            if binary:
                return WordLoader._read_binary(fin, vocab_size, vec_size)
            return WordLoader._read_text(fin)

    @staticmethod
    def _read_binary(fin, vocab_size, vec_size):
        """Read up to ``vocab_size`` (word, float32 vector) entries from ``fin``."""
        vocab = {}
        # Number of bytes occupied by one vector (float32 is 4 bytes each).
        binary_len = dtype(REAL).itemsize * vec_size
        for _ in range(vocab_size):
            chars = []
            while True:
                ch = fin.read(1)
                if not ch:
                    # EOF before the word separator: the file holds fewer
                    # entries than the header promised. Stop instead of
                    # spinning forever on empty reads.
                    return vocab
                if ch == b' ':
                    break
                # Newlines only separate entries; they are not part of a word.
                if ch != b'\n':
                    chars.append(ch)
            buf = fin.read(binary_len)
            if len(buf) != binary_len:
                # Truncated vector at the end of the file.
                break
            try:
                word = b''.join(chars).decode('utf-8')
                # frombuffer returns a read-only view of ``buf``; copy so the
                # caller gets an independent, writable array.
                vector = np.frombuffer(buf, dtype=REAL).copy()
            except ValueError:
                # Best-effort loading: skip an entry whose word is not valid
                # UTF-8 (UnicodeDecodeError is a ValueError) or whose payload
                # cannot be interpreted, and keep reading the rest.
                continue
            vocab[word] = vector
        return vocab

    @staticmethod
    def _read_text(fin):
        """Read ``word v1 v2 ...`` lines from ``fin`` until EOF."""
        vocab = {}
        for line in fin:
            parts = line.strip().split(' ')
            word = parts[0]
            if not word:
                # Blank line: nothing to store.
                continue
            try:
                # Build the list eagerly so conversion errors surface here
                # rather than later in the caller.
                weights = [REAL(value) for value in parts[1:]]
            except ValueError:
                # Best-effort loading: skip lines with malformed numbers.
                continue
            vocab[word] = weights
        return vocab
上一篇:c++ 各种奇门鬼爪的构造函数 和 类的初始化


下一篇:运算符重载