Source code for sentiment_classifier.nlp.utils

import io
import numpy as np


def load_word_vectors(filepath, word_index, vector_size):
    """Load pre-trained word embeddings for a known vocabulary.

    The file is read as UTF-8 text (undecodable bytes are ignored). Its
    first line must be a header made of exactly two integers; every
    following line is a token followed by its space-separated vector
    components.

    Args:
        filepath (str): path to the embedding file
        word_index (dict): maps each word to its integer row index, e.g.
            the ``word_index`` of a keras Tokenizer
        vector_size (int): embedding dimension, must match the trained
            word vectors

    Returns:
        embedding_matrix (np.ndarray): a matrix of shape
            (len(word_index) + 1, vector_size) whose row ``word_index[w]``
            holds the embedding read for ``w``. Rows for words that do not
            appear in the file (and any row no word maps to) stay all-zero.

    Raises:
        ValueError: if the header line is not exactly two integers, if a
            vector component is not a valid float, or if a matched vector
            does not have ``vector_size`` components.

    """
    embedding_matrix = np.zeros((len(word_index) + 1, vector_size))

    # The context manager guarantees the handle is released even when a
    # malformed line raises part-way through the file.
    with io.open(
        filepath, "r", encoding="utf-8", newline="\n", errors="ignore"
    ) as fin:
        # Consume the header. The two values are not needed afterwards, but
        # parsing them rejects files that lack the expected header line.
        _n_vectors, _dim = map(int, fin.readline().split())

        for line in fin:
            # rstrip() drops the newline and any trailing separator so no
            # empty token reaches float().
            tokens = line.rstrip().split(" ")
            row = word_index.get(tokens[0])
            if row is None:
                continue
            embedding_matrix[row] = np.fromiter(
                map(float, tokens[1:]), dtype=float
            )

    return embedding_matrix