# Train GloVe word embeddings using the glove_python library.
# Install it first. NOTE: the leading "!" is IPython/Jupyter shell-escape
# syntax; in a plain Python script run `pip install glove_python` from a
# shell instead.
!pip install glove_python
from glove import Corpus, Glove
# Create the corpus object.
corpus = Corpus()
# Fit the corpus to generate the co-occurrence matrix which is used in GloVe.
# NOTE(review): `lines` is not defined in this snippet -- presumably an
# iterable of tokenized sentences (lists of str); confirm against the caller.
corpus.fit(lines, window=10)
# Configure the model (no_components=5, learning_rate=0.05) and train it on
# the co-occurrence matrix for 30 epochs using 4 threads.
glove = Glove(no_components=5, learning_rate=0.05)
glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
# Hand the corpus dictionary to the model; presumably this is the word <-> id
# mapping needed to look vectors up by word -- see the glove_python docs.
glove.add_dictionary(corpus.dictionary)
# Persist the trained model to 'glove.model' in the current directory.
glove.save('glove.model')
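# NOTE(review): stray word "Save" here looks like web-page residue, not code.
# The same can be done for FastText, using gensim: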
from gensim.models import FastText
from gensim.test.utils import common_texts  # some example sentences
from gensim.test.utils import datapath
import numpy as np

# --- Train on an in-memory list of tokenized sentences -----------------------
print(common_texts[0])  # ['human', 'interface', 'computer']
print(len(common_texts))  # 9

# Explicit three-step form: instantiate, build the vocabulary, then train.
model = FastText(vector_size=4, window=3, min_count=1)  # instantiate
model.build_vocab(sentences=common_texts)
model.train(sentences=common_texts, total_examples=len(common_texts), epochs=10)  # train

# One-step form: passing `sentences` to the constructor does all of the above.
model2 = FastText(vector_size=4, window=3, min_count=1, sentences=common_texts, epochs=10)

# The two forms are expected to yield matching vectors; the original REPL
# session showed True. Printed so the result is not silently discarded when
# this runs as a script.
print(np.allclose(model.wv['computer'], model2.wv['computer']))

# --- Train from a corpus file on disk ----------------------------------------
corpus_file = datapath('lee_background.cor')  # absolute path to corpus
model3 = FastText(vector_size=4, window=3, min_count=1)
model3.build_vocab(corpus_file=corpus_file)  # scan over corpus to build the vocabulary
total_words = model3.corpus_total_words  # number of words in the corpus
model3.train(corpus_file=corpus_file, total_words=total_words, epochs=5)
from gensim.utils import tokenize
from gensim import utils


class MyIter:
    """Restartable iterable over the tokenized lines of a text file.

    The file is reopened on every ``__iter__`` call, so one instance can be
    iterated more than once.

    NOTE: relies on ``datapath`` (from ``gensim.test.utils``) already being
    imported earlier in this file.
    """

    def __iter__(self):
        """Yield each line of 'crime-and-punishment.txt' as a list of tokens."""
        path = datapath('crime-and-punishment.txt')
        with utils.open(path, 'r', encoding='utf-8') as fin:
            for line in fin:
                yield list(tokenize(line))


from gensim.test.utils import get_tmpfile

# --- Train from a custom iterable ---------------------------------------------
# A fresh MyIter() is passed to each call, so vocabulary building and training
# each get their own pass over the data.
model4 = FastText(vector_size=4, window=3, min_count=1)
model4.build_vocab(sentences=MyIter())
total_examples = model4.corpus_count
model4.train(sentences=MyIter(), total_examples=total_examples, epochs=5)

# --- Save a model to disk and load it back -------------------------------------
# Note that this persists `model` (trained on common_texts), not `model4`.
fname = get_tmpfile("fasttext.model")
model.save(fname)
model = FastText.load(fname)
# Reference: https://radimrehurek.com/gensim/models/fasttext.html
# NOTE(review): stray word "Comments" here looks like web-page residue, not code.