Snippets Collections
### Utility function for pre-processing the text
import spacy

# Load the English language model "en_core_web_sm" once at module level and
# keep the resulting pipeline object in `nlp`; preprocess() below calls it
# on each input text.
nlp = spacy.load("en_core_web_sm") 

def preprocess(text):
    """Return ``text`` reduced to its lemmas, joined by single spaces.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    flagged as stop words or punctuation are dropped; every remaining token
    contributes its lemma, in original order.
    """
    lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct)
    ]
    return " ".join(lemmas)

# Run preprocess on every entry of the 'Text' column and store the results
# in the 'preprocessed_txt' column.
# NOTE(review): `df` is not defined in this snippet — presumably a pandas
# DataFrame created earlier; confirm before running.
df['preprocessed_txt'] = df['Text'].apply(preprocess)
star

Mon Sep 05 2022 09:41:21 GMT+0000 (Coordinated Universal Time)

#python #spacy #nlp #preprocessing

Save snippets that work with our extensions

Available in the Chrome Web Store Get Firefox Add-on Get VS Code extension