# remove punc, segment and stopword
def punc_jieba(text, sep = ' '):
#     stopword = stopwords(["zh"])
    text_punc = re.sub("[\s+\>\<\:\?\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()!,❤。~《》:()【】「」?”“;:、【】╮╯▽╰╭★→「」]+".encode().decode("utf8"),
    text_cut = sep.join(jieba.cut(text_punc, cut_all=False)).lower()
#     tokens = word_tokenize(text_cut)
#     clean_text = [word for word in tokens if not word in stopword]
    return text_cut
# mothod1
def stop_word(text):
    stopword = stopwords(['zh'])
    remove_stw = [word for word in text if not word in stopword]
    return remove_stw
df['text'] = df['text'].apply(stop_word)
# mothod2
stopword = stopwords(['zh'])
df['text'] = df['text'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stopword)]))
downloadDownload PNG downloadDownload JPEG downloadDownload SVG

Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!

Click to optimize width for Twitter