from deep_translator import GoogleTranslator
from pymongo import MongoClient
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string

# MongoDB connection details
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "final"

# Connect to MongoDB
client = MongoClient(mongo_uri)
db = client[database_name]
collection = db[collection_name]


def translate_to_english(text, lang):
    """Return *text* translated to English unless it is already English.

    Args:
        text: The text to translate.
        lang: Language code stored with the text; ``'en'`` means no
            translation is attempted.

    Returns:
        *text* unchanged when ``lang`` is ``'en'`` or when *text* is empty
        or ``None``; otherwise whatever ``GoogleTranslator.translate``
        returns for it (source language is auto-detected).
    """
    # Nothing to send to the translation service for English or empty input.
    if lang == 'en' or not text:
        return text
    return GoogleTranslator(source='auto', target='en').translate(text)


def _get_field(doc, dotted_key):
    """Return the value at *dotted_key* in *doc*, or ``None`` if absent.

    A plain ``dict.get("a.b")`` looks for one key literally named ``"a.b"``;
    it does not descend into ``doc["a"]["b"]``. The literal key is tried
    first so documents that really store such a key keep working, then the
    path is walked one segment at a time.
    """
    if dotted_key in doc:
        return doc[dotted_key]
    current = doc
    for part in dotted_key.split("."):
        if not isinstance(current, dict):
            return None
        current = current.get(part)
    return current


# Fetch text data from MongoDB
texts = []
cursor = collection.find().limit(100)

# Initialize stopwords
stop_words = set(stopwords.words('english'))

# Process each document fetched from MongoDB
for doc in cursor:
    text = _get_field(doc, "json_data.text")
    lang = _get_field(doc, "json_data.lang")

    # A document without text has nothing to contribute; passing None on
    # would fail in the tokenizer.
    if not text:
        continue

    text = translate_to_english(text, lang)
    # Defensive: skip if the translator handed back nothing usable.
    if not text:
        continue

    # Tokenize the text
    tokens = word_tokenize(text)

    # Remove punctuation and stopwords
    tokens = [
        word.lower()
        for word in tokens
        if word.isalpha() and word.lower() not in stop_words
    ]

    # Keep the cleaned text of this document for the word cloud
    if tokens:
        texts.append(" ".join(tokens))

# Accumulate all text for generating word cloud
all_text = " ".join(texts)

if all_text:
    # Generate the word cloud
    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate(all_text)

    # Plotting the word cloud
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('Word Cloud of Text Field in Tweets')
    plt.show()
else:
    # WordCloud cannot be generated from an empty corpus.
    print("No usable text found in the fetched documents; nothing to plot.")
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter