"""Plot the sentiment distribution of tweets stored in MongoDB.

Reads up to 100 documents from the configured collection, translates
non-English tweets to English, scores each tweet with VADER and draws one
box plot for the positive compound scores and one for the negative ones.
"""
import statistics

from deep_translator import GoogleTranslator
import matplotlib.pyplot as plt
from nltk.stem import WordNetLemmatizer
from pymongo import MongoClient
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize MongoDB client and collection
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "tweet_cleaned"

client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client[database_name]
collection = db[collection_name]

# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Initialize WordNet Lemmatizer
lemmatizer = WordNetLemmatizer()


def _get_nested(document, dotted_key):
    """Return the value stored under *dotted_key* in *document*, or None.

    ``dict.get("a.b")`` only looks for a key literally named ``"a.b"``; it
    does not descend into sub-documents the way MongoDB query paths do.
    This helper first honours a literal dotted key (so documents that really
    use one keep working) and otherwise walks the path segment by segment.
    """
    if dotted_key in document:
        return document[dotted_key]

    current = document
    for part in dotted_key.split("."):
        if not isinstance(current, dict) or part not in current:
            return None
        current = current[part]
    return current


# Function to clean text by lemmatizing
def clean_text(text):
    """Return *text* with every whitespace-separated token lemmatized.

    The lemmatizer works on one word per call, so handing it a whole
    sentence leaves the sentence untouched. Tokens are therefore lemmatized
    individually and re-joined with single spaces (which also strips leading
    and trailing whitespace).
    """
    return " ".join(lemmatizer.lemmatize(token) for token in text.split())


# Function to translate text to English
def translate_to_english(text, lang):
    """Translate *text* to English and return the translated string.

    *lang* is accepted for interface compatibility but is not forwarded:
    the source language is auto-detected by the translator.
    NOTE(review): presumably the stored language tags are not guaranteed to
    be codes the translator accepts - confirm before passing *lang* through.
    """
    if not text:
        # Nothing to translate; avoid a pointless network round-trip.
        return text
    return GoogleTranslator(source='auto', target='en').translate(text)


# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the VADER compound polarity score for *text*."""
    return analyzer.polarity_scores(text)['compound']


# Initialize lists for sentiment scores
positive_scores = []
negative_scores = []

try:
    # Iterate through tweets in MongoDB collection
    cursor = collection.find().limit(100)
    for doc in cursor:
        lang = _get_nested(doc, "json_data.lang")
        tweet_text = _get_nested(doc, 'json_data.text')

        # Documents without usable text cannot be translated or scored.
        if not tweet_text:
            continue

        if lang != 'en':
            translated_text = translate_to_english(tweet_text, lang)
        else:
            translated_text = tweet_text

        # Clean and lemmatize text
        cleaned_text = clean_text(translated_text)

        # Perform sentiment analysis
        sentiment = analyze_sentiment(cleaned_text)

        # Categorize sentiment scores. A score of exactly 0 is neutral, so it
        # is left out of both groups instead of being counted as negative.
        if sentiment > 0:
            positive_scores.append(sentiment)
        elif sentiment < 0:
            negative_scores.append(sentiment)
finally:
    # Close MongoDB client even when reading, translating or scoring fails.
    client.close()

plt.figure(figsize=(10, 6))

# Box plot for positive scores
plt.boxplot(positive_scores, positions=[1], widths=0.6, patch_artist=True,
            boxprops=dict(facecolor='lightgreen'),
            medianprops=dict(color='darkgreen'), showfliers=True)

# Box plot for negative scores
plt.boxplot(negative_scores, positions=[2], widths=0.6, patch_artist=True,
            boxprops=dict(facecolor='lightcoral'),
            medianprops=dict(color='darkred'), showfliers=True)

# Plot details
plt.title('Sentiment Analysis of Tweets')
plt.ylabel('Compound Sentiment Score')
plt.xticks([1, 2], ['Positive', 'Negative'])
plt.grid(True)
plt.tight_layout()

# Show plot
plt.show()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter