# BOXPLOT POSITIVE NEGATIVE
# Tue Jun 18 2024 22:01:31 GMT+0000 (Coordinated Universal Time)
# Saved by @madgakantara
from pymongo import MongoClient
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import statistics
from deep_translator import GoogleTranslator
from nltk.stem import WordNetLemmatizer
import matplotlib.pyplot as plt
# MongoDB connection settings
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "tweet_cleaned"

# Open the client (server selection times out after 5000 ms) and resolve
# the database and collection that hold the tweets
client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client.get_database(database_name)
collection = db.get_collection(collection_name)

# Sentiment analyzer shared by analyze_sentiment()
analyzer = SentimentIntensityAnalyzer()

# WordNet lemmatizer shared by clean_text()
lemmatizer = WordNetLemmatizer()
# Function to clean text by lemmatizing
def clean_text(text):
    """Lemmatize every whitespace-separated word of ``text``.

    ``WordNetLemmatizer.lemmatize`` operates on a single word; handing it a
    whole sentence leaves the sentence unchanged. The text is therefore split
    on whitespace, each token is lemmatized on its own, and the results are
    re-joined with single spaces.

    Args:
        text: The text to clean. ``None`` and the empty string are accepted.

    Returns:
        The lemmatized tokens joined by single spaces (so runs of whitespace
        collapse and there is no leading/trailing whitespace), or ``""`` when
        ``text`` is empty or ``None``.
    """
    if not text:
        return ""
    # split() with no argument already discards leading/trailing whitespace,
    # so no separate strip() is needed.
    return " ".join(lemmatizer.lemmatize(word) for word in text.split())
# Function to translate text to English
def translate_to_english(text, lang):
    """Translate ``text`` to English with automatic source-language detection.

    Args:
        text: The text to translate.
        lang: Language code of ``text``. Accepted for the caller's benefit
            but not used: the translator is always created with
            ``source='auto'``.

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for ``text``.
    """
    translator = GoogleTranslator(source='auto', target='en')
    return translator.translate(text)
# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the ``'compound'`` entry of the analyzer's polarity scores.

    Args:
        text: The text to score.

    Returns:
        The value stored under ``'compound'`` in the result of
        ``analyzer.polarity_scores(text)``.
    """
    return analyzer.polarity_scores(text)['compound']
# Initialize lists for sentiment scores
positive_scores = []
negative_scores = []


def _get_tweet_field(doc, field):
    """Return the ``field`` entry of a document's ``json_data`` sub-document.

    ``dict.get`` does not interpret dotted paths the way MongoDB queries do,
    so the nested document is walked explicitly. A literal
    ``"json_data.<field>"`` top-level key is still honoured as a fallback.
    Returns ``None`` when neither is present.
    """
    nested = doc.get("json_data")
    if isinstance(nested, dict) and field in nested:
        return nested[field]
    return doc.get("json_data." + field)


# Iterate through tweets in MongoDB collection
try:
    cursor = collection.find().limit(100)
    for doc in cursor:
        lang = _get_tweet_field(doc, "lang")
        tweet_text = _get_tweet_field(doc, "text")

        # Nothing to translate or score without text
        if not tweet_text:
            continue

        if lang != 'en':
            translated_text = translate_to_english(tweet_text, lang)
        else:
            translated_text = tweet_text

        # Clean and lemmatize text
        cleaned_text = clean_text(translated_text)

        # Perform sentiment analysis
        sentiment = analyze_sentiment(cleaned_text)

        # Categorize sentiment scores; a score of exactly 0 is neutral and
        # belongs to neither the positive nor the negative group
        if sentiment > 0:
            positive_scores.append(sentiment)
        elif sentiment < 0:
            negative_scores.append(sentiment)
finally:
    # Close MongoDB client even if translation or scoring failed part-way
    client.close()
plt.figure(figsize=(10, 6))

# One box per sentiment group:
# (scores, x position, box fill colour, median line colour)
box_specs = (
    (positive_scores, 1, 'lightgreen', 'darkgreen'),
    (negative_scores, 2, 'lightcoral', 'darkred'),
)
for scores, position, face_color, median_color in box_specs:
    plt.boxplot(
        scores,
        positions=[position],
        widths=0.6,
        patch_artist=True,
        boxprops=dict(facecolor=face_color),
        medianprops=dict(color=median_color),
        showfliers=True,
    )

# Titles, axis labels and layout
plt.title('Sentiment Analysis of Tweets')
plt.ylabel('Compound Sentiment Score')
plt.xticks([1, 2], ['Positive', 'Negative'])
plt.grid(True)
plt.tight_layout()

# Display the figure
plt.show()



# Comments