# thiscodeWorks | thiscodeWorks

from pymongo import MongoClient
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import statistics
from deep_translator import GoogleTranslator
from nltk.stem import WordNetLemmatizer
import matplotlib.pyplot as plt

# --- MongoDB connection settings (local instance) ---
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "tweet_cleaned"

# Server selection gives up after 5000 ms instead of the driver default.
client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client.get_database(database_name)
collection = db.get_collection(collection_name)

# VADER analyzer instance used by analyze_sentiment()
analyzer = SentimentIntensityAnalyzer()

# WordNet lemmatizer instance used by clean_text()
lemmatizer = WordNetLemmatizer()

# Function to clean text by lemmatizing
def clean_text(text):
    """Return *text* with every whitespace-separated token lemmatized.

    ``WordNetLemmatizer.lemmatize`` operates on a single word, so the text
    is split into tokens and each token is lemmatized on its own; passing
    the whole sentence in one call would leave it effectively unchanged.

    Args:
        text: The (already translated) tweet text. May be ``None`` or empty.

    Returns:
        The lemmatized tokens joined by single spaces, without leading or
        trailing whitespace. ``""`` when *text* is ``None`` or empty.
    """
    # Missing text (e.g. a document without a text field) yields an empty
    # string instead of an AttributeError further down.
    if not text:
        return ""

    # str.split() with no argument drops leading/trailing whitespace and
    # collapses runs of whitespace; punctuation stays attached to its token.
    return " ".join(lemmatizer.lemmatize(token) for token in text.split())

# Function to translate text to English
def translate_to_english(text, lang):
    """Translate *text* to English using Google Translate.

    Args:
        text: The text to translate. ``None``, non-string and blank values
            are accepted and short-circuit to ``""`` without a network call.
        lang: Language code of *text* as reported by the data source. It is
            currently not used: the source language is auto-detected. The
            parameter is kept so existing callers keep working.

    Returns:
        The English translation as returned by ``GoogleTranslator.translate``,
        or ``""`` when there is nothing to translate.
    """
    # Nothing to translate: avoid handing an invalid payload to the
    # translator and skip the network round trip.
    if not isinstance(text, str) or not text.strip():
        return ""

    translator = GoogleTranslator(source='auto', target='en')
    return translator.translate(text)

# Function to analyze sentiment
def analyze_sentiment(text):
    """Return the VADER compound polarity score for *text*.

    The module-level ``analyzer`` computes the full polarity dictionary;
    only its ``'compound'`` entry is returned.
    """
    return analyzer.polarity_scores(text)['compound']

# Sentiment scores collected from the processed tweets.
# Scores of exactly 0 (neutral) are stored in neither list.
positive_scores = []
negative_scores = []


def _get_field(document, dotted_key):
    """Return the value stored under *dotted_key* in *document*, or None.

    ``dict.get("a.b")`` only matches a key literally named ``"a.b"``; it does
    not descend into nested sub-documents. This helper first tries the
    literal key and then walks the dot-separated path through nested dicts.
    """
    if dotted_key in document:
        return document[dotted_key]

    value = document
    for part in dotted_key.split("."):
        if not isinstance(value, dict):
            return None
        value = value.get(part)
    return value


# Iterate through (at most 100) tweets in the MongoDB collection.
# The try/finally guarantees the client is closed even if translation or
# analysis raises part-way through.
try:
    cursor = collection.find().limit(100)

    for doc in cursor:
        lang = _get_field(doc, "json_data.lang")
        tweet_text = _get_field(doc, "json_data.text")

        # Skip documents without usable text; there is nothing to score.
        if not isinstance(tweet_text, str) or not tweet_text.strip():
            continue

        # Only non-English tweets need a translation round trip.
        if lang != 'en':
            translated_text = translate_to_english(tweet_text, lang)
        else:
            translated_text = tweet_text

        # Clean and lemmatize text
        cleaned_text = clean_text(translated_text)

        # Perform sentiment analysis
        sentiment = analyze_sentiment(cleaned_text)

        # Categorize sentiment scores; a score of exactly 0 is neutral and
        # must not be counted as negative.
        if sentiment > 0:
            positive_scores.append(sentiment)
        elif sentiment < 0:
            negative_scores.append(sentiment)
finally:
    # Close MongoDB client
    client.close()

# Draw the positive and negative score groups as side-by-side box plots.
fig, ax = plt.subplots(figsize=(10, 6))

# One entry per box: (scores, x position, box fill colour, median colour)
box_specs = (
    (positive_scores, 1, 'lightgreen', 'darkgreen'),
    (negative_scores, 2, 'lightcoral', 'darkred'),
)

for scores, position, face_colour, median_colour in box_specs:
    ax.boxplot(
        scores,
        positions=[position],
        widths=0.6,
        patch_artist=True,  # required so the box can take a fill colour
        boxprops=dict(facecolor=face_colour),
        medianprops=dict(color=median_colour),
        showfliers=True,
    )

# Titles, axis labels and grid
ax.set_title('Sentiment Analysis of Tweets')
ax.set_ylabel('Compound Sentiment Score')
ax.set_xticks([1, 2])
ax.set_xticklabels(['Positive', 'Negative'])
ax.grid(True)
fig.tight_layout()

# Display the finished figure
plt.show()