from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from deep_translator import GoogleTranslator
from pymongo import MongoClient
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
from nltk.stem import WordNetLemmatizer

# Initialize MongoDB client and collection.
# serverSelectionTimeoutMS=5000 fails fast (5 s) if the local mongod is not
# reachable instead of blocking for the default 30 s.
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "final"
client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client[database_name]
collection = db[collection_name]

# Initialize VADER sentiment analyzer (rule/lexicon based; no training needed).
analyzer = SentimentIntensityAnalyzer()

# Initialize WordNet Lemmatizer used by clean_text below.
# NOTE(review): requires the NLTK 'wordnet' corpus to be downloaded — confirm
# the environment has run nltk.download('wordnet').
lemmatizer = WordNetLemmatizer()

# Function to translate text to English
def translate_to_english(text, lang):
    """Translate ``text`` to English via Google Translate (best-effort).

    Parameters
    ----------
    text : str
        Text to translate.
    lang : str
        Source language code reported by Twitter. Kept for interface
        compatibility but not passed to the translator: auto-detection is
        used because Twitter codes (e.g. 'und') are not always valid
        Google Translate source codes.

    Returns
    -------
    str
        The translated text, or the original ``text`` unchanged if the
        translation service fails — a single network/service error should
        not abort the whole evaluation run.
    """
    try:
        return GoogleTranslator(source='auto', target='en').translate(text)
    except Exception:
        # Best-effort: fall back to the untranslated text on any
        # translator/network failure.
        return text

# Function to clean text by lemmatizing
def clean_text(text):
    """Lemmatize every whitespace-separated token and re-join with spaces."""
    lemmas = []
    for token in text.split():
        lemmas.append(lemmatizer.lemmatize(token))
    joined = ' '.join(lemmas)
    return joined.strip()

# Function to analyze sentiment
def analyze_sentiment(text):
    """Classify ``text`` as positive (1) or negative (0) using VADER.

    The text is lemmatized via clean_text before scoring. VADER's compound
    score lies in [-1, 1]; any strictly positive score maps to 1, and
    everything else — including exactly 0 (neutral) — maps to 0.
    """
    lemmatized_text = clean_text(text)
    sentiment = analyzer.polarity_scores(lemmatized_text)
    compound_score = sentiment['compound']
    # The original if/elif pair (> 0 / <= 0) was exhaustive; a plain
    # conditional expression is equivalent and makes it obvious that the
    # function can never fall through to an implicit None return.
    return 1 if compound_score > 0 else 0

# Hand-labelled ground truth (1 = positive, 0 = negative) for the first 100
# tweets returned by the query below; predicted_sentiments is filled in the
# same order, so the two lists align index-for-index.
true_labels = [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0]
predicted_sentiments = []

# Iterate through tweets in MongoDB collection and predict a sentiment label
# for each, keeping predictions aligned with true_labels.
cursor = collection.find().limit(100)

for doc in cursor:
    # BUG FIX: dict.get() does not resolve dotted paths, so
    # doc.get("json_data.lang") / doc.get('json_data.text') always returned
    # None (dotted paths only work in MongoDB queries, not on the returned
    # Python dicts). Descend into the nested sub-document explicitly.
    json_data = doc.get('json_data') or {}
    lang = json_data.get('lang')
    # Default missing text to '' so the document still yields a prediction
    # (compound 0 -> label 0) instead of crashing in clean_text, keeping the
    # predictions list the same length as true_labels.
    text_to_use = json_data.get('text') or ''

    # Only translate non-English tweets; English text is used as-is.
    if text_to_use and lang != 'en':
        translated_text = translate_to_english(text_to_use, lang)
    else:
        translated_text = text_to_use

    # Clean and lemmatize text. (analyze_sentiment lemmatizes again, which
    # is redundant but harmless since lemmatization is idempotent here.)
    cleaned_text = clean_text(translated_text)

    # Perform sentiment analysis (1 = positive, 0 = negative).
    sentiment = analyze_sentiment(cleaned_text)
    predicted_sentiments.append(sentiment)

# Convert lists to numpy arrays for metric calculations
true_labels = np.array(true_labels)
predicted_sentiments = np.array(predicted_sentiments)

# Compute confusion matrix
cm = confusion_matrix(true_labels, predicted_sentiments)

# Compute summary metrics. These functions were imported but never used, and
# the "add metrics as annotations" step below was left unimplemented.
# zero_division=0 avoids warnings/NaNs if one class is never predicted.
accuracy = accuracy_score(true_labels, predicted_sentiments)
precision = precision_score(true_labels, predicted_sentiments, zero_division=0)
recall = recall_score(true_labels, predicted_sentiments, zero_division=0)
f1 = f1_score(true_labels, predicted_sentiments, zero_division=0)

plt.figure(figsize=(10, 8))
labels = ['Negative', 'Positive']
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap='Blues', ax=plt.gca())  # Use plt.gca() to get current axes for proper positioning

# Add metrics as annotations below the matrix
metrics_text = (
    f"Accuracy: {accuracy:.2f}   Precision: {precision:.2f}   "
    f"Recall: {recall:.2f}   F1: {f1:.2f}"
)
plt.gcf().text(0.5, 0.01, metrics_text, ha='center', fontsize=11)

plt.title('Confusion Matrix')
plt.tight_layout()  # Adjust layout for better spacing
plt.show()