from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from deep_translator import GoogleTranslator
from pymongo import MongoClient
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
from nltk.stem import WordNetLemmatizer
# Initialize MongoDB client and collection.
# NOTE(review): assumes a local mongod holding pre-collected tweets in
# twitter_database.final — confirm against the ingestion script.
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "final"
# 5-second server-selection timeout so a missing/unreachable mongod fails fast
# instead of blocking on the first query.
client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client[database_name]
collection = db[collection_name]
# Initialize VADER sentiment analyzer (rule-based, tuned for social media text)
analyzer = SentimentIntensityAnalyzer()
# Initialize WordNet Lemmatizer (requires the NLTK 'wordnet' corpus to be downloaded)
lemmatizer = WordNetLemmatizer()
# Function to translate text to English
# Function to translate text to English
def translate_to_english(text, lang):
    """Translate *text* to English via Google Translate (best effort).

    Args:
        text: Text to translate; may be None or empty.
        lang: Source-language hint from the tweet metadata. Currently unused:
            auto-detection is kept because Twitter lang codes such as 'und'
            are not valid Google Translate source codes.

    Returns:
        The English translation, or the original *text* unchanged when it is
        empty/None or when translation fails.
    """
    if not text:
        return text
    try:
        return GoogleTranslator(source='auto', target='en').translate(text)
    except Exception:
        # Best effort: one bad tweet (network hiccup, unsupported language)
        # should not abort the whole evaluation run.
        return text
# Function to clean text by lemmatizing
def clean_text(text):
lemmatized_text = ' '.join([lemmatizer.lemmatize(word) for word in text.split()])
return lemmatized_text.strip()
# Function to analyze sentiment
def analyze_sentiment(text):
lemmatized_text = clean_text(text)
sentiment = analyzer.polarity_scores(lemmatized_text)
compound_score = sentiment['compound']
if compound_score > 0:
return 1 # Positive
elif compound_score <= 0:
return 0 # Negative
# Ground-truth sentiment labels for the first 100 tweets (1 = positive, 0 = negative).
true_labels = [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0]
predicted_sentiments = []

# Iterate through tweets in the MongoDB collection.
cursor = collection.find().limit(100)
for doc in cursor:
    # BUG FIX: dict.get does not resolve dotted paths, so the original
    # doc.get("json_data.lang") / doc.get('json_data.text') always returned
    # None for nested documents. Access the nested sub-document explicitly.
    json_data = doc.get('json_data') or {}
    lang = json_data.get('lang')
    tweet_text = json_data.get('text')
    if not tweet_text:
        # Keep predictions aligned with true_labels: score a missing/empty
        # tweet as negative (compound score 0 maps to 0 in analyze_sentiment).
        predicted_sentiments.append(0)
        continue
    if lang != 'en':
        translated_text = translate_to_english(tweet_text, lang)
    else:
        translated_text = tweet_text
    # analyze_sentiment lemmatizes internally, so no separate pre-cleaning
    # pass is needed here (the original cleaned the text twice).
    predicted_sentiments.append(analyze_sentiment(translated_text))

# Align lengths in case the collection returned fewer than 100 documents,
# then convert to numpy arrays for the confusion-matrix computation.
n = min(len(true_labels), len(predicted_sentiments))
true_labels = np.array(true_labels[:n])
predicted_sentiments = np.array(predicted_sentiments[:n])
# Compute the confusion matrix. Pin labels=[0, 1] so the matrix is always
# 2x2 even when the predictions contain only one class — without this,
# ConfusionMatrixDisplay would fail on a 1x1 matrix with two display labels.
cm = confusion_matrix(true_labels, predicted_sentiments, labels=[0, 1])
plt.figure(figsize=(10, 8))
class_names = ['Negative', 'Positive']  # index 0 = negative, 1 = positive
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap='Blues', ax=plt.gca())  # draw into the current axes
plt.title('Confusion Matrix')
plt.tight_layout()  # adjust layout for better spacing
plt.show()