"""Evaluate VADER sentiment predictions on tweets stored in MongoDB.

Reads up to 100 tweets from the ``twitter_database.final`` collection,
translates non-English tweets to English, lemmatizes the text, scores it
with VADER, and plots a confusion matrix against a hand-labelled ground
truth of 100 binary labels (1 = positive, 0 = negative).
"""

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
)
from deep_translator import GoogleTranslator
from pymongo import MongoClient
import matplotlib.pyplot as plt
import numpy as np
from nltk.stem import WordNetLemmatizer

# MongoDB connection settings.
MONGO_URI = "mongodb://localhost:27017/"
DATABASE_NAME = "twitter_database"
COLLECTION_NAME = "final"

client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
collection = client[DATABASE_NAME][COLLECTION_NAME]

# Both the analyzer and the lemmatizer are stateless after construction,
# so a single shared instance of each is fine.
analyzer = SentimentIntensityAnalyzer()
lemmatizer = WordNetLemmatizer()


def translate_to_english(text, lang):
    """Translate *text* to English and return the translated string.

    ``lang`` is accepted for interface compatibility with existing callers
    but is not forwarded: the translator auto-detects the source language.
    """
    return GoogleTranslator(source='auto', target='en').translate(text)


def clean_text(text):
    """Lemmatize each whitespace-separated token of *text* and rejoin them."""
    lemmatized = ' '.join(lemmatizer.lemmatize(word) for word in text.split())
    return lemmatized.strip()


def analyze_sentiment(text):
    """Return 1 (positive) if VADER's compound score is > 0, else 0 (negative)."""
    scores = analyzer.polarity_scores(clean_text(text))
    return 1 if scores['compound'] > 0 else 0


def main():
    """Score up to 100 tweets and display the confusion matrix."""
    # Hand-labelled ground truth for the first 100 tweets, in cursor order.
    true_labels = [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
                   0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
                   0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
                   1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,
                   1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0]

    y_true = []
    y_pred = []
    for idx, doc in enumerate(collection.find().limit(100)):
        # BUG FIX: the original used doc.get("json_data.lang") /
        # doc.get('json_data.text') — Python dicts do not resolve MongoDB
        # dotted paths, so both lookups always returned None. Fetch the
        # nested sub-document and read its fields instead.
        json_data = doc.get("json_data") or {}
        lang = json_data.get("lang")
        tweet_text = json_data.get("text")

        # Skip documents with no usable text or beyond the labelled range;
        # tracking idx keeps predictions aligned with their true labels.
        if not tweet_text or idx >= len(true_labels):
            continue

        if lang != 'en':
            tweet_text = translate_to_english(tweet_text, lang)

        # analyze_sentiment lemmatizes internally, so no separate
        # clean_text pass is needed here (the original cleaned twice).
        y_true.append(true_labels[idx])
        y_pred.append(analyze_sentiment(tweet_text))

    cm = confusion_matrix(np.array(y_true), np.array(y_pred))
    plt.figure(figsize=(10, 8))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                  display_labels=['Negative', 'Positive'])
    disp.plot(cmap='Blues', ax=plt.gca())  # reuse current axes for layout
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()