# SENTIMENT EVOLUTION CODE FINAL
# Tue Jun 18 2024 21:41:43 GMT+0000 (Coordinated Universal Time)
# Saved by @madgakantara
import re
import statistics
import string

import nltk
from deep_translator import GoogleTranslator
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pymongo import MongoClient
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize MongoDB client and collection
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"
collection_name = "tweet_cleaned"

client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
db = client[database_name]
collection = db[collection_name]

# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Initialize WordNet Lemmatizer
lemmatizer = WordNetLemmatizer()

# Matches standalone alphabetic words; punctuation, digits and spacing are
# left untouched so the sentiment analyzer still sees the original emphasis.
_WORD_RE = re.compile(r"\b[A-Za-z]+\b")


def _get_field(doc, dotted_key):
    """Return the value stored under ``dotted_key`` in ``doc``, or None.

    The literal key is tried first; otherwise the key is split on "." and
    followed through nested dictionaries. A plain ``dict.get`` with a dotted
    key does not traverse nested documents, which is why this helper exists.
    """
    if dotted_key in doc:
        return doc[dotted_key]
    value = doc
    for part in dotted_key.split("."):
        if not isinstance(value, dict):
            return None
        value = value.get(part)
    return value


def clean_text(text):
    """Lemmatize every alphabetic word in ``text`` and strip outer whitespace.

    ``WordNetLemmatizer.lemmatize`` works on a single word, so each word is
    passed to it individually rather than handing it the whole string.
    """
    lemmatized_text = _WORD_RE.sub(
        lambda match: lemmatizer.lemmatize(match.group(0)), text
    )
    return lemmatized_text.strip()


def translate_to_english(text, lang):
    """Return ``text`` unchanged when ``lang`` is 'en', else translate it.

    Any other ``lang`` value (including None) sends the text through
    GoogleTranslator with automatic source-language detection.
    """
    if lang == 'en':
        return text
    return GoogleTranslator(source='auto', target='en').translate(text)


def analyze_sentiment(text):
    """Return the 'compound' polarity score the analyzer assigns to ``text``."""
    return analyzer.polarity_scores(text)['compound']


# Initialize counters
positive_count = 0
negative_count = 0
neutral_count = 0
compound_scores = []

try:
    # Iterate through tweets in MongoDB collection
    cursor = collection.find().limit(100)
    for doc in cursor:
        lang = _get_field(doc, "json_data.lang")
        tweet_text = _get_field(doc, 'json_data.text')

        # Nothing to translate or score without usable text.
        if not isinstance(tweet_text, str) or not tweet_text.strip():
            continue

        translated_text = translate_to_english(tweet_text, lang)
        if not translated_text:
            continue

        # Clean and lemmatize text
        cleaned_text = clean_text(translated_text)

        # Perform sentiment analysis
        sentiment = analyze_sentiment(cleaned_text)
        compound_scores.append(sentiment)

        # Update sentiment counts; a score of exactly 0 is neutral, not negative
        if sentiment > 0:
            positive_count += 1
        elif sentiment < 0:
            negative_count += 1
        else:
            neutral_count += 1
finally:
    # Close MongoDB client even if translation or scoring raised
    client.close()

# Calculate mean compound score (statistics.mean raises on an empty list)
mean_score = statistics.mean(compound_scores) if compound_scores else None

# Print results
print("Positive:", positive_count)
print("Negative:", negative_count)
print("Neutral:", neutral_count)
print("Mean compound score:", mean_score)
# Comments