VADER sentiment analysis plus irony detection
Thu May 30 2024 21:04:07 GMT+0000 (Coordinated Universal Time)
Saved by
@madgakantara
from pymongo import MongoClient, errors
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re
# MongoDB connection settings
# Connection URI handed to MongoClient; points at localhost, port 27017.
mongo_uri = "mongodb://localhost:27017/"
# Name of the database selected on the client.
database_name = "twitter_database"
# Name of the collection the tweet documents are read from.
source_collection_name = "tweets"
def get_text_from_doc(doc):
    """
    Extract the tweet text from a stored document.

    The untruncated ``json_data.extended_tweet.full_text`` is preferred when
    it is present and non-empty, because for long tweets the plain
    ``json_data.text`` field only holds a truncated version. Otherwise
    ``json_data.text`` is used.

    Args:
        doc: Mapping for one stored tweet; the payload is read from its
            ``"json_data"`` key.

    Returns:
        The tweet text, or None when no non-empty text can be found
        (including when ``json_data`` is missing, None, or not a dict).
    """
    json_data = doc.get("json_data")
    if not isinstance(json_data, dict):
        # Key missing or explicitly stored as null / a non-document value.
        return None

    extended_tweet = json_data.get("extended_tweet")
    if isinstance(extended_tweet, dict):
        full_text = extended_tweet.get("full_text")
        if full_text:
            return full_text

    # Normalise an empty string to None so callers get one "no text" value.
    return json_data.get("text") or None
# Shared VADER analyzer, created lazily on first use. Constructing a
# SentimentIntensityAnalyzer loads its lexicon, so it is built once and
# reused instead of being rebuilt for every tweet.
_ANALYZER = None


def _get_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = SentimentIntensityAnalyzer()
    return _ANALYZER


def analyze_sentiment(text):
    """
    Analyze the sentiment of the given text using VADER.

    Args:
        text: The text to score.

    Returns:
        The ``'compound'`` entry of VADER's ``polarity_scores`` result.
    """
    scores = _get_analyzer().polarity_scores(text)
    return scores['compound']
# Case-insensitive phrase patterns treated as irony markers. Compiled once at
# import time so they are not looked up again for every tweet.
_IRONY_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "delay ... thanks": the original pattern had r"\thanks", where
        # "\t" is a TAB escape, so it could never match the word "thanks".
        r"\b(?:delay)\b.*\bthanks\b",
        r"\b(?:can't|cannot)\b.*\bimagine\b",
        r"\bwhat a surprise\b",
        r"\bsarcasm\b",
        r"\birony\b",
        r"\bjust what I needed\b",
    )
)


def detect_irony(text):
    """
    Detect irony in the given text using a fixed list of phrase patterns.

    Args:
        text: The text to inspect. None or an empty string is treated as
            "no irony".

    Returns:
        True if any irony pattern matches (case-insensitively), otherwise
        False.
    """
    if not text:
        return False
    return any(pattern.search(text) for pattern in _IRONY_PATTERNS)
# Script body: read up to 20 tweets from MongoDB and print, for each one that
# has text, its VADER compound score and whether an irony pattern matched.
#
# `client` is bound before the `try` so the `finally` clause is safe even if
# MongoClient(...) itself raises (e.g. on an invalid URI); otherwise the
# cleanup would fail with NameError and hide the error that was just handled.
client = None
try:
    client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
    db = client[database_name]
    source_collection = db[source_collection_name]
    cursor = source_collection.find().limit(20)  # Limit to only 20 tweets
    for doc in cursor:
        text = get_text_from_doc(doc)
        if not text:
            # Documents without any text are skipped silently.
            continue
        compound_score = analyze_sentiment(text)
        irony_detected = detect_irony(text)
        print("Compound Sentiment Score:", compound_score)
        if irony_detected:
            print("Irony detected in text:", text)
        else:
            print("Text is not ironic.")
        print()
except errors.ServerSelectionTimeoutError as err:
    # Listed before PyMongoError so the more specific message is printed.
    print("Failed to connect to MongoDB server:", err)
except errors.PyMongoError as err:
    print("An error occurred while working with MongoDB:", err)
finally:
    if client is not None:
        client.close()
content_copyCOPY
Comments