import json
from deep_translator import GoogleTranslator
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
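# Note: the NLTK resources used below may need a one-time download (this is an
# assumption about the local environment; uncomment if they are missing):
# nltk.download('punkt')    # tokenizer models used by word_tokenize
# nltk.download('wordnet')  # corpus backing WordNetLemmatizer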
# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()
# Initialize WordNet Lemmatizer
lemmatizer = WordNetLemmatizer()
# Clean text by tokenizing it and lemmatizing each token
# (stopwords are kept so VADER's negation handling still works)
def clean_text(text):
    tokens = word_tokenize(text)
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(lemmatized_tokens).strip()
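# Illustrative example (WordNet lemmatization with the default noun POS):
# clean_text("The cats are running") -> "The cat are running"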
# Function to translate text to English
def translate_to_english(text, lang):
    if lang != 'en':
        translated_text = GoogleTranslator(source='auto', target='en').translate(text)
    else:
        translated_text = text
    return translated_text
# Function to analyze sentiment with VADER
def analyze_sentiment(text):
    sentiment = analyzer.polarity_scores(text)
    # The compound score is a normalized value in [-1, 1] (most negative to most positive)
    compound_score = sentiment['compound']
    return compound_score
# Recursive function to process replies
def process_replies(reply_data):
    reply_text = reply_data.get("text", "")
    reply_lang = reply_data.get("lang", "en")
    if reply_lang != "en":
        reply_text = translate_to_english(reply_text, reply_lang)
    reply_text = clean_text(reply_text)
    sentiment_score_reply = analyze_sentiment(reply_text)
    reply_data["sentiment_score"] = sentiment_score_reply
    # Recurse into nested replies
    for reply in reply_data.get("replies", []):
        process_replies(reply)
file_path = r'C:\Users\User\Desktop\DBL Data Challenge\lala.json'
updated_data = []

# Read the file line by line; each line is expected to hold one JSON object
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        try:
            data = json.loads(line.strip())
            lang = data.get("lang", "en")
            text = data.get("text", "")
            replies = data.get("replies", [])
            if lang != "en":
                text = translate_to_english(text, lang)
            text = clean_text(text)
            sentiment_score_main_text = analyze_sentiment(text)
            data["sentiment_score"] = sentiment_score_main_text  # Add sentiment score to main tweet object
            for reply_data in replies:
                process_replies(reply_data)
            updated_data.append(data)
        except json.JSONDecodeError:
            print("Error: Skipping line with invalid JSON format")
            continue
# Write the updated data back to the JSON file, one object per line
with open(file_path, 'w', encoding='utf-8') as file:
    for item in updated_data:
        json.dump(item, file)
        file.write('\n')
print("Sentiment scores added to the JSON file.")