from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import csv

# Path to the labelled tweets CSV. The loading code further down reads the
# 'airline_sentiment' and 'text' columns from it.
# NOTE(review): this is an absolute, machine-specific Windows path - update it
# before running the script on another machine.
file_path = r'C:\Users\User\Desktop\DBL Data Challenge\archive\Tweets.csv'

# Shared VADER analyzer instance; analyze_sentiment() calls its
# polarity_scores() method for every text it classifies.
analyzer = SentimentIntensityAnalyzer()

# Function to analyze sentiment
def analyze_sentiment(text, threshold=0.05):
    """Classify *text* as positive, neutral or negative using VADER.

    The compound score returned by ``analyzer.polarity_scores`` is bucketed
    with the symmetric cut-offs recommended in the VADER documentation:
    scores at or above ``threshold`` are positive, scores at or below
    ``-threshold`` are negative, and everything in between is neutral.

    Args:
        text: The text to score.
        threshold: Magnitude the compound score must reach to count as
            positive or negative. Defaults to 0.05.

    Returns:
        4 for positive, 0 for negative, 2 for neutral (the same numeric
        labels used by ``sentiment_mapping``).
    """
    compound_score = analyzer.polarity_scores(text)['compound']
    if compound_score >= threshold:
        return 4  # Positive
    # The negative cut-off mirrors the positive one. Comparing against 0
    # instead would label a perfectly neutral score of 0.0 as negative and
    # leave only the sliver between 0 and the threshold for the neutral class.
    if compound_score <= -threshold:
        return 0  # Negative
    return 2  # Neutral

# Lookup table: lower-cased sentiment name -> numeric label
# (4 = positive, 0 = negative, 2 = neutral).
sentiment_mapping = dict(positive=4, negative=0, neutral=2)

# Lists to store labels and texts
labels = []
texts = []

# Open the CSV file and read its contents, trying each encoding in turn.
# 'latin1' maps every possible byte to a character, so it can never raise
# UnicodeDecodeError and serves as the final fallback ('iso-8859-1' is an
# alias of the same codec, so listing it separately would add nothing).
encodings = ['utf-8', 'latin1']
for encoding in encodings:
    # A decode error can surface part-way through the file, after some rows
    # have already been collected. Start every attempt from empty lists so a
    # retry with the next encoding does not append the same rows twice.
    labels.clear()
    texts.clear()
    try:
        # newline='' lets the csv module handle line endings itself, which
        # keeps newlines embedded inside quoted fields intact.
        with open(file_path, 'r', encoding=encoding, newline='') as file:
            reader = csv.DictReader(file)
            # Read each line in the file
            for row in reader:
                # Extract label and text
                label_text = row['airline_sentiment'].strip().lower()
                text = row['text'].strip('"')
                # Keep only rows whose label has a numeric equivalent
                if label_text in sentiment_mapping:
                    labels.append(sentiment_mapping[label_text])
                    texts.append(text)
        # The whole file was read successfully; no further encodings needed.
        break
    except UnicodeDecodeError:
        print(f"Failed to read with encoding {encoding}. Trying next encoding.")
    except Exception as e:
        # Any other failure (missing file, missing column, ...) will not be
        # cured by switching encodings, so report it and stop trying.
        print(f"An error occurred: {e}")
        break

# Stop early when nothing was loaded (for example because the CSV could not
# be read): metrics computed on empty inputs would be meaningless.
if not labels:
    raise SystemExit("No labelled tweets were loaded; nothing to evaluate.")

# Apply sentiment analysis to each text in the list
predicted_sentiments = [analyze_sentiment(text) for text in texts]

# Evaluate performance. Precision, recall and F1 are computed per class and
# combined with average='weighted', i.e. weighted by each class's number of
# true instances.
accuracy = accuracy_score(labels, predicted_sentiments)
precision = precision_score(labels, predicted_sentiments, average='weighted')
recall = recall_score(labels, predicted_sentiments, average='weighted')
f1 = f1_score(labels, predicted_sentiments, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)