WORD FREAKING CLOUDDD

PHOTO EMBED

Fri May 10 2024 21:57:26 GMT+0000 (Coordinated Universal Time)

Saved by @madgakantara

import nltk
import mysql.connector
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Build and display a word cloud from the English tweets stored in MySQL.
#
# Steps: fetch the NLTK resources, read the full tweet text for rows whose
# JSON `lang` is 'en', tokenize and drop stopwords / non-alphanumeric tokens,
# then render the remaining words with WordCloud and matplotlib.

# Download the NLTK resources used below: tokenizer models for word_tokenize
# and the stopword corpus for stopwords.words (the latter was previously never
# downloaded, so a fresh environment failed with LookupError).
nltk.download('punkt')
# NOTE(review): newer NLTK releases appear to load 'punkt_tab' rather than
# 'punkt' for word_tokenize; requested as well so either layout is present.
# On versions that do not know this id the call should just report an error
# and return False - confirm against the installed NLTK version.
nltk.download('punkt_tab')
nltk.download('stopwords')

# Set NLTK data path if necessary
# nltk.data.path.append("path_to_nltk_data")

# NOTE(review): database credentials are hard-coded in source. Move them to
# configuration / environment variables before sharing or deploying this file.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="Eimaipolykala1",
    database="twitter_db"
)

# Always release the cursor and the connection, even if the query fails.
try:
    cursor = mydb.cursor()
    try:
        # Retrieve the full tweet text of every row whose JSON `lang` is 'en'.
        cursor.execute("SELECT JSON_VALUE(data, '$.extended_tweet.full_text')  FROM data_db WHERE JSON_VALUE(data, '$.lang')='en'")
        data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    mydb.close()

# Process text data
# Combine text from all rows; rows whose first column is None/empty are skipped.
text = ' '.join(row[0] for row in data if row[0])
stop_words = set(stopwords.words('english'))  # Set gives O(1) membership tests
word_tokens = word_tokenize(text.lower())  # Tokenize and convert to lowercase
# Keep only alphanumeric tokens that are not English stopwords.
filtered_words = [
    word for word in word_tokens
    if word.isalnum() and word not in stop_words
]
processed_text = ' '.join(filtered_words)

# WordCloud.generate cannot build a cloud from empty input; stop with a clear
# message instead of an opaque error from inside the library.
if not processed_text:
    raise SystemExit("No usable English tweet text found; nothing to plot.")

# Generate word cloud
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(processed_text)

# Display word cloud
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
content_copyCOPY