# Word cloud (original snippet title: "WORD FREAKING CLOUDDD")
# Saved Fri May 10 2024 21:57:26 GMT+0000 (Coordinated Universal Time)
# by @madgakantara
import os

import matplotlib.pyplot as plt
import mysql.connector
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
# Download every NLTK resource this script reads:
#   - 'punkt' / 'punkt_tab': tokenizer models used by word_tokenize(); newer
#     NLTK releases look up 'punkt_tab', older ones 'punkt', so fetch both.
#   - 'stopwords': corpus behind stopwords.words('english'); without it the
#     stopword lookup raises LookupError on a fresh machine.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Set NLTK data path if necessary
# nltk.data.path.append("path_to_nltk_data")
# Connect to the MySQL database holding the raw tweet JSON.
# Each setting can be overridden through an environment variable; the
# fallbacks are the values this script originally hard-coded.
# NOTE(review): a real password is still present in source as the fallback --
# rotate it and drop the default once TWITTER_DB_PASSWORD is set everywhere.
mydb = mysql.connector.connect(
    host=os.environ.get("TWITTER_DB_HOST", "localhost"),
    user=os.environ.get("TWITTER_DB_USER", "root"),
    password=os.environ.get("TWITTER_DB_PASSWORD", "Eimaipolykala1"),
    database=os.environ.get("TWITTER_DB_NAME", "twitter_db"),
)
try:
    # Create a cursor object
    cursor = mydb.cursor()
    try:
        # Retrieve the full text of every tweet whose JSON 'lang' field is 'en'.
        cursor.execute("SELECT JSON_VALUE(data, '$.extended_tweet.full_text') FROM data_db WHERE JSON_VALUE(data, '$.lang')='en'")
        # Fetch all rows; each row is a 1-tuple whose element may be None.
        data = cursor.fetchall()
    finally:
        # Release the cursor even if the query or fetch fails.
        cursor.close()
finally:
    # Always close the connection, including on errors above.
    mydb.close()
# Process text data
# Merge the text of every row into one space-separated string, skipping rows
# whose first column is empty or None.
text = ' '.join(row[0] for row in data if row[0])

# English stopwords as a set for fast membership tests.
stop_words = set(stopwords.words('english'))

# Lowercase first, then tokenize.
word_tokens = word_tokenize(text.lower())

# Keep only alphanumeric tokens that are not stopwords.
filtered_words = list(
    filter(
        lambda token: token.isalnum() and token not in stop_words,
        word_tokens,
    )
)
processed_text = ' '.join(filtered_words)
# Generate word cloud
# WordCloud.generate() raises ValueError when it is given no words, so stop
# early with a clear message if filtering left nothing to plot (for example,
# the query returned no rows).
if not processed_text.strip():
    raise SystemExit("No words left after filtering; nothing to plot.")

wordcloud = WordCloud(width=800, height=400, background_color='white').generate(processed_text)

# Display word cloud
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide axis ticks and frame around the image
plt.show()
# (End of snippet; the source page's "Copy" button and "Comments" section followed here.)