# Word cloud of tweets mentioning KLM
# Fri May 10 2024 22:16:47 GMT+0000 (Coordinated Universal Time)
# Saved by @madgakantara
import mysql.connector
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Connect to the local MySQL database that holds the raw tweet JSON.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file kept out of version control.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="Eimaipolykala1",
    database="twitter_db",
)

# Select the full text of English tweets whose first or second user mention
# is KLM (matched by account id or by display name).
# The mention tests are wrapped in parentheses on purpose: SQL's AND binds
# tighter than OR, so without them the language filter would apply only to
# the first mention test and non-English tweets would slip through.
query = """
    SELECT JSON_VALUE(data, '$.extended_tweet.full_text')
    FROM data_db
    WHERE JSON_VALUE(data, '$.lang') = 'en'
      AND (
           JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[0].id') = '56377143'
        OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[0].name') = 'KLM'
        OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[0].name') = 'klm'
        OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[1].name') = 'klm'
        OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[1].name') = 'KLM'
        OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[1].id') = '56377143'
      )
"""

# Always release the cursor and the connection, even if the query fails.
try:
    # Create a cursor object
    cursor = mydb.cursor()
    try:
        # Execute SQL query to retrieve text data
        cursor.execute(query)
        # Fetch data: a list of 1-tuples, one per matching tweet
        data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Close the connection
    mydb.close()
# Process text data
# Combine text from all rows. JSON_VALUE yields NULL (None in Python) when the
# requested path is missing from a tweet, and str.join() raises TypeError on
# None, so such rows are skipped.
text = ' '.join(row[0] for row in data if row[0] is not None)
stop_words = set(stopwords.words('english'))  # Load English stopwords
word_tokens = word_tokenize(text.lower())  # Tokenize and convert to lowercase
# Filter out stopwords and non-alphanumeric tokens (punctuation, URLs pieces, etc.)
filtered_words = [
    word
    for word in word_tokens
    if word.isalnum() and word not in stop_words
]
processed_text = ' '.join(filtered_words)
# Build the word cloud image from the cleaned-up tweet text.
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(processed_text)

# Render the image on a 10x5 inch figure with the axes hidden, then show it.
figure, axes = plt.subplots(figsize=(10, 5))
axes.imshow(wordcloud, interpolation='bilinear')
axes.axis('off')
plt.show()
# content_copyCOPY
# Comments