"""Render a word cloud from English tweets that mention KLM.

The script reads tweet JSON documents from the ``data_db`` table of a MySQL
database, keeps the full text of English tweets whose first or second user
mention is KLM (matched by account id or by name), strips stopwords and
non-alphanumeric tokens, and displays the result as a word cloud.
"""

import os
from contextlib import closing
from typing import Iterable, List

import matplotlib.pyplot as plt
import mysql.connector
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud

# The mention tests are grouped in parentheses on purpose: AND binds tighter
# than OR in SQL, so without the grouping the language filter would apply
# only to the first mention test and non-English tweets would slip through.
TWEET_TEXT_QUERY = """
    SELECT JSON_VALUE(data, '$.extended_tweet.full_text')
    FROM data_db
    WHERE JSON_VALUE(data, '$.lang') = 'en'
      AND (
            JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[0].id') = '56377143'
         OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[0].name') IN ('KLM', 'klm')
         OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[1].id') = '56377143'
         OR JSON_VALUE(data, '$.extended_tweet.entities.user_mentions[1].name') IN ('KLM', 'klm')
      )
"""


def fetch_tweet_texts() -> List[str]:
    """Return the full text of every matching tweet.

    Connection settings can be overridden with the ``TWITTER_DB_HOST``,
    ``TWITTER_DB_USER``, ``TWITTER_DB_PASSWORD`` and ``TWITTER_DB_NAME``
    environment variables; the defaults are the values the script has always
    used.

    Returns:
        The non-NULL ``full_text`` values selected by ``TWEET_TEXT_QUERY``.
    """
    # NOTE(security): the fallback password is kept only so existing setups
    # keep working. Set TWITTER_DB_PASSWORD and remove the literal.
    connection = mysql.connector.connect(
        host=os.environ.get("TWITTER_DB_HOST", "localhost"),
        user=os.environ.get("TWITTER_DB_USER", "root"),
        password=os.environ.get("TWITTER_DB_PASSWORD", "Eimaipolykala1"),
        database=os.environ.get("TWITTER_DB_NAME", "twitter_db"),
    )
    # closing() guarantees the cursor and connection are released even when
    # the query raises.
    with closing(connection), closing(connection.cursor()) as cursor:
        cursor.execute(TWEET_TEXT_QUERY)
        rows = cursor.fetchall()

    # JSON_VALUE yields NULL when the path is absent; skip those rows so the
    # later str.join does not fail on None.
    return [row[0] for row in rows if row[0] is not None]


def build_cloud_text(texts: Iterable[str]) -> str:
    """Combine tweet texts into one cleaned, space-separated string.

    The combined text is lower-cased and tokenized; tokens that are not
    purely alphanumeric or that are English stopwords are dropped.

    Args:
        texts: Raw tweet texts.

    Returns:
        The remaining tokens joined by single spaces (possibly empty).
    """
    stop_words = set(stopwords.words("english"))
    tokens = word_tokenize(" ".join(texts).lower())
    return " ".join(
        token for token in tokens if token.isalnum() and token not in stop_words
    )


def show_word_cloud(text: str) -> None:
    """Generate a word cloud from *text* and display it in a window."""
    cloud = WordCloud(width=800, height=400, background_color="white").generate(text)

    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()


def main() -> None:
    """Fetch the tweets, clean their text and show the word cloud."""
    processed_text = build_cloud_text(fetch_tweet_texts())
    if not processed_text:
        # WordCloud.generate raises an opaque ValueError on empty input;
        # exit with a readable message instead.
        raise SystemExit("No usable words found in the selected tweets.")
    show_word_cloud(processed_text)


if __name__ == "__main__":
    main()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter