"""Render a word cloud from the English tweets stored in a MySQL table.

The script reads the ``extended_tweet.full_text`` field of every row in
``data_db`` whose ``lang`` field is ``'en'``, lower-cases and tokenizes the
combined text, drops English stopwords and non-alphanumeric tokens, and
displays the result as a word cloud with matplotlib.
"""

import os

import matplotlib.pyplot as plt
import mysql.connector
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud

# Set NLTK data path if necessary
# nltk.data.path.append("path_to_nltk_data")

# NLTK resources the script depends on. 'stopwords' backs
# stopwords.words('english'); 'punkt' / 'punkt_tab' back word_tokenize
# (recent NLTK releases look for 'punkt_tab', older ones for 'punkt').
NLTK_RESOURCES = ("punkt", "punkt_tab", "stopwords")

# Text of every tweet whose JSON 'lang' field is 'en'.
TWEET_TEXT_QUERY = (
    "SELECT JSON_VALUE(data, '$.extended_tweet.full_text') "
    "FROM data_db WHERE JSON_VALUE(data, '$.lang')='en'"
)


def _download_nltk_resources():
    """Download the NLTK data packages used by the tokenizer and stopword list."""
    for resource in NLTK_RESOURCES:
        # nltk.download reports failure via its return value instead of
        # raising, so an unknown package name on an older index is harmless.
        nltk.download(resource)


def _fetch_tweet_texts():
    """Return the non-empty tweet texts selected by ``TWEET_TEXT_QUERY``.

    The cursor and the connection are always closed, even if the query fails.
    """
    connection = mysql.connector.connect(
        host="localhost",
        user="root",
        # NOTE(review): a credential should not live in source control. The
        # MYSQL_PASSWORD environment variable takes precedence; the literal
        # is kept only as a backward-compatible fallback and should be removed.
        password=os.environ.get("MYSQL_PASSWORD", "Eimaipolykala1"),
        database="twitter_db",
    )
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(TWEET_TEXT_QUERY)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # JSON_VALUE yields NULL when the path is missing; skip those rows.
    return [row[0] for row in rows if row[0]]


def _preprocess(texts):
    """Join ``texts`` and return the cleaned, space-separated token string.

    Tokens are lower-cased; English stopwords and tokens that are not purely
    alphanumeric (punctuation, URLs, @mentions, ...) are removed.
    """
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(' '.join(texts).lower())
    return ' '.join(
        token for token in tokens
        if token.isalnum() and token not in stop_words
    )


def _show_word_cloud(processed_text):
    """Generate a word cloud from ``processed_text`` and display it."""
    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate(processed_text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


def main():
    """Run the full pipeline: download data, query, clean, and plot."""
    _download_nltk_resources()
    processed_text = _preprocess(_fetch_tweet_texts())

    # WordCloud.generate raises ValueError on input without any words;
    # fail with a clear message instead.
    if not processed_text:
        raise SystemExit("No English tweet text found; nothing to plot.")

    _show_word_cloud(processed_text)


if __name__ == "__main__":
    main()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter