Boxplot of text length
Wed May 15 2024 08:50:05 GMT+0000 (Coordinated Universal Time)
Saved by
@madgakantara
import mysql.connector
import pandas as pd
import matplotlib.pyplot as plt
# Establish connection to MySQL
mydb = mysql.connector.connect(
host="localhost",
user="root",
password="Eimaipolykala1",
database="twitter_db"
)
# Create cursor
cursor = mydb.cursor()
# Execute SQL query to retrieve data
cursor.execute("SELECT JSON_VALUE(data, '$.extended_tweet.display_text_range[1]') AS text_length FROM data_db")
# Fetch data
data = cursor.fetchall()
# Close the connection
mydb.close()
# Create DataFrame
df = pd.DataFrame(data, columns=['Text_Length'])
# Convert Text_Length to numeric
df['Text_Length'] = pd.to_numeric(df['Text_Length'], errors='coerce')
# Drop NaN values
df = df.dropna()
# Plotting
plt.figure(figsize=(10, 6))
plt.boxplot(df['Text_Length'])
plt.xlabel('Text Length')
plt.ylabel('Number of Characters')
plt.title('Distribution of Text Length in Extended Tweets')
plt.show()
content_copyCOPY
Comments