Data Cleaning in mysql

PHOTO EMBED

Thu May 09 2024 17:33:27 GMT+0000 (Coordinated Universal Time)

Saved by @madgakantara

CREATE TABLE tweets_extended AS
SELECT
  JSON_VALUE(tweet_json_obj, '$.extended_tweet.full_text') AS full_text,
  JSON_VALUE(tweet_json_obj, '$.extended_tweet.display_text_range[1]') AS text_range,
  JSON_VALUE(tweet_json_obj, '$.id') AS tweet_id
FROM twitter_db.data_db
WHERE JSON_VALUE(tweet_json_obj, '$.extended_tweet.full_text') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.display_text_range[1]') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.id') IS NOT NULL;
CREATE TABLE user_data AS
SELECT
  JSON_VALUE(tweet_json_obj, '$.id') AS tweet_id,
  JSON_VALUE(tweet_json_obj, '$.user.description') AS user_description,
  JSON_VALUE(tweet_json_obj, '$.user.verified') AS user_verified,
  JSON_VALUE(tweet_json_obj, '$.user.followers_count') AS followers_count,
  JSON_VALUE(tweet_json_obj, '$.user.created_at') AS user_created_at,
  JSON_VALUE(tweet_json_obj, '$.user.translator_type') AS translator_type
FROM twitter_db.data_db
WHERE JSON_VALUE(tweet_json_obj, '$.id') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.user.description') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.user.verified') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.user.followers_count') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.user.created_at') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.user.translator_type') IS NOT NULL;
CREATE TABLE tweets AS
SELECT 
  JSON_VALUE(data, '$.id') AS tweet_id,
  JSON_VALUE(data, '$.user.id') AS user_id,
  JSON_VALUE(data, '$.created_at') AS created_at,
  JSON_VALUE(data, '$.in_reply_to_user_id') AS in_reply_to_user_id,
  JSON_VALUE(data, '$.in_reply_to_status_id') AS in_reply_to_status_id,
  JSON_VALUE(data, '$.place.country') AS country,
  JSON_VALUE(data, '$.reply_count') AS reply_count,
  JSON_VALUE(data, '$.quote_count') AS quote_count,
  JSON_VALUE(data, '$.retweet_count') AS retweet_count,
  JSON_VALUE(data, '$.favorite_count') AS favorite_count,
  JSON_VALUE(data, '$.retweeted') AS retweeted,
  JSON_VALUE(data, '$.lang') AS lang,
  JSON_VALUE(data, '$.source') AS source,
  JSON_VALUE(data, '$.is_quote_status') AS is_quote_status
  FROM twitter_db.data_db
WHERE JSON_VALUE(data, '$.id') IS NOT NULL
  AND JSON_VALUE(data, '$.user.id') IS NOT NULL
  AND JSON_VALUE(data, '$.created_at') IS NOT NULL
  AND JSON_VALUE(data, '$.in_reply_to_user_id') IS NOT NULL
  AND JSON_VALUE(data, '$.in_reply_to_status_id') IS NOT NULL
  AND JSON_VALUE(data, '$.place.country') IS NOT NULL
  AND JSON_VALUE(data, '$.reply_count') IS NOT NULL
  AND JSON_VALUE(data, '$.quote_count') IS NOT NULL
  AND JSON_VALUE(data, '$.retweet_count') IS NOT NULL
  AND JSON_VALUE(data, '$.favorite_count') IS NOT NULL
  AND JSON_VALUE(data, '$.favorited') IS NOT NULL
  AND JSON_VALUE(data, '$.retweeted') IS NOT NULL
  AND JSON_VALUE(data, '$.lang') IS NOT NULL
  AND JSON_VALUE(data, '$.source') IS NOT NULL
  AND JSON_VALUE(data, '$.is_quote_status') IS NOT NULL;
CREATE TABLE entities AS
SELECT
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.hashtags') AS hashtags,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].id') AS user_mention0_id,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].id') AS user_mention1_id,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].name') AS user_mention0_name,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].name') AS user_mention1_name,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].screen_name') AS user_mention0_screen,
JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].screen_name') AS user_mention1_screen
  FROM twitter_db.data_db
WHERE JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.hashtags') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].id') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].id') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].name') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].name') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].screen_name') IS NOT NULL
  AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].screen_name') IS NOT NULL
  AND JSON_VALUE(data, '$.lang') = 'en'; OR JSON_VALUE(data, '$.lang') = 'nl'
content_copyCOPY