Data Cleaning in mysql
Thu May 09 2024 17:33:27 GMT+0000 (Coordinated Universal Time)
Saved by @madgakantara
CREATE TABLE tweets_extended AS SELECT JSON_VALUE(tweet_json_obj, '$.extended_tweet.full_text') AS full_text, JSON_VALUE(tweet_json_obj, '$.extended_tweet.display_text_range[1]') AS text_range, JSON_VALUE(tweet_json_obj, '$.id') AS tweet_id FROM twitter_db.data_db WHERE JSON_VALUE(tweet_json_obj, '$.extended_tweet.full_text') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.display_text_range[1]') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.id') IS NOT NULL; CREATE TABLE user_data AS SELECT JSON_VALUE(tweet_json_obj, '$.id') AS tweet_id, JSON_VALUE(tweet_json_obj, '$.user.description') AS user_description, JSON_VALUE(tweet_json_obj, '$.user.verified') AS user_verified, JSON_VALUE(tweet_json_obj, '$.user.followers_count') AS followers_count, JSON_VALUE(tweet_json_obj, '$.user.created_at') AS user_created_at, JSON_VALUE(tweet_json_obj, '$.user.translator_type') AS translator_type FROM twitter_db.data_db WHERE JSON_VALUE(tweet_json_obj, '$.id') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.user.description') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.user.verified') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.user.followers_count') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.user.created_at') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.user.translator_type') IS NOT NULL; CREATE TABLE tweets AS SELECT JSON_VALUE(data, '$.id') AS tweet_id, JSON_VALUE(data, '$.user.id') AS user_id, JSON_VALUE(data, '$.created_at') AS created_at, JSON_VALUE(data, '$.in_reply_to_user_id') AS in_reply_to_user_id, JSON_VALUE(data, '$.in_reply_to_status_id') AS in_reply_to_status_id, JSON_VALUE(data, '$.place.country') AS country, JSON_VALUE(data, '$.reply_count') AS reply_count, JSON_VALUE(data, '$.quote_count') AS quote_count, JSON_VALUE(data, '$.retweet_count') AS retweet_count, JSON_VALUE(data, '$.favorite_count') AS favorite_count, JSON_VALUE(data, '$.retweeted') AS retweeted, JSON_VALUE(data, '$.lang') AS lang, JSON_VALUE(data, '$.source') AS source, JSON_VALUE(data, '$.is_quote_status') AS is_quote_status FROM twitter_db.data_db WHERE JSON_VALUE(data, '$.id') IS NOT NULL AND JSON_VALUE(data, '$.user.id') IS NOT NULL AND JSON_VALUE(data, '$.created_at') IS NOT NULL AND JSON_VALUE(data, '$.in_reply_to_user_id') IS NOT NULL AND JSON_VALUE(data, '$.in_reply_to_status_id') IS NOT NULL AND JSON_VALUE(data, '$.place.country') IS NOT NULL AND JSON_VALUE(data, '$.reply_count') IS NOT NULL AND JSON_VALUE(data, '$.quote_count') IS NOT NULL AND JSON_VALUE(data, '$.retweet_count') IS NOT NULL AND JSON_VALUE(data, '$.favorite_count') IS NOT NULL AND JSON_VALUE(data, '$.favorited') IS NOT NULL AND JSON_VALUE(data, '$.retweeted') IS NOT NULL AND JSON_VALUE(data, '$.lang') IS NOT NULL AND JSON_VALUE(data, '$.source') IS NOT NULL AND JSON_VALUE(data, '$.is_quote_status') IS NOT NULL; CREATE TABLE entities AS SELECT JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.hashtags') AS hashtags, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].id') AS user_mention0_id, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].id') AS user_mention1_id, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].name') AS user_mention0_name, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].name') AS user_mention1_name, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].screen_name') AS user_mention0_screen, JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].screen_name') AS user_mention1_screen FROM twitter_db.data_db WHERE JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.hashtags') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].id') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].id') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].name') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].name') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[0].screen_name') IS NOT NULL AND JSON_VALUE(tweet_json_obj, '$.extended_tweet.entities.user_mentions[1].screen_name') IS NOT NULL AND JSON_VALUE(data, '$.lang') = 'en'; OR JSON_VALUE(data, '$.lang') = 'nl'
Comments