Creating tweet_data table without languages

PHOTO EMBED

Tue May 28 2024 21:33:09 GMT+0000 (Coordinated Universal Time)

Saved by @madgakantara

from pymongo import MongoClient

# Server address and the database that holds both collections.
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"

# Collection that is read from, and collection that receives the copies.
source_collection_name = "tweets"
target_collection_name = "tweet_data"

# Dotted paths of the values to copy. Every path sits under the top-level
# "json_data" key, so the prefix is written once and joined onto each
# remaining part of the path.
fields_to_extract = [
    "json_data." + leaf
    for leaf in (
        "id",
        "user.id",
        "created_at",
        "in_reply_to_status_id",
        "in_reply_to_user_id",
        "lang",
        "place",
        "user.location",
        "is_quote_status",
    )
]

def get_nested_field(data, field_path):
    """Look up a value inside nested dictionaries using a dotted path.

    ``field_path`` is split on ``.`` and each part is used as a key one
    level deeper than the previous one, so ``"a.b"`` returns
    ``data["a"]["b"]``.

    Args:
        data: The outermost mapping (any object with a ``get`` method).
        field_path: Keys joined by dots, outermost key first.

    Returns:
        The value found at the end of the path, or ``None`` when a key is
        missing, a value along the path is ``None``, or a value along the
        path is not a mapping and therefore has no sub-keys.
    """
    current = data
    for key in field_path.split('.'):
        try:
            current = current.get(key)
        except AttributeError:
            # A scalar or list in the middle of the path has no ``get``;
            # treat it like a missing key rather than crashing.
            return None
        if current is None:
            return None
    return current

# Copy the selected fields of every source document into the target
# collection. The client is used as a context manager so the connection is
# closed even if reading or inserting fails part-way through.
with MongoClient(mongo_uri) as client:
    db = client[database_name]
    source_collection = db[source_collection_name]
    target_collection = db[target_collection_name]

    # New documents are buffered and written with insert_many so that each
    # batch costs one round trip instead of one per document. The default
    # ordered mode still stops at the first failing document (for example a
    # duplicate _id when the script is run twice), raising BulkWriteError.
    insert_batch_size = 1000
    pending = []
    for doc in source_collection.find():
        # Build a new document holding only the specified fields.
        # NOTE(review): the keys are the full dotted paths (e.g.
        # "json_data.id"), i.e. field names that contain dots -- confirm
        # this flat layout is what downstream queries expect.
        new_doc = {field: get_nested_field(doc, field) for field in fields_to_extract}
        new_doc["_id"] = doc["_id"]  # Keep the original _id
        pending.append(new_doc)

        if len(pending) >= insert_batch_size:
            target_collection.insert_many(pending)
            pending = []

    # insert_many rejects an empty list, so only flush a non-empty remainder.
    if pending:
        target_collection.insert_many(pending)

print("Data successfully transferred to new collections.")
content_copyCOPY