from pymongo import MongoClient

# MongoDB connection details
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"

# Source and target collections
source_collection_name = "tweets"
target_collection_name = "tweet_data"

# Dotted paths of the values copied out of every source document.
fields_to_extract = [
    "json_data.id",
    "json_data.user.id",
    "json_data.created_at",
    "json_data.in_reply_to_status_id",
    "json_data.in_reply_to_user_id",
    "json_data.lang",
    "json_data.place",
    "json_data.user.location",
    "json_data.is_quote_status",
]


def get_nested_field(data, field_path):
    """Return the value reached by following a dotted path through nested dicts.

    Args:
        data: The document (a dict) to read from.
        field_path: Keys separated by dots, e.g. ``"json_data.user.id"``.

    Returns:
        The value stored at the path, or ``None`` when a key is missing,
        a value along the path is ``None``, or an intermediate value is not
        a dict and therefore cannot be descended into.
    """
    current = data
    for key in field_path.split("."):
        # A non-dict here (None, str, list, number, ...) has no sub-fields;
        # report "not found" instead of raising AttributeError on .get().
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def main():
    """Copy the selected fields of each source document into the target collection."""
    # The context manager closes the client even if a read or insert fails.
    with MongoClient(mongo_uri) as client:
        db = client[database_name]
        source_collection = db[source_collection_name]
        target_collection = db[target_collection_name]

        # Ask the server for the needed paths only, rather than whole
        # documents; a list projection always includes _id.
        cursor = source_collection.find({}, fields_to_extract)
        for doc in cursor:
            # NOTE(review): the new document uses the dotted paths themselves
            # as field names (e.g. "json_data.id"), exactly as before --
            # confirm the server version in use accepts dotted field names.
            new_doc = {
                field: get_nested_field(doc, field)
                for field in fields_to_extract
            }
            new_doc["_id"] = doc["_id"]  # Keep the original _id
            target_collection.insert_one(new_doc)

    print("Data successfully transferred to new collections.")


if __name__ == "__main__":
    main()
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter