from pymongo import MongoClient, errors
# --- Connection settings ---------------------------------------------------
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"

# Collection the documents are read from, and the collection that receives
# the trimmed-down copies.
source_collection_name = "tweets"
target_collection_name = "final"

# Dotted paths, resolved against each source document, of the values to copy.
# Each path is also used verbatim as the key in the copied document.
fields_to_extract = [
    "json_data.id",
    "json_data.user.id",
    "json_data.created_at",
    "json_data.in_reply_to_status_id",
    "json_data.in_reply_to_user_id",
    "json_data.entities.user_mentions",
    "json_data.lang",
    "json_data.user.location",
]

# NOTE(review): presumably the Twitter user ids of the airline accounts of
# interest; nothing in the visible part of this script reads this list, so
# confirm where (or whether) it is used.
airline_id_wanted = [
    56377143,
    106062176,
    124476322,
    18332190,
    22536055,
    20626359,
]
def get_nested_field(data, field_path):
    """Return the value found by following a dotted path through *data*.

    Each dot-separated segment of *field_path* is applied to the value
    reached so far:

    * on a dict, the segment is looked up as a key;
    * on a list, the segment is converted to an integer and used as an
      index (ordinary Python indexing, so negative indices are accepted).

    Returns ``None`` as soon as the path cannot be followed: a key is
    missing, an index is not an integer or is out of range, a value on the
    way is ``None``, or a segment is applied to something that is neither a
    dict nor a list. Falsy values other than ``None`` (``0``, ``""``,
    ``[]``) are returned as-is.
    """
    current = data
    for segment in field_path.split("."):
        if isinstance(current, dict):
            current = current.get(segment)
        elif isinstance(current, list):
            try:
                current = current[int(segment)]
            except (ValueError, IndexError):
                # Non-numeric segment or index outside the list.
                return None
        else:
            # Scalars (and None) cannot be traversed any further.
            return None

        if current is None:
            return None
    return current
# Copy a trimmed-down version of every document in the source collection into
# the target collection, keeping the original ``_id``.
#
# ``client`` is bound before the ``try`` so the ``finally`` clause can tell
# whether a client was ever created: if ``MongoClient(...)`` itself raises
# (for example on a malformed URI), an unconditional ``client.close()`` would
# fail with ``NameError`` and hide the error that was just reported.
client = None
try:
    # Server selection gives up after 5 seconds instead of the driver default.
    client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
    db = client[database_name]
    source_collection = db[source_collection_name]
    target_collection = db[target_collection_name]

    cursor = source_collection.find()
    for doc in cursor:
        # One entry per configured path; the dotted path itself is the key,
        # and paths that cannot be resolved are stored as None.
        new_doc = {field: get_nested_field(doc, field) for field in fields_to_extract}
        new_doc["_id"] = doc["_id"]

        # Use extended_tweet.full_text when it is present and non-empty,
        # otherwise fall back to json_data.text.
        full_text = get_nested_field(doc, "json_data.extended_tweet.full_text")
        new_doc["json_data.text"] = full_text or get_nested_field(doc, "json_data.text")

        # Insert the new document into the target collection. Because the
        # source _id is reused, inserting into a target that already holds
        # that _id raises DuplicateKeyError, which the PyMongoError handler
        # below reports.
        target_collection.insert_one(new_doc)

    print("Data successfully transferred to new collections.")
except errors.ServerSelectionTimeoutError as err:
    print("Failed to connect to MongoDB server:", err)
except errors.PyMongoError as err:
    print("An error occurred while working with MongoDB:", err)
finally:
    # Only close a client that was actually constructed.
    if client is not None:
        client.close()
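# NOTE: the lines below appear to be leftover UI text from the code-snippet
# image tool this script was copied from; they are not part of the program.
# Preview:
# Download PNG
# Download JPEG
# Download SVG
# Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
# Click to optimize width for Twitter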