from pymongo import MongoClient, errors

# Connection settings: a MongoDB server on localhost and the database that
# holds both the source and the target collection.
mongo_uri = "mongodb://localhost:27017/"
database_name = "twitter_database"

# Documents are read from the source collection and the reduced copies are
# written to the target collection.
source_collection_name, target_collection_name = "tweets", "tweet_cleaned"

# Paths of the values to copy, written relative to the "json_data"
# sub-document. Segments are separated by dots; a numeric segment
# (e.g. "user_mentions.0") is a position inside a list.
_json_data_paths = (
    "id",
    "user.id",
    "created_at",
    "text",
    "user.created_at",
    "in_reply_to_status_id",
    "in_reply_to_user_id",
    "user.description",
    "quoted_status_id",
    "extended_tweet.full_text",
    "entities.user_mentions.0.id",
    "entities.user_mentions.1.id",
    "entities.user_mentions.2.id",
    "lang",
    "entities.hashtags",
    "user.location",
    "is_quote_status",
)

# Full dotted paths, in the same order as above, each rooted at "json_data".
fields_to_extract = ["json_data." + path for path in _json_data_paths]

def get_nested_field(data, field_path):
    """Follow a dot-separated path through nested dicts and lists.

    Each segment of *field_path* is applied to the value reached so far:
    on a dict it is used as a key, on a list it is converted to an integer
    index.

    Args:
        data: The dict or list to start from.
        field_path: Path segments joined by ".", e.g. "a.b.0.c".

    Returns:
        The value at the end of the path, or None when a key is missing,
        a list segment is not an integer or is out of range, a value along
        the path is None, or a non-container value is reached before the
        path is exhausted.
    """
    current = data
    for segment in field_path.split('.'):
        if isinstance(current, dict):
            current = current.get(segment)
        elif isinstance(current, list):
            try:
                current = current[int(segment)]
            except (ValueError, IndexError):
                # Segment is not a number, or the list is too short.
                return None
        else:
            # Scalars cannot be descended into.
            return None
        if current is None:
            return None
    return current

# Copy a reduced version of every tweet in one of the selected languages from
# the source collection into the target collection, keeping the original _id.

# Languages whose tweets are copied. Kept as a list (not a set) so the
# membership test below also works if "lang" holds an unhashable value.
languages_to_keep = ["en", "nl", "es"]

# Bound before the try block so the finally clause can tell whether a client
# was ever created: MongoClient() itself can raise (for example on an invalid
# URI), and calling close() on an unbound name would raise NameError and hide
# the real error.
client = None
try:
    client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
    db = client[database_name]
    source_collection = db[source_collection_name]
    target_collection = db[target_collection_name]

    # Let the server discard documents in other languages instead of
    # streaming the whole collection to the client.
    cursor = source_collection.find(
        {"json_data.lang": {"$in": languages_to_keep}}
    )

    for doc in cursor:
        # Re-check locally: the server-side match is slightly broader than
        # get_nested_field (e.g. it also matches array-valued fields), and
        # this keeps the selected set identical to a purely local filter.
        lang = get_nested_field(doc, "json_data.lang")
        if lang not in languages_to_keep:
            continue

        # One flat document keyed by the full dotted path; paths that cannot
        # be resolved are stored as None.
        # NOTE(review): these keys contain dots (e.g. "json_data.id") --
        # confirm the driver/server versions in use accept such field names.
        new_doc = {field: get_nested_field(doc, field) for field in fields_to_extract}
        new_doc["_id"] = doc["_id"]

        # Insert the new document into the target collection.
        # NOTE(review): because _id is reused, re-running against a target
        # that already holds this _id is expected to fail with a duplicate
        # key error (a PyMongoError) and stop the transfer.
        target_collection.insert_one(new_doc)

    print("Data successfully transferred to new collections.")

except errors.ServerSelectionTimeoutError as err:
    print("Failed to connect to MongoDB server:", err)
except errors.PyMongoError as err:
    print("An error occurred while working with MongoDB:", err)
finally:
    if client is not None:
        client.close()