import pandas as pd
from sklearn.model_selection import train_test_split

# Load the full dataset from the CSV file.
data = pd.read_csv("data.csv")

# Split the rows into 85% training and 15% validation. Stratifying on the
# "labels" column keeps the label proportions the same in both subsets, and
# the fixed random_state makes the split reproducible between runs.
train_data, validation_data = train_test_split(
    data,
    train_size=0.85,
    test_size=0.15,
    random_state=42,
    stratify=data["labels"],
)

# Write the train and validation datasets to CSV files; index=False keeps the
# DataFrame index out of the output.
train_data.to_csv("train.csv", index=False)
validation_data.to_csv("valid.csv", index=False)
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter