# Split train and test from the new dataset with clustering labels from part #4.
# Features and target are both read from df_emotions_labeled so the rows of
# X and y come from the same frame and are guaranteed to line up.
X = df_emotions_labeled.drop('sentiment', axis=1)
y = df_emotions_labeled['sentiment']

# Create oversampler (previously defined)
# oversampler = RandomOverSampler()

# Fit and transform the data
# NOTE(review): resampling happens BEFORE the train/test split. If
# `oversampler` duplicates minority rows (as a RandomOverSampler would),
# copies of the same row can land in both train and test and inflate test
# scores. Consider splitting first and resampling only the training part --
# confirm that nothing downstream relies on the *_resampled_labeled names.
X_resampled_labeled, y_resampled_labeled = oversampler.fit_resample(X, y)

# Split the dataset into training and testing sets with stratified sampling
# (80/20, fixed seed so the split is reproducible).
X_train_lbl, X_test_lbl, y_train_lbl, y_test_lbl = train_test_split(
    X_resampled_labeled,
    y_resampled_labeled,
    test_size=0.2,
    stratify=y_resampled_labeled,
    random_state=42,
)

# Concatenate them: features and target side by side for each split
train_lbl = pd.concat([X_train_lbl, y_train_lbl], axis=1)
test_lbl = pd.concat([X_test_lbl, y_test_lbl], axis=1)
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter