# Build a class-balanced training set and an untouched hold-out test set
# from `df_emotions`, using the 'sentiment' column as the target.
#
# NOTE(review): `df_emotions`, `pd` and `train_test_split` are not defined in
# this snippet; they are assumed to be provided earlier in the file/notebook.

# Import the oversampling library
from imblearn.over_sampling import RandomOverSampler

# Separate the target variable from the features
X = df_emotions.drop('sentiment', axis=1)
y = df_emotions['sentiment']

# Split FIRST, with stratified sampling, so the test set keeps the original
# class proportions and never contains rows that were duplicated by the
# oversampler. Oversampling before the split lets copies of the same row end
# up in both train and test, which leaks information and inflates test scores.
X_train_raw, X_test_v4, y_train_raw, y_test_v4 = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Create oversampler (seeded so that the resampled set is reproducible,
# matching the seeded split above)
oversampler = RandomOverSampler(random_state=42)

# Fit and resample the TRAINING fold only
X_resampled, y_resampled = oversampler.fit_resample(X_train_raw, y_train_raw)
X_train_v4, y_train_v4 = X_resampled, y_resampled

# Join X and Y into one table again
# (presumably X and y of each fold share the same index, so the column-wise
# concat lines up row by row -- verify against the installed imblearn version)
train_v4 = pd.concat([X_train_v4, y_train_v4], axis=1)
test_v4 = pd.concat([X_test_v4, y_test_v4], axis=1)
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter