# Approach 3: imputation extended with "was missing" indicator columns.
#
# Relies on names defined elsewhere in the file: X_train, X_valid, y_train,
# y_valid, cols_with_missing, SimpleImputer, pd and score_dataset.

# Make copies to avoid changing the original data (when imputing)
X_train_plus = X_train.copy()
X_valid_plus = X_valid.copy()

# Make new boolean columns indicating which entries will be imputed
for col in cols_with_missing:
    X_train_plus[col + '_was_missing'] = X_train_plus[col].isnull()
    X_valid_plus[col + '_was_missing'] = X_valid_plus[col].isnull()

# Imputation: fit on the training data only, then apply the fitted imputer
# unchanged to the validation data.
my_imputer = SimpleImputer()

# The imputer output carries no labels, so restore both the column names and
# the row index. Keeping the original index (instead of a fresh 0..n-1 range)
# keeps the imputed frames' row labels identical to X_train / X_valid for any
# later label-based pandas operation.
imputed_X_train_plus = pd.DataFrame(
    my_imputer.fit_transform(X_train_plus),
    columns=X_train_plus.columns,
    index=X_train_plus.index,
)
imputed_X_valid_plus = pd.DataFrame(
    my_imputer.transform(X_valid_plus),
    columns=X_valid_plus.columns,
    index=X_valid_plus.index,
)

print("MAE from Approach 3 (An Extension to Imputation):")
print(score_dataset(imputed_X_train_plus, imputed_X_valid_plus, y_train, y_valid))
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter