"""Correct approach: normalize AFTER the train/test split.

The scaler is fit on the training data only, then applied to both splits.
Fitting it on the full dataset would leak test-set statistics (min/max)
into training — a form of data leakage that inflates evaluation scores.
"""
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Define a synthetic binary-classification dataset (fixed seed for reproducibility).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=7,
)

# Split into train and test sets BEFORE any scaling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)

# Fit the scaler on the training split only, then transform both splits.
# fit_transform on train == fit followed by transform, in one idiomatic call.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)  # uses train-set min/max — no leakage

# Fit the model on the scaled training data.
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out test set.
yhat = model.predict(X_test)
accuracy = accuracy_score(y_test, yhat)
print('Accuracy: %.3f' % (accuracy * 100))