Snippets Collections
import sklearn.datasets as datasets
# Synthetic regression problem with a single feature, which is also the only informative one.
X, y = datasets.make_regression(n_informative=1, n_features=1)

# Load the diabetes dataset and pull out its feature matrix and target vector.
# NOTE(review): `X_diabets` looks like a misspelling of `X_diabetes`; the name
# is kept unchanged here — confirm nothing depends on it before renaming.
diabetes = datasets.load_diabetes()
X_diabets = diabetes.data
y_diabetes = diabetes.target
# Correct approach to normalization: split the data first, then fit the scaler on the training set only, before the model is evaluated
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Build a synthetic classification problem: 1000 samples, 20 features.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=7,
)
# Hold out 33% of the rows for testing before any scaling takes place.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# Learn the min-max ranges from the training split only and rescale it in the same step.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# Rescale the held-out split using the ranges learned from the training split.
X_test = scaler.transform(X_test)
# Train the classifier on the scaled training data.
model = LogisticRegression()
model.fit(X_train, y_train)
# Predict labels for the held-out split and report accuracy as a percentage.
yhat = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=yhat)
print('Accuracy: %.3f' % (accuracy*100))
...
# Create the scaler, learn its ranges from the training split only,
# and rescale that split in the same step.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# Rescale the test split with the ranges learned from the training split.
X_test = scaler.transform(X_test)
star

Sat Jan 15 2022 11:50:41 GMT+0000 (Coordinated Universal Time)

#python #sklearn #prediction #ml
star

Fri Jan 14 2022 15:03:21 GMT+0000 (Coordinated Universal Time) https://machinelearningmastery.com/data-preparation-without-data-leakage/

#python #prediction #regression
star

Fri Jan 14 2022 14:40:01 GMT+0000 (Coordinated Universal Time)

#python #scaling #prediction

Save snippets that work with our extensions

Available in the Chrome Web Store Get Firefox Add-on Get VS Code extension