from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
# Split the frame into features and the regression target.
x = df.drop(['Customer_Lifetime_Value'], axis=1)
y = df['Customer_Lifetime_Value']

# Hold out 30% of the rows for evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.30, random_state=42)
class Model_Selector():
    """Compare several regression models with k-fold cross-validation.

    Helps select the best machine learning model to capture the
    relationship between the input features and the dependent variable
    by scoring each candidate on the module-level ``x_train``/``y_train``
    split and plotting the score distributions side by side.
    """

    def __init__(self):
        # Stateless; the constructor only announces creation.
        print("Model Selector object created")

    def Regression_Model_Selector(self, df):
        """Cross-validate candidate regressors and plot an R^2 comparison.

        Parameters
        ----------
        df : unused
            Kept for backward compatibility with existing callers; the
            method reads the module-level ``x_train``/``y_train`` instead.
            TODO(review): consider deriving the split from ``df`` here.

        Returns
        -------
        None
            Prints ``(name, mean, std)`` of the CV scores for each model
            and shows a boxplot comparing the distributions.
        """
        seed = 42
        scoring = 'r2'
        models = [
            ("LR", LinearRegression()),
            ("RF", RandomForestRegressor()),
            ("KNN", KNeighborsRegressor()),
            ("CART", DecisionTreeRegressor()),
            ("XGB", XGBRegressor()),
        ]

        result = []
        names = []
        for name, model in models:
            # shuffle=True is mandatory when random_state is set:
            # scikit-learn >= 0.24 raises a ValueError otherwise.
            kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
            cv_results = cross_val_score(model, x_train, y_train,
                                         cv=kfold, scoring=scoring)
            result.append(cv_results)
            names.append(name)
            msg = (name, cv_results.mean(), cv_results.std())
            print(msg)

        fig = plt.figure(figsize=(8, 4))
        fig.suptitle('Algorithm Comparison')
        ax = fig.add_subplot(1, 1, 1)
        plt.boxplot(result)
        ax.set_xticklabels(names)
        plt.show()
# NOTE(review): removed accidental paste of code-screenshot-tool UI text
# ("Preview / Download PNG/JPEG/SVG ..."), which was not Python and made
# the file syntactically invalid.