"""Split the CLV dataset into train/test sets and benchmark several
regression models with k-fold cross-validation, comparing them by R^2."""

from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt  # was missing: plt is used for the box-plot below

# NOTE(review): `df` must already be defined (loaded) earlier in the
# script/notebook — presumably a pandas DataFrame with a
# 'Customer_Lifetime_Value' target column; confirm against the caller.
x = df.drop(['Customer_Lifetime_Value'], axis=1)
y = df['Customer_Lifetime_Value']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.30, random_state=42)


class Model_Selector:
    """Benchmark a fixed set of regression models via cross-validation.

    Helps select the best machine-learning model to compute the
    relationship between the input features and the dependent variable.
    """

    def __init__(self):
        # No state is kept; the constructor only announces creation.
        print("Model Selector object created")

    def Regression_Model_Selector(self, df):
        """Cross-validate several regressors and plot their R^2 scores.

        Runs 5-fold cross-validation (scoring='r2') for LR, RF, KNN,
        CART and XGB on the module-level training split, prints
        (name, mean, std) for each model, and shows a box plot of the
        fold scores.

        Parameters
        ----------
        df : unused
            Kept for backward compatibility with existing callers; the
            method evaluates on the global x_train / y_train instead.
        """
        seed = 42
        models = [
            ("LR", LinearRegression()),
            ("RF", RandomForestRegressor()),
            ("KNN", KNeighborsRegressor()),
            ("CART", DecisionTreeRegressor()),
            ("XGB", XGBRegressor()),
        ]
        result = []
        names = []
        scoring = 'r2'
        for name, model in models:
            # shuffle=True is required when random_state is supplied;
            # scikit-learn >= 0.24 raises ValueError otherwise.
            kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
            cv_results = cross_val_score(model, x_train, y_train,
                                         cv=kfold, scoring=scoring)
            result.append(cv_results)
            names.append(name)
            print((name, cv_results.mean(), cv_results.std()))
        fig = plt.figure(figsize=(8, 4))
        fig.suptitle('Algorithm Comparison')
        ax = fig.add_subplot(1, 1, 1)
        plt.boxplot(result)
        ax.set_xticklabels(names)
        plt.show()