# NOTE(review): the original file imported only statsmodels but also uses
# numpy, scikit-learn and xgboost — those imports are added here so the file
# can run standalone; confirm they match the versions used elsewhere.
import numpy as np
import statsmodels.api as sm
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor


class Data_Modelling(object):
    """Fit and score several regression models on a pre-split dataset.

    Each ``*_Model`` method fits one estimator and returns the pair
    ``(RMSE, r2_score)`` computed on the test split.

    NOTE(review): every method here reads the module-level globals
    ``x_train``, ``y_train``, ``x_test``, ``y_test`` and ignores its
    ``df``/``data`` argument — those globals must exist before any method
    is called; verify against the surrounding notebook/script.
    """

    def __init__(self, n_estimators, max_depth, min_samples_split,
                 min_samples_leaf, max_leaf_nodes, min_impurity_split,
                 min_impurity_decrease, bootstrap, min_child_weight,
                 learning_rate, Subsample, Alpha, Lamda, random_state,
                 criterion):
        # Bug fix: the original body assigned hard-coded constants and
        # silently discarded every constructor argument (e.g. the caller
        # passes n_estimators=500 but got 150). Store the arguments instead.
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_leaf_nodes = max_leaf_nodes
        # Kept for interface compatibility, but no longer forwarded to
        # scikit-learn: ``min_impurity_split`` was removed in sklearn 1.0.
        self.min_impurity_split = min_impurity_split
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.min_child_weight = min_child_weight
        self.learning_rate = learning_rate
        self.Subsample = Subsample
        self.Alpha = Alpha
        self.Lamda = Lamda
        self.random_state = random_state
        # 'mse' was renamed to 'squared_error' in scikit-learn 1.0 and the
        # old spelling removed in 1.2; translate so older call sites that
        # still pass 'mse' keep working on current sklearn.
        self.criterion = 'squared_error' if criterion == 'mse' else criterion
        print("Data Modelling object created")

    def OLS_Summary(self, data):
        """Fit statsmodels OLS on the global training split and return its summary."""
        model2 = sm.OLS(y_train, x_train).fit()
        return model2.summary()

    def Linear_Regression_Model(self, df):
        """Fit plain linear regression; return (test RMSE, test R^2)."""
        regressor = LinearRegression()
        regressor.fit(x_train, y_train)
        LR_pred = regressor.predict(x_test)
        LR_RMSE = np.sqrt(metrics.mean_squared_error(y_test, LR_pred))
        LR_r2_score = r2_score(y_test, LR_pred)
        return LR_RMSE, LR_r2_score

    def Decision_Tree_Model(self, df):
        """Fit a decision-tree regressor; return (test RMSE, test R^2)."""
        # Bug fix: ``min_impurity_split`` is no longer passed — the keyword
        # was removed from scikit-learn in 1.0 and would raise TypeError.
        DT_Regressor = DecisionTreeRegressor(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state)
        DT_Regressor.fit(x_train, y_train)
        DT_pred = DT_Regressor.predict(x_test)
        DT_RMSE = np.sqrt(metrics.mean_squared_error(y_test, DT_pred))
        DT_r2_score = r2_score(y_test, DT_pred)
        return DT_RMSE, DT_r2_score

    def Random_Forest_Model(self, df):
        """Fit a random-forest regressor; return (test RMSE, test R^2)."""
        RF_Regressor = RandomForestRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            bootstrap=self.bootstrap,
            criterion=self.criterion)
        RF_Regressor.fit(x_train, y_train)
        RF_pred = RF_Regressor.predict(x_test)
        RF_RMSE = np.sqrt(metrics.mean_squared_error(y_test, RF_pred))
        RF_r2_score = r2_score(y_test, RF_pred)
        return RF_RMSE, RF_r2_score

    def Extreme_Gradient_Boosting_Model(self, df):
        """Fit an XGBoost regressor; return (test RMSE, test R^2)."""
        # Bug fix: the original passed ``Subsample``/``Alpha``/``Lamda``,
        # which are not XGBoost parameter names; the real keywords are
        # subsample, reg_alpha and reg_lambda.
        XGB_Regressor = XGBRegressor(
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            max_depth=self.max_depth,
            min_child_weight=self.min_child_weight,
            random_state=self.random_state,
            subsample=self.Subsample,
            reg_alpha=self.Alpha,
            reg_lambda=self.Lamda)
        XGB_Regressor.fit(x_train, y_train)
        XGB_pred = XGB_Regressor.predict(x_test)
        XGB_RMSE = np.sqrt(metrics.mean_squared_error(y_test, XGB_pred))
        XGB_r2_score = r2_score(y_test, XGB_pred)
        return XGB_RMSE, XGB_r2_score


# base model
# NOTE(review): ``df`` (and the x/y train/test globals the methods read) are
# not defined in this file — presumably created earlier in the originating
# notebook; confirm before running standalone.
Basemodell = Data_Modelling(500, 5, 3, 3, None, 1, 0.1, True, 3, 0.07,
                            0.7, 0, 1.5, 29, 'mse')
Basemodell.OLS_Summary(df)
Basemodell.Linear_Regression_Model(df)
Basemodell.Decision_Tree_Model(df)
Basemodell.Random_Forest_Model(df)
Basemodell.Extreme_Gradient_Boosting_Model(df)
Preview:
downloadDownload PNG
downloadDownload JPEG
downloadDownload SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter