import statsmodels.api as sm
class Data_Modelling(object):
    """Train and score several regression models with a shared set of hyperparameters.

    Each ``*_Model`` method fits one estimator and returns ``(RMSE, r2)`` on the
    test split.

    NOTE(review): the fitting methods ignore the ``df``/``data`` argument they
    receive and read ``x_train``/``y_train``/``x_test``/``y_test`` from
    module-level globals -- confirm those are defined before calling.
    """

    def __init__(self, n_estimators,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 max_leaf_nodes,
                 min_impurity_split,
                 min_impurity_decrease,
                 bootstrap,
                 min_child_weight,
                 learning_rate,
                 Subsample,
                 Alpha,
                 Lamda,
                 random_state,
                 criterion):
        # BUG FIX: the original ignored every constructor argument and
        # hard-coded the hyperparameters (so e.g. passing 500 estimators
        # silently trained with 150). Store what the caller actually passed.
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_split = min_impurity_split
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.min_child_weight = min_child_weight
        self.learning_rate = learning_rate
        self.Subsample = Subsample
        self.Alpha = Alpha
        self.Lamda = Lamda
        self.random_state = random_state
        # NOTE(review): 'mse' was renamed 'squared_error' in scikit-learn 1.0;
        # pass the value the installed sklearn version expects.
        self.criterion = criterion
        print("Data Modelling object created")

    def OLS_Summary(self, data):
        """Fit a statsmodels OLS on the (global) training split and return its summary."""
        model2 = sm.OLS(y_train, x_train).fit()
        return model2.summary()

    def Linear_Regression_Model(self, df):
        """Fit plain linear regression; return (test RMSE, test R^2)."""
        regressor = LinearRegression()
        regressor.fit(x_train, y_train)  # removed unused local 'reg'
        LR_pred = regressor.predict(x_test)
        LR_RMSE = np.sqrt(metrics.mean_squared_error(y_test, LR_pred))
        LR_r2_score = r2_score(y_test, LR_pred)
        return LR_RMSE, LR_r2_score

    def Decision_Tree_Model(self, df):
        """Fit a decision-tree regressor with the stored hyperparameters; return (RMSE, R^2)."""
        # NOTE(review): min_impurity_split was deprecated and removed in
        # scikit-learn >= 1.0 -- drop it if running on a modern sklearn.
        DT_Regressor = DecisionTreeRegressor(max_depth=self.max_depth,
                                             min_samples_split=self.min_samples_split,
                                             min_samples_leaf=self.min_samples_leaf,
                                             max_leaf_nodes=self.max_leaf_nodes,
                                             min_impurity_split=self.min_impurity_split,
                                             min_impurity_decrease=self.min_impurity_decrease,
                                             random_state=self.random_state)
        DT_Regressor.fit(x_train, y_train)
        DT_pred = DT_Regressor.predict(x_test)
        DT_RMSE = np.sqrt(metrics.mean_squared_error(y_test, DT_pred))
        DT_r2_score = r2_score(y_test, DT_pred)
        return DT_RMSE, DT_r2_score

    def Random_Forest_Model(self, df):
        """Fit a random-forest regressor with the stored hyperparameters; return (RMSE, R^2)."""
        RF_Regressor = RandomForestRegressor(n_estimators=self.n_estimators,
                                             max_depth=self.max_depth,
                                             min_samples_split=self.min_samples_split,
                                             min_samples_leaf=self.min_samples_leaf,
                                             max_leaf_nodes=self.max_leaf_nodes,
                                             bootstrap=self.bootstrap,
                                             criterion=self.criterion)
        RF_Regressor.fit(x_train, y_train)
        RF_pred = RF_Regressor.predict(x_test)
        RF_RMSE = np.sqrt(metrics.mean_squared_error(y_test, RF_pred))
        RF_r2_score = r2_score(y_test, RF_pred)
        return RF_RMSE, RF_r2_score

    def Extreme_Gradient_Boosting_Model(self, df):
        """Fit an XGBoost regressor with the stored hyperparameters; return (RMSE, R^2)."""
        # BUG FIX: the original passed Subsample/Alpha/Lamda, which are not
        # XGBRegressor parameter names (they were silently ignored or warned
        # about, so the defaults were used). The sklearn-API names are
        # subsample, reg_alpha and reg_lambda.
        XGB_Regressor = XGBRegressor(n_estimators=self.n_estimators,
                                     learning_rate=self.learning_rate,
                                     max_depth=self.max_depth,
                                     min_child_weight=self.min_child_weight,
                                     random_state=self.random_state,
                                     subsample=self.Subsample,
                                     reg_alpha=self.Alpha,
                                     reg_lambda=self.Lamda)
        XGB_Regressor.fit(x_train, y_train)
        XGB_pred = XGB_Regressor.predict(x_test)
        XGB_RMSE = np.sqrt(metrics.mean_squared_error(y_test, XGB_pred))
        XGB_r2_score = r2_score(y_test, XGB_pred)
        return XGB_RMSE, XGB_r2_score
# Build the baseline model configuration and run every modelling step on df.
Basemodell = Data_Modelling(
    n_estimators=500,
    max_depth=5,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=None,
    min_impurity_split=1,
    min_impurity_decrease=0.1,
    bootstrap=True,
    min_child_weight=3,
    learning_rate=0.07,
    Subsample=0.7,
    Alpha=0,
    Lamda=1.5,
    random_state=29,
    criterion='mse',
)
# Run each model in turn; return values are discarded, exactly as before.
for run_step in (Basemodell.OLS_Summary,
                 Basemodell.Linear_Regression_Model,
                 Basemodell.Decision_Tree_Model,
                 Basemodell.Random_Forest_Model,
                 Basemodell.Extreme_Gradient_Boosting_Model):
    run_step(df)
# NOTE(review): removed non-code residue pasted from a code-screenshot tool
# ("Preview / Download PNG / Download JPEG / Download SVG ..."); it was not
# valid Python and carried no program content.