Model Building

PHOTO EMBED

Thu Aug 24 2023 15:52:09 GMT+0000 (Coordinated Universal Time)

Saved by @sumikk ##partialdependencyplot #info.column_information(df) info.agg_tabulation(df) info.num_count_summary(df) info.statistical_summary(df)

import statsmodels.api as sm

class Data_Modelling(object):
    """Fit and score several regression models with shared hyperparameters.

    Every model method reads the module-level names ``x_train``, ``x_test``,
    ``y_train`` and ``y_test``; the ``data`` / ``df`` argument each method
    accepts is kept for interface compatibility but is not used. The
    estimator classes (``LinearRegression``, ``DecisionTreeRegressor``,
    ``RandomForestRegressor``, ``XGBRegressor``) as well as ``sm``, ``np``,
    ``metrics`` and ``r2_score`` must be imported by the surrounding module.
    """

    def __init__(self, n_estimators,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 max_leaf_nodes,
                 min_impurity_split,
                 min_impurity_decrease,
                 bootstrap,
                 min_child_weight,
                 learning_rate,
                 Subsample,
                 Alpha,
                 Lamda,
                 random_state,
                 criterion):
        """Store the hyperparameters passed by the caller.

        Args:
            n_estimators: Number of trees (random forest and XGBoost).
            max_depth: Maximum tree depth (all tree-based models).
            min_samples_split: Forwarded to the decision tree / random forest.
            min_samples_leaf: Forwarded to the decision tree / random forest.
            max_leaf_nodes: Forwarded to the decision tree / random forest.
            min_impurity_split: Forwarded to the decision tree.
            min_impurity_decrease: Forwarded to the decision tree.
            bootstrap: Forwarded to the random forest.
            min_child_weight: Forwarded to XGBoost.
            learning_rate: Forwarded to XGBoost.
            Subsample: Forwarded to XGBoost as ``subsample``.
            Alpha: Forwarded to XGBoost as ``reg_alpha``.
            Lamda: Forwarded to XGBoost as ``reg_lambda``.
            random_state: Seed used by the tree-based models.
            criterion: Split criterion forwarded to the random forest.
        """
        # Keep the caller's values; previously these were overwritten with
        # hard-coded constants, so constructor arguments had no effect.
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_split = min_impurity_split
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.min_child_weight = min_child_weight
        self.learning_rate = learning_rate
        self.Subsample = Subsample
        self.Alpha = Alpha
        self.Lamda = Lamda
        self.random_state = random_state
        self.criterion = criterion

        print("Data Modelling object created")

    def _fit_and_score(self, estimator):
        """Fit ``estimator`` on the training split; return (RMSE, R^2) on the test split."""
        estimator.fit(x_train, y_train)
        predictions = estimator.predict(x_test)
        rmse = np.sqrt(metrics.mean_squared_error(y_test, predictions))
        return rmse, r2_score(y_test, predictions)

    def OLS_Summary(self, data):
        """Fit a statsmodels OLS on the training split and return its summary.

        ``data`` is unused; the fit reads the module-level ``y_train`` and
        ``x_train``.
        """
        return sm.OLS(y_train, x_train).fit().summary()

    def Linear_Regression_Model(self, df):
        """Return ``(RMSE, R^2)`` of a ``LinearRegression`` on the test split."""
        return self._fit_and_score(LinearRegression())

    def Decision_Tree_Model(self, df):
        """Return ``(RMSE, R^2)`` of a ``DecisionTreeRegressor`` on the test split."""
        # NOTE(review): `min_impurity_split` was deprecated and later removed
        # from scikit-learn (1.0); on newer versions this call raises
        # TypeError. Kept so behaviour on the originally targeted version is
        # unchanged - confirm the installed version before dropping it.
        DT_Regressor = DecisionTreeRegressor(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_split=self.min_impurity_split,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
        )
        return self._fit_and_score(DT_Regressor)

    def Random_Forest_Model(self, df):
        """Return ``(RMSE, R^2)`` of a ``RandomForestRegressor`` on the test split."""
        RF_Regressor = RandomForestRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            bootstrap=self.bootstrap,
            criterion=self.criterion,
            # Seed the forest like the other tree models so runs are
            # reproducible.
            random_state=self.random_state,
        )
        return self._fit_and_score(RF_Regressor)

    def Extreme_Gradient_Boosting_Model(self, df):
        """Return ``(RMSE, R^2)`` of an ``XGBRegressor`` on the test split."""
        # The scikit-learn wrapper's parameter names are `subsample`,
        # `reg_alpha` and `reg_lambda`; the capitalised / misspelled keywords
        # used before were not recognised as those settings.
        XGB_Regressor = XGBRegressor(
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            max_depth=self.max_depth,
            min_child_weight=self.min_child_weight,
            random_state=self.random_state,
            subsample=self.Subsample,
            reg_alpha=self.Alpha,
            reg_lambda=self.Lamda,
        )
        return self._fit_and_score(XGB_Regressor)

# Base model: build the modelling object with the baseline hyperparameters,
# then run the OLS summary followed by each regression model in turn.
Basemodell = Data_Modelling(
    n_estimators=500,
    max_depth=5,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=None,
    min_impurity_split=1,
    min_impurity_decrease=0.1,
    bootstrap=True,
    min_child_weight=3,
    learning_rate=0.07,
    Subsample=0.7,
    Alpha=0,
    Lamda=1.5,
    random_state=29,
    criterion='mse',
)

Basemodell.OLS_Summary(df)
Basemodell.Linear_Regression_Model(df)
Basemodell.Decision_Tree_Model(df)
Basemodell.Random_Forest_Model(df)
Basemodell.Extreme_Gradient_Boosting_Model(df)
content_copyCOPY

Model building involves setting up ways of collecting data, understanding which aspects of the data matter for the questions you are asking, and finding a statistical, mathematical, or simulation model to gain understanding and make predictions.