Hyperparameter Tuning


import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor


class Model_Regression_HyperParameter_Tuning():


    def __init__(self):

        print("HyperParameter_Tuning object created")
        
    class XGB_Regressor_HyperParameter_Tuning():


        def __init__(self):

            print("XGB HyperParameter_Tuning object created")


        def Fit_XGB_HyperParameter_Tuner(self, x_train, y_train):

            # Base estimator; the deprecated "predictor" argument is dropped,
            # since recent xgboost releases select the predictor automatically
            xgb_Reg = XGBRegressor(tree_method = "exact",
                                   objective = "reg:squarederror")


            parameters = {"learning_rate": [0.1, 0.01, 0.001],
                           "gamma" : [0.01, 0.1, 0.3, 0.5, 1, 1.5, 2],
                           "max_depth": [2, 4, 7, 10],
                           "colsample_bytree": [0.3, 0.6, 0.8, 1.0],
                           "subsample": [0.2, 0.4, 0.5, 0.6, 0.7],
                           "reg_alpha": [0, 0.5, 1],
                           "reg_lambda": [1, 1.5, 2, 3, 4.5],
                           "min_child_weight": [1, 3, 5, 7],
                           "n_estimators": [100, 250, 500, 1000]}


            # Randomized search samples combinations from the lists above
            # rather than exhaustively trying every combination
            xgb_rscv = RandomizedSearchCV(xgb_Reg, param_distributions = parameters,
                                          scoring = "r2", cv = 3, random_state = 29)

            # Fit the search on the training data
            model_xgboost = xgb_rscv.fit(x_train, y_train)
            return model_xgboost
        
        
        def XGB_Get_Best_Params(self, model):

            best = model.best_estimator_.get_params()
            print("Learning Rate: ", best["learning_rate"])
            print("Gamma: ", best["gamma"])
            print("Max Depth: ", best["max_depth"])
            print("Subsample: ", best["subsample"])
            print("Max Features at Split: ", best["colsample_bytree"])
            print("Alpha: ", best["reg_alpha"])
            print("Lambda: ", best["reg_lambda"])
            print("Minimum Sum of the Instance Weight Hessian to Make a Child: ", best["min_child_weight"])
            print("Number of Trees: ", best["n_estimators"])


        
        def get_Regressor_result(self, modelname, x_test, y_test):

            XGB_pred = modelname.predict(x_test)

            XGB_RMSE = np.sqrt(mean_squared_error(y_test, XGB_pred))

            XGB_r2_score = r2_score(y_test, XGB_pred)

            return XGB_RMSE, XGB_r2_score

        
    class RF_Regressor_HyperParameter_Tuning():


        def __init__(self):

            print("RF HyperParameter_Tuning object created")


        def Fit_RF_HyperParameter_Tuner(self, x_train, y_train):

            # min_samples_split must be >= 2; the criterion names follow
            # scikit-learn >= 1.0 (the old "mse"/"mae" aliases were removed)
            param_grid = {"max_depth": [1, 3, 5, 7, 9, 10],
                          "min_samples_split": [2, 3, 10, 15, 20],
                          "min_samples_leaf": [1, 3, 5, 10],
                          "bootstrap": [True, False],
                          "criterion": ["squared_error", "absolute_error"],
                          "n_estimators": [100, 250, 500, 1000]}

            Reg = RandomForestRegressor(random_state=29, n_jobs=-1)
            model = RandomizedSearchCV(Reg, param_grid, scoring = 'r2', cv=3)

            model.fit(x_train, y_train)

            return model

        def RF_Get_Best_Params(self, model):

            best = model.best_estimator_.get_params()
            print("n_estimators: ", best["n_estimators"])
            print("Max Depth: ", best["max_depth"])
            print("min_samples_split: ", best["min_samples_split"])
            print("min_samples_leaf: ", best["min_samples_leaf"])
            # max_leaf_nodes is not tuned above, so this prints its default
            print("max_leaf_nodes: ", best["max_leaf_nodes"])
            print("bootstrap: ", best["bootstrap"])
            print("criterion: ", best["criterion"])

        def Evaluation_Result(self, modelname, x_test, y_test):

            RF_pred = modelname.predict(x_test)

            RF_RMSE = np.sqrt(mean_squared_error(y_test, RF_pred))

            RF_r2_score = r2_score(y_test, RF_pred)

            return RF_RMSE, RF_r2_score
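
The snippet assumes that x_train, y_train, x_test, and y_test already exist in scope. A minimal sketch of producing them from a DataFrame df with scikit-learn's train_test_split; the column name "target" is a hypothetical placeholder for your own label column:

from sklearn.model_selection import train_test_split

# "target" is a hypothetical column name; substitute your own label column
x = df.drop(columns=["target"])
y = df["target"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=29)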



HP_XGB = Model_Regression_HyperParameter_Tuning().XGB_Regressor_HyperParameter_Tuning()
HP_RF = Model_Regression_HyperParameter_Tuning().RF_Regressor_HyperParameter_Tuning()

Xgb_model = HP_XGB.Fit_XGB_HyperParameter_Tuner(x_train, y_train)

HP_XGB.XGB_Get_Best_Params(Xgb_model)

HP_XGB.get_Regressor_result(Xgb_model, x_test, y_test)

RF_model = HP_RF.Fit_RF_HyperParameter_Tuner(x_train, y_train)

HP_RF.RF_Get_Best_Params(RF_model)

HP_RF.Evaluation_Result(RF_model, x_test, y_test)

Hyperparameters are the parameters that control how a machine learning algorithm learns; unlike model weights, they are not estimated from the training data, and the best values depend on the problem being solved, so they must be configured for each task. The process of finding the best set of hyperparameters is called hyperparameter optimization. Grid search is an exhaustive search through a manually specified set of hyperparameter values: every combination on the grid defines a candidate model, each candidate is trained and evaluated using cross-validation, and the best-performing one is selected. RandomizedSearchCV, used in the code above, instead samples a fixed number of combinations from the specified lists, which is much cheaper when the grid is large.
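
For contrast with the randomized search above, here is a minimal grid-search sketch using scikit-learn's GridSearchCV; the small two-parameter grid is an illustrative assumption, and x_train / y_train are the training arrays assumed earlier:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# A deliberately small grid: 3 x 2 = 6 candidate models,
# each trained and scored with 3-fold cross-validation
grid = {"n_estimators": [100, 250, 500],
        "max_depth": [5, 10]}

search = GridSearchCV(RandomForestRegressor(random_state=29),
                      param_grid=grid, scoring="r2", cv=3)
search.fit(x_train, y_train)

print(search.best_params_)   # combination with the highest mean CV r2
print(search.best_score_)    # mean cross-validated r2 of that combination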