Feature Selection

PHOTO EMBED

Thu Aug 24 2023 15:53:13 GMT+0000 (Coordinated Universal Time)

Saved by @sumikk ##partialdependencyplot #info.column_information(df) info.agg_tabulation(df) info.num_count_summary(df) info.statistical_summary(df)

from sklearn.feature_selection import RFE
from catboost import CatBoostRegressor

class Feature_Selection(Data_Modelling):
    """Rank features with recursive feature elimination (RFE).

    All hyper-parameters are handed to ``Data_Modelling.__init__``; this
    class then reads them back as ``self.<name>`` attributes when it builds
    the random-forest estimator used by RFE.
    """

    def __init__(self, n_estimators,
                 max_depth,
                 min_samples_split,
                 min_samples_leaf,
                 max_leaf_nodes,
                 min_impurity_split,
                 min_impurity_decrease,
                 bootstrap,
                 min_child_weight,
                 learning_rate,
                 Subsample,
                 Alpha,
                 Lamda,
                 random_state,
                 criterion):
        """Forward every hyper-parameter, unchanged and in order, to the base class."""
        super().__init__(n_estimators,
                         max_depth,
                         min_samples_split,
                         min_samples_leaf,
                         max_leaf_nodes,
                         min_impurity_split,
                         min_impurity_decrease,
                         bootstrap,
                         min_child_weight,
                         learning_rate,
                         Subsample,
                         Alpha,
                         Lamda,
                         random_state,
                         criterion)
        print("Feature Selection object created")

    def Regression_Feature_Selector(self, data, n_features_to_select=6, step=1):
        """Fit RFE around a random forest and return the per-column ranking.

        Args:
            data: Currently unused.
                NOTE(review): the fit below reads the module-level
                ``x_train`` / ``y_train`` instead of this argument --
                confirm whether ``data`` was meant to supply them.
            n_features_to_select: Number of features RFE keeps (default 6,
                the value that was previously hard-coded).
            step: Number of features removed per RFE iteration (default 1).

        Returns:
            A DataFrame with two columns, ``'Importance'`` (the RFE
            ``ranking_`` value for each feature) and ``'Columns'`` (the
            matching ``x_train`` column label), sorted by ``'Importance'``
            in ascending order.
        """
        # Presumably these attributes are set by Data_Modelling.__init__;
        # that class is not visible here.
        estimator = RandomForestRegressor(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_leaf_nodes=self.max_leaf_nodes,
            bootstrap=self.bootstrap,
            criterion=self.criterion,
        )

        # n_features_to_select must be passed by keyword: scikit-learn made
        # it keyword-only, so the old positional form raises TypeError on
        # current releases.
        selector = RFE(
            estimator,
            n_features_to_select=n_features_to_select,
            step=step,
        )
        selector = selector.fit(x_train, y_train)

        rank = pd.DataFrame(selector.ranking_, columns=['Importance'])
        column_names = pd.DataFrame(x_train.columns, columns=['Columns'])
        ranking = pd.concat([rank, column_names], axis=1)
        return ranking.sort_values(["Importance"], axis=0, ascending=True)


# Instantiate the selector with every hyper-parameter spelled out by name.
# NOTE(review): newer scikit-learn releases spell the 'mse' criterion
# 'squared_error' -- confirm against the installed version.
FS = Feature_Selection(
    n_estimators=500,
    max_depth=5,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=None,
    min_impurity_split=1,
    min_impurity_decrease=0.1,
    bootstrap=True,
    min_child_weight=3,
    learning_rate=0.07,
    Subsample=0.7,
    Alpha=0,
    Lamda=1.5,
    random_state=29,
    criterion='mse',
)
content_copyCOPY