[EDA]: Feature Importance using Random Forest

PHOTO EMBED

Thu Jul 21 2022 16:59:36 GMT+0000 (Coordinated Universal Time)

Saved by @instadeath

# features: DataFrame of input features (one column per feature).
# target: DataFrame of target variables; feature importance is evaluated separately for each of its columns.

def feature_importance(features, target, random_state=None):
    """Rank features by random-forest importance, once per target column.

    For every column of ``target`` a fresh ``RandomForestClassifier`` is
    fitted on ``features`` and its ``feature_importances_`` are tabulated.
    The shape of ``features`` and the name of each target column are printed
    as progress output.

    Args:
        features: DataFrame of predictor columns; its column names label the
            rows of each result table.
        target: DataFrame whose columns are each used as a separate
            classification target.
        random_state: Forwarded to ``RandomForestClassifier``. The default
            ``None`` keeps the unseeded behaviour; pass an int to make the
            importances reproducible between runs.

    Returns:
        dict mapping each target column name to a DataFrame with the columns
        ``'widget name'`` and ``'feature_importance'``, sorted by importance
        in descending order.
    """
    X = features
    print(X.shape)
    feature_names = list(X.columns)

    # Earlier versions stored results in a module/notebook-level ``final``
    # mapping and raised NameError when none existed. Keep feeding that
    # mapping when it is present so existing callers still see their results,
    # but no longer depend on it.
    shared = globals().get("final")

    results = {}
    for column in target.columns:
        print(column)
        y = target.loc[:, [column]]
        print(y.columns)

        # scikit-learn expects 1-D labels for a single target; a one-column
        # DataFrame triggers DataConversionWarning. If duplicate column names
        # make the selection wider than one column, pass it through unchanged.
        labels = y.iloc[:, 0] if y.shape[1] == 1 else y

        model = RandomForestClassifier(random_state=random_state)
        model.fit(X, labels)

        # Relative importance of each feature, highest first.
        importances = model.feature_importances_
        ranked = sorted(zip(importances, feature_names), reverse=True)
        table = pd.DataFrame(
            [(name, score) for score, name in ranked],
            columns=['widget name', 'feature_importance'],
        )

        results[column] = table
        if shared is not None:
            shared[column] = table
    return results
content_copyCOPY

http://localhost:8889/notebooks/Desktop/Swiggy/2. Codes/Python notebooks/Widgets contribution to HM.ipynb