Tabella Decili/Percentili

PHOTO EMBED

Thu Jan 13 2022 11:34:36 GMT+0000 (Coordinated Universal Time)

Saved by @MatteoK #python

def new_decile_table(y_true, y_pred, y_prob, change_deciles=10, labels=True, round_decimal=3):
    """Build a per-bucket ("decile") summary table for a binary classifier.

    Rows are sorted by predicted probability in descending order and split
    into ``change_deciles`` consecutive buckets of (almost) equal size;
    bucket 1 holds the highest probabilities. For every bucket the table
    reports probability statistics, the confusion-matrix counts and the
    precision / recall / F1 of the positive class (label ``1``).

    Args:
        y_true: Array-like of ground-truth labels (``0`` or ``1``).
        y_pred: Array-like of predicted labels (``0`` or ``1``).
        y_prob: Array-like of predicted scores used for ranking.
        change_deciles: Number of buckets to split the ranked rows into.
        labels: Accepted for backward compatibility; currently unused.
        round_decimal: Decimals kept for ``prob_min``/``prob_max``/``prob_avg``.

    Returns:
        pandas.DataFrame with one row per bucket and the columns ``decile``,
        ``prob_min``, ``prob_max``, ``prob_avg``, ``count_istanze``,
        ``actual_1``, ``actual_0``, ``true_positive``, ``false_negative``,
        ``false_positive``, ``true_negative``, ``precision_1``, ``recall_1``
        and ``f1_score``. Ratios whose denominator is zero are ``NaN``.
    """
    df = pd.DataFrame({
        'y_true': np.array(y_true),
        'y_pred': np.array(y_pred),
        'y_prob': np.array(y_prob),
    })

    # Rank by score so that bucket 1 contains the most confident positives.
    df = df.sort_values('y_prob', ascending=False)
    # Evenly spaced values in [1, change_deciles + 1) truncated to int give
    # each ranked row its bucket number.
    df['decile'] = np.linspace(1, change_deciles + 1, len(df), endpoint=False, dtype=int)

    def _bucket_stats(group):
        # Summarise one bucket: score range plus confusion-matrix counts.
        actual = group['y_true']
        predicted = group['y_pred']
        prob = group['y_prob']
        return pd.Series({
            'prob_min': prob.min(),
            'prob_max': prob.max(),
            'prob_avg': prob.mean(),
            'count_istanze': len(prob),
            'actual_1': actual.sum(),
            'actual_0': int((actual == 0).sum()),
            'true_positive': int(((actual == 1) & (predicted == 1)).sum()),
            'false_negative': int(((actual == 1) & (predicted == 0)).sum()),
            'false_positive': int(((actual == 0) & (predicted == 1)).sum()),
            'true_negative': int(((actual == 0) & (predicted == 0)).sum()),
        })

    # Select the value columns explicitly so apply() never operates on the
    # grouping column (that behaviour is deprecated in recent pandas).
    dt = (
        df.groupby('decile')[['y_true', 'y_pred', 'y_prob']]
        .apply(_bucket_stats)
        .reset_index()
    )

    for col in ('prob_min', 'prob_max', 'prob_avg'):
        dt[col] = dt[col].round(round_decimal)

    dt['precision_1'] = dt['true_positive'] / (dt['true_positive'] + dt['false_positive'])
    dt['recall_1'] = dt['true_positive'] / (dt['true_positive'] + dt['false_negative'])
    # F1 is the harmonic mean of precision and recall: 2PR / (P + R).
    dt['f1_score'] = (
        2 * dt['precision_1'] * dt['recall_1'] / (dt['precision_1'] + dt['recall_1'])
    )

    return dt
content_copyCOPY