# Greedy detection of the K subgroups whose mean KPI changed the most between
# the period up to a fixed cutoff date and the period after it, followed by a
# bar chart of the absolute changes.

# Column names and parameters. These must be defined before the first
# df[time_axis] access below; the original order used time_axis too early.
kpi_axis = 'kpi'
time_axis = 'time'
cutoff = '2019-09-11'  # rows with time <= cutoff are "before", the rest "after"
K = 3  # number of subgroups to detect

df = pd.read_csv("..\\dataset\\ecommerce_sample.csv")
df[time_axis] = pd.to_datetime(df[time_axis], format='%d/%m/%Y')

# NOTE(review): binarize is defined elsewhere. The loop below treats every
# column other than time/kpi as a True/False subgroup indicator, so it
# presumably one-hot/bins the columns (3 = number of bins?) -- confirm.
df = binarize(df, df.columns, kpi_axis, time_axis, 3)

df_before = df[df[time_axis] <= cutoff]
df_after = df[df[time_axis] > cutoff]
features = copy(df.drop([time_axis, kpi_axis], axis=1).columns)

subgroups = []
score = []
for k in range(K):
    y_before = df_before[kpi_axis]
    y_after = df_after[kpi_axis]

    # Nothing left to rank (K larger than the number of candidate columns).
    if len(features) == 0:
        break

    # Effect per candidate subgroup: mean KPI after minus mean KPI before,
    # restricted to the rows where the subgroup indicator is True.
    CATE = []
    for d in features:
        # "== True" is kept deliberately: it also works if the indicator
        # columns hold 0/1 or object values rather than strict booleans.
        g = df_before[d] == True
        m_before = np.mean(y_before[g])
        g = df_after[d] == True
        m_after = np.mean(y_after[g])
        CATE.append(m_after - m_before)

    # A subgroup that is empty on either side yields NaN. If every remaining
    # candidate is NaN there is no measurable effect left to report.
    magnitude = np.abs(np.array(CATE))
    if np.all(np.isnan(magnitude)):
        break

    # Largest absolute effect first; argsort places NaN entries last.
    index = np.argsort(-magnitude)
    best = features[index[0]]
    subgroups.append(best)
    score.append(magnitude[index[0]])

    # Remove the rows covered by the selected subgroup, and the subgroup
    # itself, so the next round ranks only what is still unexplained.
    df_before = df_before[df_before[best] == False]
    df_after = df_after[df_after[best] == False]
    features = features.drop(best)

# NOTE(review): plot_width is the pre-3.0 Bokeh keyword (newer releases use
# width) -- confirm against the installed Bokeh version.
p = figure(x_range=subgroups, title="Conditional Average Treatment Effect", plot_width=1200,)
p.vbar(x=subgroups, top=score, width=0.8, color='black')
show(p)