Blog: How causal 5
Fri Oct 15 2021 07:24:55 GMT+0000 (Coordinated Universal Time)
Saved by
@MerantixCode
# Greedy search for the K binary subgroups whose mean KPI differs most between
# the rows up to a split date and the rows after it, followed by a bar chart
# of the resulting scores.
#
# Relies on names defined outside this snippet: pd, np, copy, binarize,
# figure, show.

# Column names. These must be assigned before the first df[time_axis] access
# below; the original assigned them after the to_datetime call (NameError).
kpi_axis = 'kpi'
time_axis = 'time'

# Rows with time <= split_date form the "before" set, the rest the "after" set.
split_date = '2019-09-11'

df = pd.read_csv("..\\dataset\\ecommerce_sample.csv")
df[time_axis] = pd.to_datetime(df[time_axis], format='%d/%m/%Y')

# NOTE(review): binarize is defined elsewhere; the loop below assumes it
# returns a frame whose non-time/non-KPI columns are boolean-like subgroup
# indicators -- confirm against its definition.
df = binarize(df, df.columns, kpi_axis, time_axis, 3)
df_before = df[df[time_axis] <= split_date]
df_after = df[df[time_axis] > split_date]

# Candidate subgroup columns: everything except the time and KPI columns.
features = copy(df.drop([time_axis, kpi_axis], axis=1).columns)

K = 3  # number of subgroups to detect
subgroups = []
score = []
for k in range(0, K):
    # Stop early if every candidate has already been selected; indexing the
    # empty argsort result below would otherwise raise IndexError.
    if len(features) == 0:
        break

    CATE = []
    y_before = df_before[kpi_axis]
    y_after = df_after[kpi_axis]

    # compute CATEs for all subgroups: mean KPI after minus mean KPI before,
    # restricted to the rows where the subgroup indicator is True
    for d in features:
        # Explicit "== True" is kept on purpose: the dtype of the binarized
        # columns is not visible here, so "~col" / truthiness is not a safe
        # substitute.
        g = df_before[d] == True
        m_before = np.mean(y_before[g])
        g = df_after[d] == True
        m_after = np.mean(y_after[g])
        CATE.append(m_after - m_before)

    # find subgroup with biggest CATE (largest absolute value first)
    index = np.argsort(-abs(np.array(CATE)))
    subgroups.append(features[index[0]])
    score.append(abs(CATE[index[0]]))

    # remove found subgroups from dataset so later rounds only see rows that
    # are outside every subgroup selected so far
    df_before = df_before[df_before[features[index[0]]] == False]
    df_after = df_after[df_after[features[index[0]]] == False]
    features = features.drop(features[index[0]])

# Bar chart: one bar per selected subgroup, height = absolute CATE.
p = figure(x_range=subgroups, title="Conditional Average Treatment Effect", plot_width=1200,)
p.vbar(x=subgroups, top=score, width=0.8, color='black')
show(p)
content_copyCOPY
Comments