# Client base
def get_retail_clients(scorecard_file, output_file, ip, base_filter):
#segments
segment = ['d.Core Mass', 'c.Mid-Market', 'b.Affluent', 'a.Private']
segment2 = []
#scorecard attrition tags
attrited = ['BASE_ATTRITED', 'NEW_ATTRITED']
#initial filter
if base_filter==True:
filter_ = "cl_id2 in @segment& \
CST_ID.notna() & \
CST_ID!=0"
df_cols = ['CST_ID','cl_id2','cl_id4', 'rela_ofcr']
else:
filter_ = "scorecard_id not in @attrited & \
cl_id2 in @segment & \
CST_ID.notna() & \
CST_ID!=0"
df_cols = ['CST_ID','cl_id2','cl_id4','scorecard_id', 'rela_ofcr']
#set ip to True if ip_type is included
if ip == True:
filter_ = filter_+"& ip_type == '01'"
df_cols = df_cols + ['ip_type']
df = (pd.read_parquet(scorecard_file, columns=df_cols)
.query(filter_, engine="python")
[["CST_ID","cl_id2","cl_id4", "rela_ofcr"]]
.sort_values("CST_ID"))
#print(f"saving to {output_file}")
#df.to_parquet(output_file)
gc.collect()
return df
df = get_retail_clients(root+r"\REGULAR DATA\ULC\cst_scorecard\cst_scorecard_202306_b.parquet", "", True, False)
Comments