Get Retail Clients

PHOTO EMBED

Wed Aug 23 2023 23:50:58 GMT+0000 (Coordinated Universal Time)

Saved by @aadpaguio

# Client base

def get_retail_clients(scorecard_file, output_file, ip, base_filter):
    #segments
    segment = ['d.Core Mass', 'c.Mid-Market', 'b.Affluent', 'a.Private']
    segment2 = []

    #scorecard attrition tags
    attrited = ['BASE_ATTRITED', 'NEW_ATTRITED']

    #initial filter
    if base_filter==True:
        filter_ = "cl_id2 in @segment& \
                   CST_ID.notna() & \
                   CST_ID!=0"
        df_cols = ['CST_ID','cl_id2','cl_id4', 'rela_ofcr']
    else:
        filter_ = "scorecard_id not in @attrited & \
                 cl_id2 in @segment & \
                 CST_ID.notna() & \
                 CST_ID!=0"
        df_cols = ['CST_ID','cl_id2','cl_id4','scorecard_id', 'rela_ofcr']

    #set ip to True if ip_type is included
    if ip == True:
        filter_ = filter_+"& ip_type == '01'"
        df_cols = df_cols + ['ip_type']

    df = (pd.read_parquet(scorecard_file, columns=df_cols)
           .query(filter_, engine="python")
           [["CST_ID","cl_id2","cl_id4", "rela_ofcr"]]
           .sort_values("CST_ID"))

    #print(f"saving to {output_file}")
    #df.to_parquet(output_file)

    gc.collect()

    return df


df = get_retail_clients(root+r"\REGULAR DATA\ULC\cst_scorecard\cst_scorecard_202306_b.parquet", "", True, False)
content_copyCOPY