Passagem entidades: bda->cloud - gerar queries
Thu Aug 26 2021 17:13:14 GMT+0000 (Coordinated Universal Time)
Saved by @Safrugans ##pyspark
# OPBEL list (Cmd 109 has more results) opbels = ['315202770489', '598661400957', '353202709339', '541100470776', '598464587854', '346202685945', '324202802135', '598480916806', '319202885264', '312202947012', '352202678789', '562202490034', '561202413691', '560202556164', '361202640606', '598415420300', '598595733012', '397202687363', '546100419912', '349202590129', '507100471308', '598513872630', '584202511872', '359202650339', '507100461832', '378202554599', '344202620289', '308203017960', '388202532380', '359202676702', '120032627089', '598349885794', '598677760481', '368202692148', '562202522678', '344202556277', '143808896301', '531100481780', '318202931027', '537100450113', '507100461831', '599022029773', '599021918726', '321202819928', '143920059595', '572202510340', '341202699190', '387202583320', '367202788683', '145025288916', '598972753006', '398202788439', '300203182269', '549883569266', '387202708985'] #### df_bda = 'idc_ago_8_205_bda' df_cloud = 'idc_ago_8_205' # ensure same # cols on idc_ago_11_207_bda and idc_ago_11_207 cols = 'table_name, mandt, opbel, opupw, opupk, opupz, bukrs, gsber, bupla, segment, prctr, augst, gpart, vtref, vtpos, vtre2, vkont, abwbl, abwtp, abwkt, applk, hvorg, tvorg, kofiz, spart, hkont, mwskz, mwszkz, xanza, stakz, bldat, budat, optxt, waers, faedn, faeds, verkz, studt, sktpz, xmanl, kursf, betrh, betrw, betr2, betr3, skfbt, sbeth, sbetw, sbet2, sbet3, mwsko, mwvko, txrul, spzah, pymet, pybuk, pernr, grkey, persl, xaesp, augdt, augbl, augbd, augrd, augwa, augbt, augbs, augsk, augvd, augob, whang, whgrp, xeiph, mahnv, mansp, xaugp, abrzu, abrzo, fdgrp, fdlev, fdztg, fdwbt, xtaus, augrs, pygrp, pdtyp, sperz, infoz, txjcd, txdat, vbund, kontt, kontl, opsta, blart, emgpa, embvt, emadr, ikey, eurou, xragl, astkz, asblg, xblnr, inkps, rndps, qsskz, qssew, qsptp, qsshb, qbshb, qsznr, xwhex, rfupk, strkz, fitpr, xpyor, landl, intbu, emcrd, c4eye, c4eyp, sctax, sttax, stzal, orupz, negbu, subap, pswsl, pswbt, pswtx, psgrp, xcolc, aasta, xcsha, reacc, rebuk, emmnd, pnnum, pnhkf, pnexd, pnctr, finre, rdsta, rdstb, deaktiv, sgrkey, solldat, recpt, tocollect, einmalanf, vorauszahl, aperiodic, abrabs, grbbp, asmeth, int_crossrefno, ethppm, payfreqid, invoicing_party, ppmst, logno, aperiodict, add_refobj, add_refobjid, add_service, xustpd, ptitm, hora_val_comp, id_processo' for opbel in opbels: # extract required keys keys = 'opbel, opupw, opupk, opupz' ## replace "*" for desired cols - item0, item1,... df = spark.sql(f"select {keys} from( select {cols} from {df_bda} minus select {cols} from {df_cloud}) as B where opbel = {opbel}") df.show() values = df.distinct().collect() filter_cols = keys.split(",") item = [] for i in range(len(keys.split(","))): item.append('') for value in values: item[i] += "'" + value[i] + "', " item[i] = item[i][0:-2] # build bda and cloud query ## fill in bda and cloud sources bda_sources = ['src_p16.dfkkop', ] cloud_sources = ['dfkkop_consolidated_08ago', ] filters = '' for i in range (len(filter_cols)): filters += f'\n\tand {filter_cols[i]} in ({item[i]})' for bda_source in bda_sources: query = f"select * from {bda_source}\n\twhere opbel = {opbel} {filters}" print("opbel: ", opbel, "\nquery bda:\n",query, "\n") for cloud_source in cloud_sources: query = f"select * from {cloud_source}\n\twhere opbel = {opbel} {filters}" print("query cloud:\n",query, "\n") print(spark.sql(f"select count(*) from {query}").collect()[0], "\n\n")
Comments