Consolidation - List PK & OK

PHOTO EMBED

Wed Jun 09 2021 09:40:41 GMT+0000 (UTC)

Saved by @Safrugans ##pyspark

# CDC/ingestion metadata columns (GoldenGate-style) that must never appear
# in the output column list.
METADATA_COLUMNS = {"table", "op_type", "op_ts", "current_ts", "pos", "primary_keys", "tokens"}

tables = ["table1", "table2", "table3"]

for table in tables:
    # Load the raw Avro extract for this SAP table; paths on the lake are lower-cased.
    df = spark.read.format("avro").load(f"/mnt/dledpc/senv-sgcc/raw/sapsr3.{table.lower()}/")

    # The 'primary_keys' column carries the table's key list (an array) on every
    # row — read it once from the first row.
    keys = df.select("primary_keys").head(1)[0][0]
    primary_keys = ";".join(keys)

    # Output columns = every column that is neither ingestion metadata nor a
    # primary key, upper-cased and ';'-joined. Build the key set once instead
    # of re-splitting the key string on each iteration.
    key_set = set(keys)
    output_columns = ";".join(
        column.upper()
        for column in df.columns
        if column not in METADATA_COLUMNS and column not in key_set
    )

    print(f"Table name:    {table}", f"\n  Primary key: {primary_keys}", f"\n  Output col:  {output_columns}", "\n")
content_copyCOPY