Preview:
######### categorize the words into "keys, values, headers"

def graph_categorizer(words, bboxes, img):
    """Split the recognised words into headers, keys and values.

    Each word in ``words[0]`` is classified from the bounding box found at
    the same index, after the boxes have been passed through
    ``normalize_bbox(bboxes, img)``.

    Args:
        words: indexable container whose first element is the sequence of
            words to classify.
        bboxes: bounding boxes, one ``(x1, y1, x2, y2)`` per word once
            normalized.
        img: forwarded unchanged to ``normalize_bbox``.

    Returns:
        A dict with the lists ``"headers"``, ``"keys"`` and ``"values"``,
        each preserving the input word order.
    """
    # NOTE(review): the 13 / 43 cut-offs presumably assume normalize_bbox
    # rescales coordinates to a fixed range -- confirm against that helper.
    scaled_boxes = normalize_bbox(bboxes, img)

    headers, keys, values = [], [], []
    for position, word in enumerate(words[0]):
        _x1, _y1, x2, y2 = scaled_boxes[position]
        if y2 <= 13:
            # Box ends within the top band: header.
            headers.append(word)
        elif x2 < 43:
            # Box ends left of the split column: key.
            keys.append(word)
        elif x2 >= 43:
            # Box ends at or right of the split column: value.
            values.append(word)

    return {"headers": headers, "keys": keys, "values": values}

####### assign values to their respective contours (by nearest x-coordinate, or by position when counts match)


def nearest_assignment(
    updated_categorized_data,
    updated_words_to_bboxes,
    contours_right_coords,
):
    """Map each contour's y-coordinate to one of the categorized values.

    Two strategies are used:

    * If there are exactly as many values as contours, they are paired by
      position (the i-th contour gets the i-th value).
    * Otherwise each contour gets the value whose bounding-box x-coordinate
      (third item of its box) is closest to the contour's x-coordinate.

    Args:
        updated_categorized_data: dict with a ``'values'`` sequence of words.
        updated_words_to_bboxes: mapping from word to a 4-item bounding box;
            only consulted by the nearest-distance strategy.
        contours_right_coords: sequence of ``(x, y)`` pairs, one per contour.

    Returns:
        A dict keyed by contour y-coordinate. Contours that share a y
        overwrite each other (the last one wins). With the nearest-distance
        strategy a contour maps to ``None`` when there are no values.
    """
    values = updated_categorized_data['values']

    # Positional pairing takes precedence, so decide it up front instead of
    # running the nearest-distance search and throwing its result away.
    if len(values) == len(contours_right_coords):
        return {
            contour[1]: value
            for contour, value in zip(contours_right_coords, values)
        }

    contour_to_values = {}
    for contour in contours_right_coords:
        cont_x, cont_y = contour
        min_dist = float('inf')
        min_value = None
        for value in values:
            _, _, value_x, _ = updated_words_to_bboxes[value]
            curr_dist = abs(value_x - cont_x)
            # Strict comparison: on a tie the earliest value is kept.
            if min_dist > curr_dist:
                min_dist = curr_dist
                min_value = value
        contour_to_values[cont_y] = min_value

    return contour_to_values

#### assign values to their respective keys

def assign_value_to_keys(
    updated_categorized_data,
    updated_words_to_bboxes,
    contours_right_coords,
    contour_to_values,
):
    """Pair every key word with the value assigned to its contour.

    Keys are first attached to contours, then each key receives the value
    that ``contour_to_values`` holds for that contour's y-coordinate.

    Keys are attached to contours in one of two ways:

    * If there are exactly as many keys as contours, they are paired by
      position (the i-th contour gets the i-th key).
    * Otherwise each contour gets the key whose bounding-box y-coordinate
      (fourth item of its box) is closest to the contour's y-coordinate.

    Args:
        updated_categorized_data: dict with a ``'keys'`` sequence of words.
        updated_words_to_bboxes: mapping from word to a 4-item bounding box;
            only consulted by the nearest-distance strategy.
        contours_right_coords: sequence of ``(x, y)`` pairs, one per contour.
        contour_to_values: mapping from contour y-coordinate to a value.

    Returns:
        A dict mapping key word to value. With the nearest-distance strategy
        the key is ``None`` when there are no keys, and several contours may
        resolve to the same key (the last one wins).

    Raises:
        KeyError: if a contour's y-coordinate is missing from
            ``contour_to_values``.
    """
    keys = updated_categorized_data['keys']

    # First assign the keys to the bars (contours), indexed by contour y.
    if len(contours_right_coords) == len(keys):
        # Positional pairing takes precedence, so use it directly instead of
        # running the nearest-distance search and throwing its result away.
        bars_to_keys = {
            contour[1]: key
            for contour, key in zip(contours_right_coords, keys)
        }
    else:
        bars_to_keys = {}
        for contour in contours_right_coords:
            _, cont_y = contour
            min_diff = float('inf')
            right_key = None
            for key in keys:
                _, _, _, key_y = updated_words_to_bboxes[key]
                diff = float(abs(cont_y - key_y))
                # Strict comparison: on a tie the earliest key is kept.
                if min_diff > diff:
                    min_diff = diff
                    right_key = key
            bars_to_keys[cont_y] = right_key

    # Now assign the values to the keys through the shared contour y.
    keys_to_values = {}
    for cont_y, key in bars_to_keys.items():
        keys_to_values[key] = contour_to_values[cont_y]

    return keys_to_values
downloadDownload PNG downloadDownload JPEG downloadDownload SVG

Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!

Click to optimize width for Twitter