######### categorize the words into "keys, values, headers"
def graph_categorizer(words, bboxes, img, *, header_max_y=13, key_max_x=43):
    """Split recognised words into headers, keys and values by position.

    Each word is classified from its normalised bounding box:
    a word with ``y2 <= header_max_y`` is a header; otherwise a word with
    ``x2 < key_max_x`` is a key; every remaining word is a value.

    Args:
        words: Sequence whose first element is the list of words to
            categorise; only ``words[0]`` is read.
        bboxes: Bounding boxes, passed to ``normalize_bbox`` together with
            ``img``. After normalisation, entry ``i`` must unpack to
            ``(x1, y1, x2, y2)`` for ``words[0][i]``.
        img: Image handed to ``normalize_bbox``; not used otherwise.
        header_max_y: Largest normalised ``y2`` still treated as a header.
            Defaults to the previously hard-coded 13.
        key_max_x: Normalised ``x2`` below which a non-header word is a key.
            Defaults to the previously hard-coded 43.

    Returns:
        dict with the lists ``"headers"``, ``"keys"`` and ``"values"``; each
        list keeps the order in which the words appear in ``words[0]``.
    """
    # NOTE(review): the default thresholds assume the coordinate scale that
    # normalize_bbox produces - confirm against that helper.
    output_dict = {"headers": [], "keys": [], "values": []}
    bboxes = normalize_bbox(bboxes, img)
    for idx, word in enumerate(words[0]):
        # Only x2 / y2 of the box take part in the decision.
        _, _, x2, y2 = bboxes[idx]
        if y2 <= header_max_y:
            category = "headers"
        elif x2 < key_max_x:
            category = "keys"
        else:
            category = "values"
        output_dict[category].append(word)
    return output_dict
####### assign each value to a contour, so values can later be matched to their keys
def nearest_assignment(updated_categorized_data, updated_words_to_bboxes, contours_right_coords):
    """Map each contour's y coordinate to the value word assigned to it.

    Two strategies are used:

    * If there are exactly as many values as contours, they are paired
      positionally (``values[i]`` goes with ``contours_right_coords[i]``).
    * Otherwise each contour gets the value whose bounding-box ``x2`` is
      closest to the contour's x coordinate (the first such value wins on a
      tie). With no values at all, the contour maps to ``None``.

    Args:
        updated_categorized_data: dict whose ``'values'`` entry is the list
            of value words.
        updated_words_to_bboxes: Mapping from a word to a 4-item bounding
            box; only the third item (``x2``) is read, and only in the
            nearest-match strategy.
        contours_right_coords: Sequence of ``(x, y)`` pairs, one per contour.

    Returns:
        dict mapping each contour's ``y`` to a value word (or ``None``).
        Contours sharing the same ``y`` collapse to one entry; the last one
        wins.

    Raises:
        KeyError: In the nearest-match strategy, if a value word is missing
            from ``updated_words_to_bboxes``.
    """
    values = updated_categorized_data['values']

    # Positional pairing takes precedence, so decide this first instead of
    # running the nearest-match search and throwing its result away.
    if len(values) == len(contours_right_coords):
        return {
            cont_y: value
            for (_, cont_y), value in zip(contours_right_coords, values)
        }

    contour_to_values = {}
    for cont_x, cont_y in contours_right_coords:
        min_dist = float('inf')
        min_value = None
        for value in values:
            _, _, value_x, _ = updated_words_to_bboxes[value]
            curr_dist = abs(value_x - cont_x)
            # Strict comparison keeps the first value on equal distances.
            if min_dist > curr_dist:
                min_dist = curr_dist
                min_value = value
        contour_to_values[cont_y] = min_value
    return contour_to_values
#### assign values to their respective keys
def assign_value_to_keys(updated_categorized_data, updated_words_to_bboxes, contours_right_coords, contour_to_values):
    """Pair every key word with the value attached to its bar (contour).

    Bars are first linked to keys, then each key receives the value that
    ``contour_to_values`` stores for that bar's y coordinate.

    * If there are exactly as many keys as contours, they are paired
      positionally (``keys[i]`` goes with ``contours_right_coords[i]``).
    * Otherwise each contour gets the key whose bounding-box ``y2`` is
      closest to the contour's y coordinate (the first such key wins on a
      tie). With no keys at all, the contour's key is ``None``.

    Args:
        updated_categorized_data: dict whose ``'keys'`` entry is the list of
            key words.
        updated_words_to_bboxes: Mapping from a word to a 4-item bounding
            box; only the fourth item (``y2``) is read, and only in the
            nearest-match strategy.
        contours_right_coords: Sequence of ``(x, y)`` pairs, one per contour.
        contour_to_values: Mapping from a contour's ``y`` to its value.

    Returns:
        dict mapping key word to value. When several bars resolve to the
        same key, the value of the last bar wins; a bar without any key
        shows up under the key ``None``.

    Raises:
        KeyError: If a bar's ``y`` is missing from ``contour_to_values``, or
            (nearest-match strategy) a key word is missing from
            ``updated_words_to_bboxes``.
    """
    keys = updated_categorized_data['keys']

    # First assign the keys to the bars. Positional pairing takes
    # precedence, so the nearest-match search only runs when it is needed.
    if len(contours_right_coords) == len(keys):
        bars_to_keys = {
            cont_y: key
            for (_, cont_y), key in zip(contours_right_coords, keys)
        }
    else:
        bars_to_keys = {}
        for _, cont_y in contours_right_coords:
            min_diff = float('inf')
            right_key = None
            for key in keys:
                _, _, _, key_y = updated_words_to_bboxes[key]
                diff = float(abs(cont_y - key_y))
                # Strict comparison keeps the first key on equal distances.
                if min_diff > diff:
                    min_diff = diff
                    right_key = key
            bars_to_keys[cont_y] = right_key

    # Now hand each key the value recorded for its bar.
    return {key: contour_to_values[cont_y] for cont_y, key in bars_to_keys.items()}