categorize
Sun Apr 16 2023 10:38:15 GMT+0000 (Coordinated Universal Time)
Saved by @mehla99_shubham #python
def graph_categorizer(words, bboxes, img, *, header_max_y=13, key_max_x=43):
    """Categorize words into "headers", "keys" and "values" by bbox position.

    Args:
        words: Indexable whose first element (``words[0]``) is the sequence
            of words to categorize.
        bboxes: Bounding boxes, passed through ``normalize_bbox`` together
            with ``img``. After normalization, entry ``idx`` must unpack to
            ``(x1, y1, x2, y2)`` for ``words[0][idx]``.
        img: Image handed to ``normalize_bbox``.
        header_max_y: A word whose normalized ``y2`` is at or below this
            value is a header. Defaults to the original constant 13.
        key_max_x: A non-header word whose normalized ``x2`` is below this
            value is a key; any other non-header word is a value. Defaults
            to the original constant 43.

    Returns:
        A dict with the lists ``"headers"``, ``"keys"`` and ``"values"``,
        each in the order the words were encountered.
    """
    output_dict = {"headers": [], "keys": [], "values": []}
    # NOTE(review): the thresholds presumably live in the coordinate space
    # produced by normalize_bbox (defined elsewhere) - confirm its scale.
    bboxes = normalize_bbox(bboxes, img)

    for idx, word in enumerate(words[0]):
        _, _, x2, y2 = bboxes[idx]
        if y2 <= header_max_y:
            output_dict["headers"].append(word)
        elif x2 < key_max_x:
            output_dict["keys"].append(word)
        else:
            output_dict["values"].append(word)

    return output_dict


def _closest_word(candidates, words_to_bboxes, coord_index, target):
    """Return the candidate whose bbox[coord_index] is closest to target.

    The first candidate wins ties; returns None when candidates is empty.
    """
    return min(
        candidates,
        key=lambda word: abs(words_to_bboxes[word][coord_index] - target),
        default=None,
    )


def nearest_assignment(updated_categorized_data, updated_words_to_bboxes,
                       contours_right_coords):
    """Map each contour's y coordinate to one of the "values" words.

    Args:
        updated_categorized_data: Dict with a ``"values"`` list of words.
        updated_words_to_bboxes: Maps each word to ``(x1, y1, x2, y2)``.
        contours_right_coords: Sequence of ``(x, y)`` pairs, one per contour.

    Returns:
        A dict keyed by contour ``y``. When there are exactly as many values
        as contours they are paired by position. Otherwise each contour gets
        the value whose bbox ``x2`` is closest to the contour's ``x``, or
        ``None`` when there are no values. Contours sharing the same ``y``
        collapse into one entry (the last one wins).
    """
    values = updated_categorized_data["values"]

    # Positional pairing takes precedence, so check it first instead of
    # computing the nearest-neighbour result and then throwing it away.
    if len(values) == len(contours_right_coords):
        return {
            contour[1]: value
            for contour, value in zip(contours_right_coords, values)
        }

    contour_to_values = {}
    for cont_x, cont_y in contours_right_coords:
        # Index 2 is the bbox's x2 coordinate.
        contour_to_values[cont_y] = _closest_word(
            values, updated_words_to_bboxes, 2, cont_x
        )
    return contour_to_values


def assign_value_to_keys(updated_categorized_data, updated_words_to_bboxes,
                         contours_right_coords, contour_to_values):
    """Pair every key word with the value assigned to the same contour.

    Args:
        updated_categorized_data: Dict with a ``"keys"`` list of words.
        updated_words_to_bboxes: Maps each word to ``(x1, y1, x2, y2)``.
        contours_right_coords: Sequence of ``(x, y)`` pairs, one per contour.
        contour_to_values: Dict keyed by contour ``y`` (the shape returned
            by ``nearest_assignment``).

    Returns:
        A dict mapping key word -> value word. When there are exactly as
        many keys as contours they are paired by position. Otherwise each
        contour gets the key whose bbox ``y2`` is closest to the contour's
        ``y`` (``None`` when there are no keys).

    Raises:
        KeyError: If a contour ``y`` is missing from ``contour_to_values``.
    """
    keys = updated_categorized_data["keys"]

    # First assign a key to every bar (contour), indexed by the bar's y.
    if len(contours_right_coords) == len(keys):
        bars_to_keys = {
            contour[1]: key
            for contour, key in zip(contours_right_coords, keys)
        }
    else:
        bars_to_keys = {}
        for _, cont_y in contours_right_coords:
            # Index 3 is the bbox's y2 coordinate.
            bars_to_keys[cont_y] = _closest_word(
                keys, updated_words_to_bboxes, 3, cont_y
            )

    # Then join keys and values through the shared contour y coordinate.
    return {key: contour_to_values[bar_y] for bar_y, key in bars_to_keys.items()}
Comments