Compare two data frames using pair blocking, score the candidate matches, and link records according to the best score

PHOTO EMBED

Wed Jun 01 2022 13:00:27 GMT+0000 (Coordinated Universal Time)

Saved by @Treenose #r

# Record-linkage pipeline for df1 and df2.
# NOTE(review): the functions below appear to come from the reclin package,
# used with a magrittr-style pipe -- confirm both are attached before running.
#
# Generate candidate pairs that share the same value of one variable (block).
# The argument is spelled out as `blocking_var` instead of relying on partial
# matching of `blocking`.
pair_blocking(df1, df2, blocking_var = "variable_in_common") %>%
  # See how the pairs match on the linkage variables.
  # `default_comparator` takes a single comparator. The earlier form
  # `jaro_winkler()/lcs()/lm()` listed alternatives separated by "/", which R
  # evaluates as division and fails on. Swap in lcs() here if preferred.
  compare_pairs(
    by = c("var1", "var2", "var3"),
    default_comparator = jaro_winkler()
  ) %>%
  # Score pairs
  score_problink() %>%
  # Select pairs that match best
  select_n_to_m() %>%
  # Link data according to score
  link()
content_copyCOPY