func compare(records []Record) (comparisons int, edges [][2]int) { for _, a := range records { for _, b := range records { if a == b { continue // don't compare with itself } comparisons++ if matches(a, b) { edges = append(edges, [2]int{a.ID, b.ID}) } } } return comparisons, edges } func connectedComponents(edges [][2]int) [][]int { components := map[int][]int{} nextIdx := 0 idx := map[int]int{} for _, edge := range edges { a := edge[0] b := edge[1] aIdx, aOk := idx[a] bIdx, bOk := idx[b] switch { case aOk && bOk && aIdx == bIdx: // in same component continue case aOk && bOk && aIdx != bIdx: // merge two components components[nextIdx] = append(components[aIdx], components[bIdx]...) delete(components, aIdx) delete(components, bIdx) for _, x := range components[nextIdx] { idx[x] = nextIdx } nextIdx++ case aOk && !bOk: // add b to component of a idx[b] = aIdx components[aIdx] = append(components[aIdx], b) case bOk && !aOk: // add a to component of b idx[a] = bIdx components[bIdx] = append(components[bIdx], a) default: // create new component with a and b idx[a] = nextIdx idx[b] = nextIdx components[nextIdx] = []int{a, b} nextIdx++ } } cc := make([][]int, len(components)) i := 0 for k := range components { cc[i] = components[k] i++ } return cc } func main() { records := loadRecords(100) blocks := block(records) comparisons := 0 edges := [][2]int{} for _, blockRecords := range blocks { c, e := compare(blockRecords) comparisons += c edges = append(edges, e...) } cc := connectedComponents(edges) fmt.Printf("made %d comparisons and found %d matches and %d entities\n", comparisons, len(edges), len(cc)) for _, component := range cc { names := make([]string, len(component)) for i, id := range component { names[i] = records[id].Name } fmt.Printf("found the following entity: %s from %s\n", strings.Join(names, ", "), records[component[0]].City) } }
Preview:
downloadDownload PNG
downloadDownload JPEG
downloadDownload SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter