Snippets Collections
made 1052 comparisons and found 6 matches and 2 entities
found the following entity: Ivan Smxth, Ixan Smith, Ivax Smitx from Cairo
found the following entity: Brxan Williams, Brian Williams from Cape Town
func compare(records []Record) (comparisons int, edges [][2]int) {
	for _, a := range records {
		for _, b := range records {
			if a == b {
				continue // don't compare with itself
			}
			comparisons++
			if matches(a, b) {
				edges = append(edges, [2]int{a.ID, b.ID})
			}
		}
	}
	return comparisons, edges
}

func connectedComponents(edges [][2]int) [][]int {
	components := map[int][]int{}
	nextIdx := 0
	idx := map[int]int{}

	for _, edge := range edges {
		a := edge[0]
		b := edge[1]
		aIdx, aOk := idx[a]
		bIdx, bOk := idx[b]
		switch {
		case aOk && bOk && aIdx == bIdx: // in same component
			continue
		case aOk && bOk && aIdx != bIdx: // merge two components
			components[nextIdx] = append(components[aIdx], components[bIdx]...)
			delete(components, aIdx)
			delete(components, bIdx)
			for _, x := range components[nextIdx] {
				idx[x] = nextIdx
			}
			nextIdx++
		case aOk && !bOk: // add b to component of a
			idx[b] = aIdx
			components[aIdx] = append(components[aIdx], b)
		case bOk && !aOk: // add a to component of b
			idx[a] = bIdx
			components[bIdx] = append(components[bIdx], a)
		default: // create new component with a and b
			idx[a] = nextIdx
			idx[b] = nextIdx
			components[nextIdx] = []int{a, b}
			nextIdx++
		}
	}

	cc := make([][]int, len(components))
	i := 0
	for k := range components {
		cc[i] = components[k]
		i++
	}
	return cc
}

func main() {
	records := loadRecords(100)
	blocks := block(records)
	comparisons := 0
	edges := [][2]int{}
	for _, blockRecords := range blocks {
		c, e := compare(blockRecords)
		comparisons += c
		edges = append(edges, e...)
	}
	cc := connectedComponents(edges)

	fmt.Printf("made %d comparisons and found %d matches and %d entities\n", comparisons, len(edges), len(cc))
	for _, component := range cc {
		names := make([]string, len(component))
		for i, id := range component {
			names[i] = records[id].Name
		}
		fmt.Printf("found the following entity: %s from %s\n", strings.Join(names, ", "), records[component[0]].City)
	}
}
star

Fri Aug 11 2023 09:19:43 GMT+0000 (Coordinated Universal Time)

#entityresolution #fuzzymatching #connectedcomponents
star

Fri Aug 11 2023 09:16:23 GMT+0000 (Coordinated Universal Time)

#entityresolution #fuzzymatching #connectedcomponents

Save snippets that work with our extensions

Available in the Chrome Web Store Get Firefox Add-on Get VS Code extension