Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions consistenthash/consistenthash.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,19 @@ func FNVHash(data []byte) uint32 {
// Hash maps the data to a uint32 hash-ring
type Hash func(data []byte) uint32

// segment represents a hash ring entry containing both the hash value and the owner.
type segment struct {
hash uint32
owner string
}

// Map tracks segments in a hash-ring, mapped to specific keys.
type Map struct {
hash Hash
segsPerKey int
// keyHashes stores the sorted upper-bounds of every segment in the hash-ring.
keyHashes []uint32 // Sorted
// hashMap maps segment upper-bounds in keyHashes to the owner-key-names
hashMap map[uint32]string
// segments stores the sorted entries in the hash-ring, each containing
// the hash upper-bound and the owner-key-name.
segments []segment
// keys tracks which owner-keys are currently present in the hash-ring
keys map[string]struct{}
}
Expand All @@ -56,7 +61,6 @@ func New(segsPerKey int, fn Hash) *Map {
m := &Map{
segsPerKey: segsPerKey,
hash: fn,
hashMap: make(map[uint32]string),
keys: make(map[string]struct{}),
}
if m.hash == nil {
Expand All @@ -67,7 +71,7 @@ func New(segsPerKey int, fn Hash) *Map {

// IsEmpty returns true if there are no items available.
func (m *Map) IsEmpty() bool {
return len(m.keyHashes) == 0
return len(m.segments) == 0
}

// Get gets the closest item in the hash to the provided key.
Expand All @@ -84,29 +88,29 @@ func (m *Map) Get(key string) string {

func (m *Map) findSegmentOwner(hash uint32) (int, uint32, string) {
// Binary search for appropriate replica.
idx := sort.Search(len(m.keyHashes), func(i int) bool { return m.keyHashes[i] >= hash })
idx := sort.Search(len(m.segments), func(i int) bool { return m.segments[i].hash >= hash })

// Means we have cycled back to the first replica.
if idx == len(m.keyHashes) {
if idx == len(m.segments) {
idx = 0
}

return idx, m.keyHashes[idx], m.hashMap[m.keyHashes[idx]]
return idx, m.segments[idx].hash, m.segments[idx].owner
}

func (m *Map) nextSegmentOwner(idx int) (int, uint32, string) {
if len(m.keys) == 1 {
panic("attempt to find alternate owner for single-key map")
}
if idx == len(m.keyHashes)-1 {
if idx == len(m.segments)-1 {
// if idx is len(m.keys)-1, then wrap around
return 0, m.keyHashes[0], m.hashMap[m.keyHashes[0]]
return 0, m.segments[0].hash, m.segments[0].owner
}

// we're moving forward within a ring; increment the index
idx++

return idx, m.keyHashes[idx], m.hashMap[m.keyHashes[idx]]
return idx, m.segments[idx].hash, m.segments[idx].owner
}

func (m *Map) idxedKeyReplica(key string, replica int) uint32 {
Expand Down
7 changes: 2 additions & 5 deletions consistenthash/consistenthash_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,11 @@ func FuzzHashCollision(f *testing.F) {
hash2 := New(int(segments), hashFunc)
hash2.Add(input[:]...)

if !reflect.DeepEqual(hash1.hashMap, hash2.hashMap) {
t.Errorf("hash maps are not identical: %+v vs %+v", hash1.hashMap, hash2.hashMap)
if !reflect.DeepEqual(hash1.segments, hash2.segments) {
t.Errorf("segments are not identical: %+v vs %+v", hash1.segments, hash2.segments)
}
if !reflect.DeepEqual(hash1.keys, hash2.keys) {
t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys)
}
if !reflect.DeepEqual(hash1.keyHashes, hash2.keyHashes) {
t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys)
}
})
}
22 changes: 15 additions & 7 deletions consistenthash/consistenthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,22 @@ func TestHashCollision(t *testing.T) {
hash2 := New(1, hashFunc)
hash2.Add("Bill", "Bonny", "Bob")

t.Log(hash1.hashMap[uint32('0')], hash2.hashMap[uint32('0')])
t.Logf("%+v", hash1.hashMap)
t.Logf("%v", hash1.keyHashes)
t.Logf("%+v", hash2.hashMap)
t.Logf("%v", hash2.keyHashes)
if hash1.hashMap[uint32('0')] != hash2.hashMap[uint32('0')] {
// Helper to find owner by hash value in segments
findOwner := func(m *Map, h uint32) string {
for _, seg := range m.segments {
if seg.hash == h {
return seg.owner
}
}
return ""
}

t.Log(findOwner(hash1, uint32('0')), findOwner(hash2, uint32('0')))
t.Logf("%+v", hash1.segments)
t.Logf("%+v", hash2.segments)
if findOwner(hash1, uint32('0')) != findOwner(hash2, uint32('0')) {
t.Errorf("inconsistent owner for hash %d: %s vs %s", 'B',
hash1.hashMap[uint32('B')], hash2.hashMap[uint32('B')])
findOwner(hash1, uint32('B')), findOwner(hash2, uint32('B')))
}
}

Expand Down
20 changes: 13 additions & 7 deletions consistenthash/map_norangefunc.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ import (
// Add adds some keys to the hashring, establishing ownership of segsPerKey
// segments.
func (m *Map) Add(keys ...string) {
hashToOwner := make(map[uint32]string, len(m.segments)+len(keys)*m.segsPerKey)
for _, seg := range m.segments {
hashToOwner[seg.hash] = seg.owner
}

for _, key := range keys {
m.keys[key] = struct{}{}
for i := 0; i < m.segsPerKey; i++ {
Expand All @@ -36,17 +41,18 @@ func (m *Map) Add(keys ...string) {
// It doesn't matter how we reconcile collisions (the smallest would work
// just as well), we just need it to be insertion-order independent so all
// instances converge on the same hashmap.
if extKey, ok := m.hashMap[hash]; !ok {
// Only add another member for this hash-value if there isn't
// one there already.
m.keyHashes = append(m.keyHashes, hash)
} else if extKey >= key {
if extKey, ok := hashToOwner[hash]; ok && extKey >= key {
continue
}
m.hashMap[hash] = key
hashToOwner[hash] = key
}
}
sort.Slice(m.keyHashes, func(i, j int) bool { return m.keyHashes[i] < m.keyHashes[j] })

m.segments = make([]segment, 0, len(hashToOwner))
for hash, owner := range hashToOwner {
m.segments = append(m.segments, segment{hash: hash, owner: owner})
}
sort.Slice(m.segments, func(i, j int) bool { return m.segments[i].hash < m.segments[j].hash })
}

// GetReplicated gets the closest item in the hash to a deterministic set of
Expand Down
26 changes: 19 additions & 7 deletions consistenthash/map_rangefunc.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ limitations under the License.
package consistenthash

import (
"cmp"
"iter"
"slices"
"strconv"
Expand All @@ -41,6 +42,14 @@ func (m *Map) ownerKeyHashes(ownerKey string) iter.Seq[uint32] {
// Add adds some keys to the hashring, establishing ownership of segsPerKey
// segments.
func (m *Map) Add(ownerKeys ...string) {
// note: Add() is called several orders of magnitude less often than Get() and GetReplicated(),
// since hashrings should change slowly. Hence, it's beneficial to trade a smaller Map against
// doing a bit more work when calling Add (rebuilding the segments slice from scratch).
hashToOwner := make(map[uint32]string, len(m.segments)+len(ownerKeys)*m.segsPerKey)
for _, seg := range m.segments {
hashToOwner[seg.hash] = seg.owner
}
Comment on lines +48 to +51
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a comment saying that Add() is called several orders of magnitude less often than Get and GetReplicated, since hashrings should change slowly, so it's beneficial to trade a smaller Map against doing a bit more work when calling Add.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added the comment. Kept the wording quite similar to what you mentioned. Hope that's fine.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Works perfectly 🙂


for _, key := range ownerKeys {
m.keys[key] = struct{}{}
for hash := range m.ownerKeyHashes(key) {
Expand All @@ -50,17 +59,20 @@ func (m *Map) Add(ownerKeys ...string) {
// It doesn't matter how we reconcile collisions (the smallest would work
// just as well), we just need it to be insertion-order independent so all
// instances converge on the same hashmap.
if extKey, ok := m.hashMap[hash]; !ok {
// Only add another member for this hash-value if there isn't
// one there already.
m.keyHashes = append(m.keyHashes, hash)
} else if extKey >= key {
if extKey, ok := hashToOwner[hash]; ok && extKey >= key {
continue
}
m.hashMap[hash] = key
hashToOwner[hash] = key
}
}
slices.Sort(m.keyHashes)

m.segments = make([]segment, 0, len(hashToOwner))
for hash, owner := range hashToOwner {
m.segments = append(m.segments, segment{hash: hash, owner: owner})
}
slices.SortFunc(m.segments, func(a, b segment) int {
return cmp.Compare(a.hash, b.hash)
})
}

// returns the owner of the hash, and the upper-bound for that owner's segment
Expand Down
17 changes: 8 additions & 9 deletions consistenthash/rangeset.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,26 +91,26 @@ func (m *Map) OwnerRangeSummary(ownerKey string, skipOverrideKeys int) (RangeSum
skipOverrideKeys = min(len(m.keys)-1, skipOverrideKeys)
ranges := make([]ownerRange, 0, m.segsPerKey)
for hash := range m.ownerKeyHashes(ownerKey) {
if skipOverrideKeys == 0 && m.hashMap[hash] != ownerKey {
idx, _, segOwner := m.findSegmentOwner(hash)
if skipOverrideKeys == 0 && segOwner != ownerKey {
// this ownerKey doesn't own this hash, and we're assuming that no one else is leaving the ring
continue
}
idx, _, _ := m.findSegmentOwner(hash)
lowerBound := uint32(0)
if idx <= skipOverrideKeys {
// too close to the beginning of the ring to simply index
// we have to wrap around.
wrappedLowerIdx := len(m.keyHashes) - (skipOverrideKeys - idx) - 1
wrappedLowerIdx := len(m.segments) - (skipOverrideKeys - idx) - 1
// first hash, so we'll wrap around for the lower-bound.
// lowerBound is already set to 0, so we don't need to touch that, but we do need to push a
// range for the tail of the last range in keyHashes.
// range for the tail of the last range in segments.
// ... but, only if the last key isn't MaxUint32.
if m.keyHashes[wrappedLowerIdx] != math.MaxUint32 {
ranges = append(ranges, ownerRange{low: m.keyHashes[wrappedLowerIdx] + 1, high: math.MaxUint32})
if m.segments[wrappedLowerIdx].hash != math.MaxUint32 {
ranges = append(ranges, ownerRange{low: m.segments[wrappedLowerIdx].hash + 1, high: math.MaxUint32})
}
} else {
// we're guaranteed to be able to index into the previous entry (and add 1 to it)
lowerBound = m.keyHashes[idx-skipOverrideKeys-1] + 1
lowerBound = m.segments[idx-skipOverrideKeys-1].hash + 1
}
// The upper-bound is fixed at this key's value. We're only traversing to lower values
// now, take care of filling in the upper-bound
Expand All @@ -125,8 +125,7 @@ func (m *Map) OwnerRangeSummary(ownerKey string, skipOverrideKeys int) (RangeSum
m: Map{
hash: m.hash,
segsPerKey: m.segsPerKey,
keyHashes: slices.Clone(m.keyHashes),
hashMap: maps.Clone(m.hashMap),
segments: slices.Clone(m.segments),
keys: maps.Clone(m.keys),
},
}, true
Expand Down