diff --git a/consistenthash/consistenthash.go b/consistenthash/consistenthash.go index 0f9c864c..4778a415 100644 --- a/consistenthash/consistenthash.go +++ b/consistenthash/consistenthash.go @@ -38,14 +38,19 @@ func FNVHash(data []byte) uint32 { // Hash maps the data to a uint32 hash-ring type Hash func(data []byte) uint32 +// segment represents a hash ring entry containing both the hash value and the owner. +type segment struct { + hash uint32 + owner string +} + // Map tracks segments in a hash-ring, mapped to specific keys. type Map struct { hash Hash segsPerKey int - // keyHashes stores the sorted upper-bounds of every segment in the hash-ring. - keyHashes []uint32 // Sorted - // hashMap maps segment upper-bounds in keyHashes to the owner-key-names - hashMap map[uint32]string + // segments stores the sorted entries in the hash-ring, each containing + // the hash upper-bound and the owner-key-name. + segments []segment // keys tracks which owner-keys are currently present in the hash-ring keys map[string]struct{} } @@ -56,7 +61,6 @@ func New(segsPerKey int, fn Hash) *Map { m := &Map{ segsPerKey: segsPerKey, hash: fn, - hashMap: make(map[uint32]string), keys: make(map[string]struct{}), } if m.hash == nil { @@ -67,7 +71,7 @@ func New(segsPerKey int, fn Hash) *Map { // IsEmpty returns true if there are no items available. func (m *Map) IsEmpty() bool { - return len(m.keyHashes) == 0 + return len(m.segments) == 0 } // Get gets the closest item in the hash to the provided key. @@ -84,29 +88,29 @@ func (m *Map) Get(key string) string { func (m *Map) findSegmentOwner(hash uint32) (int, uint32, string) { // Binary search for appropriate replica. - idx := sort.Search(len(m.keyHashes), func(i int) bool { return m.keyHashes[i] >= hash }) + idx := sort.Search(len(m.segments), func(i int) bool { return m.segments[i].hash >= hash }) // Means we have cycled back to the first replica. - if idx == len(m.keyHashes) { + if idx == len(m.segments) { idx = 0 } - return idx, m.keyHashes[idx], m.hashMap[m.keyHashes[idx]] + return idx, m.segments[idx].hash, m.segments[idx].owner } func (m *Map) nextSegmentOwner(idx int) (int, uint32, string) { if len(m.keys) == 1 { panic("attempt to find alternate owner for single-key map") } - if idx == len(m.keyHashes)-1 { + if idx == len(m.segments)-1 { // if idx is len(m.keys)-1, then wrap around - return 0, m.keyHashes[0], m.hashMap[m.keyHashes[0]] + return 0, m.segments[0].hash, m.segments[0].owner } // we're moving forward within a ring; increment the index idx++ - return idx, m.keyHashes[idx], m.hashMap[m.keyHashes[idx]] + return idx, m.segments[idx].hash, m.segments[idx].owner } func (m *Map) idxedKeyReplica(key string, replica int) uint32 { diff --git a/consistenthash/consistenthash_fuzz_test.go b/consistenthash/consistenthash_fuzz_test.go index 9bd1bc25..33fa870e 100644 --- a/consistenthash/consistenthash_fuzz_test.go +++ b/consistenthash/consistenthash_fuzz_test.go @@ -48,14 +48,11 @@ func FuzzHashCollision(f *testing.F) { hash2 := New(int(segments), hashFunc) hash2.Add(input[:]...) - if !reflect.DeepEqual(hash1.hashMap, hash2.hashMap) { - t.Errorf("hash maps are not identical: %+v vs %+v", hash1.hashMap, hash2.hashMap) + if !reflect.DeepEqual(hash1.segments, hash2.segments) { + t.Errorf("segments are not identical: %+v vs %+v", hash1.segments, hash2.segments) } if !reflect.DeepEqual(hash1.keys, hash2.keys) { t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys) } - if !reflect.DeepEqual(hash1.keyHashes, hash2.keyHashes) { - t.Errorf("hash keys are not identical: %+v vs %+v", hash1.keys, hash2.keys) - } }) } diff --git a/consistenthash/consistenthash_test.go b/consistenthash/consistenthash_test.go index 851dc152..e42121a3 100644 --- a/consistenthash/consistenthash_test.go +++ b/consistenthash/consistenthash_test.go @@ -75,14 +75,22 @@ func TestHashCollision(t *testing.T) { hash2 := New(1, hashFunc) hash2.Add("Bill", "Bonny", "Bob") - t.Log(hash1.hashMap[uint32('0')], hash2.hashMap[uint32('0')]) - t.Logf("%+v", hash1.hashMap) - t.Logf("%v", hash1.keyHashes) - t.Logf("%+v", hash2.hashMap) - t.Logf("%v", hash2.keyHashes) - if hash1.hashMap[uint32('0')] != hash2.hashMap[uint32('0')] { + // Helper to find owner by hash value in segments + findOwner := func(m *Map, h uint32) string { + for _, seg := range m.segments { + if seg.hash == h { + return seg.owner + } + } + return "" + } + + t.Log(findOwner(hash1, uint32('0')), findOwner(hash2, uint32('0'))) + t.Logf("%+v", hash1.segments) + t.Logf("%+v", hash2.segments) + if findOwner(hash1, uint32('0')) != findOwner(hash2, uint32('0')) { t.Errorf("inconsistent owner for hash %d: %s vs %s", 'B', - hash1.hashMap[uint32('B')], hash2.hashMap[uint32('B')]) + findOwner(hash1, uint32('B')), findOwner(hash2, uint32('B'))) } } diff --git a/consistenthash/map_norangefunc.go b/consistenthash/map_norangefunc.go index 63e70540..8d001c3f 100644 --- a/consistenthash/map_norangefunc.go +++ b/consistenthash/map_norangefunc.go @@ -26,6 +26,11 @@ import ( // Add adds some keys to the hashring, establishing ownership of segsPerKey // segments. func (m *Map) Add(keys ...string) { + hashToOwner := make(map[uint32]string, len(m.segments)+len(keys)*m.segsPerKey) + for _, seg := range m.segments { + hashToOwner[seg.hash] = seg.owner + } + for _, key := range keys { m.keys[key] = struct{}{} for i := 0; i < m.segsPerKey; i++ { @@ -36,17 +41,18 @@ func (m *Map) Add(keys ...string) { // It doesn't matter how we reconcile collisions (the smallest would work // just as well), we just need it to be insertion-order independent so all // instances converge on the same hashmap. - if extKey, ok := m.hashMap[hash]; !ok { - // Only add another member for this hash-value if there isn't - // one there already. - m.keyHashes = append(m.keyHashes, hash) - } else if extKey >= key { + if extKey, ok := hashToOwner[hash]; ok && extKey >= key { continue } - m.hashMap[hash] = key + hashToOwner[hash] = key } } - sort.Slice(m.keyHashes, func(i, j int) bool { return m.keyHashes[i] < m.keyHashes[j] }) + + m.segments = make([]segment, 0, len(hashToOwner)) + for hash, owner := range hashToOwner { + m.segments = append(m.segments, segment{hash: hash, owner: owner}) + } + sort.Slice(m.segments, func(i, j int) bool { return m.segments[i].hash < m.segments[j].hash }) } // GetReplicated gets the closest item in the hash to a deterministic set of diff --git a/consistenthash/map_rangefunc.go b/consistenthash/map_rangefunc.go index d73653ad..d4b0d90d 100644 --- a/consistenthash/map_rangefunc.go +++ b/consistenthash/map_rangefunc.go @@ -20,6 +20,7 @@ limitations under the License. package consistenthash import ( + "cmp" "iter" "slices" "strconv" @@ -41,6 +42,14 @@ func (m *Map) ownerKeyHashes(ownerKey string) iter.Seq[uint32] { // Add adds some keys to the hashring, establishing ownership of segsPerKey // segments. func (m *Map) Add(ownerKeys ...string) { + // note: Add() is called several orders of magnitude less often than Get() and GetReplicated(), + // since hashrings should change slowly. Hence, it's beneficial to trade a smaller Map against + // doing a bit more work when calling Add (rebuilding the segments slice from scratch). + hashToOwner := make(map[uint32]string, len(m.segments)+len(ownerKeys)*m.segsPerKey) + for _, seg := range m.segments { + hashToOwner[seg.hash] = seg.owner + } + for _, key := range ownerKeys { m.keys[key] = struct{}{} for hash := range m.ownerKeyHashes(key) { @@ -50,17 +59,20 @@ func (m *Map) Add(ownerKeys ...string) { // It doesn't matter how we reconcile collisions (the smallest would work // just as well), we just need it to be insertion-order independent so all // instances converge on the same hashmap. - if extKey, ok := m.hashMap[hash]; !ok { - // Only add another member for this hash-value if there isn't - // one there already. - m.keyHashes = append(m.keyHashes, hash) - } else if extKey >= key { + if extKey, ok := hashToOwner[hash]; ok && extKey >= key { continue } - m.hashMap[hash] = key + hashToOwner[hash] = key } } - slices.Sort(m.keyHashes) + + m.segments = make([]segment, 0, len(hashToOwner)) + for hash, owner := range hashToOwner { + m.segments = append(m.segments, segment{hash: hash, owner: owner}) + } + slices.SortFunc(m.segments, func(a, b segment) int { + return cmp.Compare(a.hash, b.hash) + }) } // returns the owner of the hash, and the upper-bound for that owner's segment diff --git a/consistenthash/rangeset.go b/consistenthash/rangeset.go index 884a674e..42ffd1ed 100644 --- a/consistenthash/rangeset.go +++ b/consistenthash/rangeset.go @@ -91,26 +91,26 @@ func (m *Map) OwnerRangeSummary(ownerKey string, skipOverrideKeys int) (RangeSum skipOverrideKeys = min(len(m.keys)-1, skipOverrideKeys) ranges := make([]ownerRange, 0, m.segsPerKey) for hash := range m.ownerKeyHashes(ownerKey) { - if skipOverrideKeys == 0 && m.hashMap[hash] != ownerKey { + idx, _, segOwner := m.findSegmentOwner(hash) + if skipOverrideKeys == 0 && segOwner != ownerKey { // this ownerKey doesn't own this hash, and we're assuming that no one else is leaving the ring continue } - idx, _, _ := m.findSegmentOwner(hash) lowerBound := uint32(0) if idx <= skipOverrideKeys { // too close to the beginning of the ring to simply index // we have to wrap around. - wrappedLowerIdx := len(m.keyHashes) - (skipOverrideKeys - idx) - 1 + wrappedLowerIdx := len(m.segments) - (skipOverrideKeys - idx) - 1 // first hash, so we'll wrap around for the lower-bound. // lowerBound is already set to 0, so we don't need to touch that, but we do need to push a - // range for the tail of the last range in keyHashes. + // range for the tail of the last range in segments. // ... but, only if the last key isn't MaxUint32. - if m.keyHashes[wrappedLowerIdx] != math.MaxUint32 { - ranges = append(ranges, ownerRange{low: m.keyHashes[wrappedLowerIdx] + 1, high: math.MaxUint32}) + if m.segments[wrappedLowerIdx].hash != math.MaxUint32 { + ranges = append(ranges, ownerRange{low: m.segments[wrappedLowerIdx].hash + 1, high: math.MaxUint32}) } } else { // we're guaranteed to be able to index into the previous entry (and add 1 to it) - lowerBound = m.keyHashes[idx-skipOverrideKeys-1] + 1 + lowerBound = m.segments[idx-skipOverrideKeys-1].hash + 1 } // The upper-bound is fixed at this key's value. We're only traversing to lower values // now, take care of filling in the upper-bound @@ -125,8 +125,7 @@ func (m *Map) OwnerRangeSummary(ownerKey string, skipOverrideKeys int) (RangeSum m: Map{ hash: m.hash, segsPerKey: m.segsPerKey, - keyHashes: slices.Clone(m.keyHashes), - hashMap: maps.Clone(m.hashMap), + segments: slices.Clone(m.segments), keys: maps.Clone(m.keys), }, }, true