Skip to content

Commit 45abc65

Browse files
committed
use small btree for per-key versions
* avoid per-key COW snapshots: keep 0/1 item inline and promote to a locked B-tree on demand * avoid per-Set allocations on the single-item path
1 parent 674e045 commit 45abc65

5 files changed

Lines changed: 156 additions & 16 deletions

File tree

internal/blockstm/keycursor.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ type keyCursor[V any] interface {
1212
Valid() bool
1313
Key() Key
1414
// Tree returns the per-key version tree, when available.
15-
Tree() *tree.BTree[secondaryDataItem[V]]
15+
Tree() *tree.SmallBTree[secondaryDataItem[V]]
1616
Next()
1717
// Seek positions the cursor on the given key (if present).
1818
Seek(Key) bool
@@ -23,7 +23,7 @@ type noopKeyCursor[V any] struct{}
2323

2424
func (noopKeyCursor[V]) Valid() bool { return false }
2525
func (noopKeyCursor[V]) Key() Key { return nil }
26-
func (noopKeyCursor[V]) Tree() *tree.BTree[secondaryDataItem[V]] {
26+
func (noopKeyCursor[V]) Tree() *tree.SmallBTree[secondaryDataItem[V]] {
2727
return nil
2828
}
2929
func (noopKeyCursor[V]) Next() {}
@@ -98,7 +98,7 @@ func (c *btreeKeyCursor[V]) Key() Key {
9898
return c.iter.Item().Key
9999
}
100100

101-
func (c *btreeKeyCursor[V]) Tree() *tree.BTree[secondaryDataItem[V]] {
101+
func (c *btreeKeyCursor[V]) Tree() *tree.SmallBTree[secondaryDataItem[V]] {
102102
return c.iter.Item().Tree
103103
}
104104

internal/blockstm/mvdata.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ func NewGMVData[V any](isZero func(V) bool, valueLen func(V) int, eq func(V, V)
7676
}
7777

7878
// getTree returns `nil` if not found
79-
func (d *GMVData[V]) getTree(key Key) *tree2.BTree[secondaryDataItem[V]] {
79+
func (d *GMVData[V]) getTree(key Key) *tree2.SmallBTree[secondaryDataItem[V]] {
8080
return d.index.get(key)
8181
}
8282

8383
// getTreeOrDefault returns the tree for key, atomically creating it if not found.
84-
func (d *GMVData[V]) getTreeOrDefault(key Key) *tree2.BTree[secondaryDataItem[V]] {
84+
func (d *GMVData[V]) getTreeOrDefault(key Key) *tree2.SmallBTree[secondaryDataItem[V]] {
8585
return d.index.getOrCreate(key)
8686
}
8787

@@ -316,12 +316,12 @@ type KVPair = GKVPair[[]byte]
316316

317317
type dataItem[V any] struct {
318318
Key Key
319-
Tree *tree2.BTree[secondaryDataItem[V]]
319+
Tree *tree2.SmallBTree[secondaryDataItem[V]]
320320
}
321321

322322
func (d *dataItem[V]) Init() {
323323
if d.Tree == nil {
324-
d.Tree = tree2.NewBTree(secondaryLesser[V], InnerBTreeDegree)
324+
d.Tree = tree2.NewSmallBTree(secondaryLesser[V], InnerBTreeDegree)
325325
}
326326
}
327327

@@ -350,6 +350,6 @@ func (item secondaryDataItem[V]) Version() TxnVersion {
350350
}
351351

352352
// seekClosestTxn returns the closest txn that's less than the given txn.
353-
func seekClosestTxn[V any](tree *tree2.BTree[secondaryDataItem[V]], txn TxnIndex) (secondaryDataItem[V], bool) {
353+
func seekClosestTxn[V any](tree *tree2.SmallBTree[secondaryDataItem[V]], txn TxnIndex) (secondaryDataItem[V], bool) {
354354
return tree.ReverseSeek(secondaryDataItem[V]{Index: txn - 1})
355355
}

internal/blockstm/mvindex.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const minSnapshotKeysCap = 1024
1515

1616
type mvIndexEntry[V any] struct {
1717
key []byte
18-
data *tree2.BTree[secondaryDataItem[V]]
18+
data *tree2.SmallBTree[secondaryDataItem[V]]
1919
}
2020

2121
// mvIndexKeyEntry is stored in the ordered key-set so iterators can retrieve
@@ -25,7 +25,7 @@ type mvIndexEntry[V any] struct {
2525
// but the underlying *tree.SmallBTree is still mutated by writers.
2626
type mvIndexKeyEntry[V any] struct {
2727
Key Key
28-
Tree *tree2.BTree[secondaryDataItem[V]]
28+
Tree *tree2.SmallBTree[secondaryDataItem[V]]
2929
}
3030

3131
type mvIndexShard[V any] struct {
@@ -62,7 +62,7 @@ func newMVIndex[V any]() *mvIndex[V] {
6262
return idx
6363
}
6464

65-
func (idx *mvIndex[V]) get(key []byte) *tree2.BTree[secondaryDataItem[V]] {
65+
func (idx *mvIndex[V]) get(key []byte) *tree2.SmallBTree[secondaryDataItem[V]] {
6666
if idx == nil {
6767
return nil
6868
}
@@ -81,7 +81,7 @@ func (idx *mvIndex[V]) get(key []byte) *tree2.BTree[secondaryDataItem[V]] {
8181
return nil
8282
}
8383

84-
func (idx *mvIndex[V]) getOrCreate(key []byte) *tree2.BTree[secondaryDataItem[V]] {
84+
func (idx *mvIndex[V]) getOrCreate(key []byte) *tree2.SmallBTree[secondaryDataItem[V]] {
8585
h := hashKey64(key)
8686
sh := &idx.shards[h&uint64(mvIndexShards-1)]
8787
sh.mu.Lock()
@@ -97,7 +97,7 @@ func (idx *mvIndex[V]) getOrCreate(key []byte) *tree2.BTree[secondaryDataItem[V]
9797
// Avoid an extra allocation by reusing the provided key slice.
9898
// Callers must treat keys as immutable once written.
9999
kCopy := key
100-
data := tree2.NewBTree(secondaryLesser[V], InnerBTreeDegree)
100+
data := tree2.NewSmallBTree(secondaryLesser[V], InnerBTreeDegree)
101101
sh.m[h] = append(entries, mvIndexEntry[V]{key: kCopy, data: data})
102102
sh.mu.Unlock()
103103
idx.keys.Set(mvIndexKeyEntry[V]{Key: kCopy, Tree: data})

internal/blockstm/mviterator.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ type MVIterator[V any] struct {
1818
newKeys func() keyCursor[V]
1919

2020
curKey Key
21-
curTree *tree2.BTree[secondaryDataItem[V]]
21+
curTree *tree2.SmallBTree[secondaryDataItem[V]]
2222

2323
// cache current found value and version
2424
value V
@@ -106,7 +106,7 @@ func (it *MVIterator[V]) Close() error {
106106
return nil
107107
}
108108

109-
func (it *MVIterator[V]) treeForCurrentKey() *tree2.BTree[secondaryDataItem[V]] {
109+
func (it *MVIterator[V]) treeForCurrentKey() *tree2.SmallBTree[secondaryDataItem[V]] {
110110
if it.keys == nil || !it.keys.Valid() {
111111
it.curKey = nil
112112
it.curTree = nil
@@ -174,7 +174,7 @@ func (it *MVIterator[V]) resolveValue() {
174174
// - (nil, true) if the value is not found
175175
// - (nil, false) if the value is an estimate and we should fail the validation
176176
// - (v, true) if the value is found
177-
func (it *MVIterator[V]) resolveValueInner(tree *tree2.BTree[secondaryDataItem[V]]) (*secondaryDataItem[V], bool, bool, TxnIndex) {
177+
func (it *MVIterator[V]) resolveValueInner(tree *tree2.SmallBTree[secondaryDataItem[V]]) (*secondaryDataItem[V], bool, bool, TxnIndex) {
178178
if tree == nil {
179179
return nil, true, false, 0
180180
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package tree
2+
3+
import (
4+
"sync"
5+
"sync/atomic"
6+
)
7+
8+
// SmallBTree is a small-optimized ordered set/map.
9+
//
10+
// It stores 0/1 item inline (lock-free reads) and promotes to a lock-based
11+
// *BTree when a second distinct item is inserted.
12+
type SmallBTree[T any] struct {
13+
mu sync.Mutex
14+
less func(a, b T) bool
15+
degree int
16+
17+
slotA T
18+
slotB T
19+
20+
// single points to either slotA or slotB.
21+
single atomic.Pointer[T]
22+
bt atomic.Pointer[BTree[T]]
23+
}
24+
25+
func NewSmallBTree[T any](less func(a, b T) bool, degree int) *SmallBTree[T] {
26+
return &SmallBTree[T]{
27+
less: less,
28+
degree: degree,
29+
}
30+
}
31+
32+
func (t *SmallBTree[T]) equal(a, b T) bool {
33+
return !t.less(a, b) && !t.less(b, a)
34+
}
35+
36+
func (t *SmallBTree[T]) Get(item T) (result T, ok bool) {
37+
if bt := t.bt.Load(); bt != nil {
38+
return bt.Get(item)
39+
}
40+
if p := t.single.Load(); p != nil {
41+
if t.equal(*p, item) {
42+
return *p, true
43+
}
44+
}
45+
var zero T
46+
return zero, false
47+
}
48+
49+
func (t *SmallBTree[T]) Set(item T) (prev T, ok bool) {
50+
if bt := t.bt.Load(); bt != nil {
51+
return bt.Set(item)
52+
}
53+
54+
t.mu.Lock()
55+
defer t.mu.Unlock()
56+
57+
if bt := t.bt.Load(); bt != nil {
58+
return bt.Set(item)
59+
}
60+
61+
if p := t.single.Load(); p != nil {
62+
cur := *p
63+
if t.equal(cur, item) {
64+
dst := &t.slotA
65+
if p == dst {
66+
dst = &t.slotB
67+
}
68+
*dst = item
69+
t.single.Store(dst)
70+
return cur, true
71+
}
72+
73+
// Promote to a full B-tree.
74+
btNew := NewBTree(t.less, t.degree)
75+
btNew.Set(cur)
76+
btNew.Set(item)
77+
t.bt.Store(btNew)
78+
// Keep reads correct during promotion by storing bt first, then clearing single.
79+
t.single.Store(nil)
80+
var zero T
81+
return zero, false
82+
}
83+
84+
dst := &t.slotA
85+
*dst = item
86+
t.single.Store(dst)
87+
var zero T
88+
return zero, false
89+
}
90+
91+
func (t *SmallBTree[T]) Delete(item T) (prev T, ok bool) {
92+
if bt := t.bt.Load(); bt != nil {
93+
return bt.Delete(item)
94+
}
95+
96+
t.mu.Lock()
97+
defer t.mu.Unlock()
98+
99+
if bt := t.bt.Load(); bt != nil {
100+
return bt.Delete(item)
101+
}
102+
103+
if p := t.single.Load(); p != nil {
104+
cur := *p
105+
if t.equal(cur, item) {
106+
t.single.Store(nil)
107+
return cur, true
108+
}
109+
}
110+
var zero T
111+
return zero, false
112+
}
113+
114+
// ReverseSeek returns the first item that is less than or equal to the pivot.
115+
func (t *SmallBTree[T]) ReverseSeek(pivot T) (result T, ok bool) {
116+
if bt := t.bt.Load(); bt != nil {
117+
return bt.ReverseSeek(pivot)
118+
}
119+
if p := t.single.Load(); p != nil {
120+
// If pivot < item, there is no <= match.
121+
if t.less(pivot, *p) {
122+
var zero T
123+
return zero, false
124+
}
125+
return *p, true
126+
}
127+
var zero T
128+
return zero, false
129+
}
130+
131+
func (t *SmallBTree[T]) Max() (result T, ok bool) {
132+
if bt := t.bt.Load(); bt != nil {
133+
return bt.Max()
134+
}
135+
if p := t.single.Load(); p != nil {
136+
return *p, true
137+
}
138+
var zero T
139+
return zero, false
140+
}

0 commit comments

Comments
 (0)