masstree/leaf_trait.rs
1//! Traits for abstracting over leaf node WIDTH variants.
2//!
3//! This module defines [`TreePermutation`] and [`TreeLeafNode`] traits that
4//! enable generic tree operations over both WIDTH=15 and WIDTH=24 leaf nodes.
5//!
6//! # Design
7//!
8//! The traits use static dispatch (generics) for zero-cost abstraction:
9//! - No vtable overhead
10//! - Full monomorphization
11//! - Compiler can inline all trait methods
12//!
13//! # Implementors
14//!
15//! - [`TreePermutation`]: `Permuter<WIDTH>`, `Permuter24`
16//! - [`TreeLeafNode`]: `LeafNode<S, WIDTH>`, `LeafNode24<S>`
17
18use std::cmp::Ordering;
19use std::fmt::Debug;
20
21use crate::key::Key;
22use crate::nodeversion::NodeVersion;
23use crate::slot::ValueSlot;
24use seize::LocalGuard;
25
26// ============================================================================
27// Re-exports from value.rs for use in generic code
28// ============================================================================
29
30pub use crate::value::InsertTarget;
31pub use crate::value::SplitPoint;
32
33// ============================================================================
34// TreePermutation Trait
35// ============================================================================
36
/// Trait for permutation types used in leaf nodes.
///
/// Abstracts over `Permuter<WIDTH>` (u64) and `Permuter24` (u128), enabling
/// generic tree operations that work with both WIDTH=15 and WIDTH=24 nodes.
///
/// A permutation maps *logical positions* (sorted order, `0..size()`) to
/// *physical slots* (storage indices, `0..WIDTH`), so entries remain logically
/// sorted without physically moving slot contents.
///
/// # Associated Types
///
/// - `Raw`: The underlying storage type (`u64` or `u128`)
///
/// # Implementors
///
/// - `Permuter<WIDTH>` for WIDTH in 1..=15
/// - `Permuter24` for WIDTH=24
pub trait TreePermutation: Copy + Clone + Eq + Debug + Send + Sync + Sized + 'static {
    /// Raw storage type for atomic operations.
    ///
    /// - `Permuter<WIDTH>`: `u64`
    /// - `Permuter24`: `u128`
    type Raw: Copy + Clone + Eq + Debug + Send + Sync + 'static;

    /// Number of slots this permutation supports.
    const WIDTH: usize;

    // ========================================================================
    // Construction
    // ========================================================================

    /// Create an empty permutation with size = 0.
    ///
    /// Slots are arranged so `back()` returns slot 0 initially.
    fn empty() -> Self;

    /// Create a sorted permutation with `n` elements in slots `0..n`.
    ///
    /// The permutation will have size `n` with logical positions `0..n`
    /// mapping to physical slots `0..n` in order.
    ///
    /// This is used when creating layer nodes during suffix conflict resolution,
    /// where we need a small number of pre-positioned entries.
    ///
    /// # Arguments
    ///
    /// * `n` - Number of elements (`0 <= n <= WIDTH`)
    ///
    /// # Panics
    ///
    /// Panics in debug mode if `n > WIDTH`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Create a permutation with 2 sorted entries
    /// let perm = Permuter::<15>::make_sorted(2);
    /// assert_eq!(perm.size(), 2);
    ///
    /// // Position 0 -> Slot 0
    /// assert_eq!(perm.get(0), 0);
    ///
    /// // Position 1 -> Slot 1
    /// assert_eq!(perm.get(1), 1);
    /// ```
    fn make_sorted(n: usize) -> Self;

    /// Create a permutation from a raw storage value.
    ///
    /// Used when loading from atomic storage (the inverse of [`Self::value`]).
    ///
    /// # Safety Note
    ///
    /// The raw value should be a valid permutation encoding. Invalid values
    /// may cause debug assertions to fail but won't cause undefined behavior.
    fn from_value(raw: Self::Raw) -> Self;

    // ========================================================================
    // Accessors
    // ========================================================================

    /// Get the raw storage value.
    ///
    /// Used for atomic store/CAS operations (the inverse of [`Self::from_value`]).
    fn value(&self) -> Self::Raw;

    /// Get the number of slots in use.
    fn size(&self) -> usize;

    /// Get the physical slot at logical position `i`.
    ///
    /// # Panics
    ///
    /// Debug-panics if `i >= WIDTH`.
    fn get(&self, i: usize) -> usize;

    /// Get the slot at the back (next free slot to allocate).
    ///
    /// Equivalent to `get(WIDTH - 1)`.
    fn back(&self) -> usize;

    /// Get the slot at `back()` with an offset into the free region.
    ///
    /// `back_at_offset(0)` == `back()`.
    ///
    /// # Panics
    ///
    /// Debug-panics if `size() + offset >= WIDTH`.
    fn back_at_offset(&self, offset: usize) -> usize;

    // ========================================================================
    // Mutation
    // ========================================================================

    /// Allocate a slot from back and insert at position `i`.
    ///
    /// Returns the allocated physical slot index.
    ///
    /// # Panics
    ///
    /// Debug-panics if `i > size()` or `size() >= WIDTH`.
    fn insert_from_back(&mut self, i: usize) -> usize;

    /// Compute insert result without mutation (for CAS operations).
    ///
    /// Returns `(new_permutation, allocated_slot)`.
    ///
    /// This is used in lock-free CAS insert paths where we need to compute
    /// the new permutation value before attempting an atomic CAS.
    fn insert_from_back_immutable(&self, i: usize) -> (Self, usize);

    /// Swap two slots in the free region (positions >= size).
    ///
    /// Used to skip slot 0 when it can't be reused due to `ikey_bound` constraints.
    fn swap_free_slots(&mut self, pos_i: usize, pos_j: usize);

    /// Set the size without changing slot positions.
    fn set_size(&mut self, n: usize);

    /// Remove the slot at logical position `i`.
    ///
    /// The slot is moved to the free region (back) and size is decremented.
    ///
    /// # Panics
    ///
    /// Debug-panics if `i >= size()`.
    fn remove(&mut self, i: usize);
}
181
182// ============================================================================
183// TreeInternode Trait
184// ============================================================================
185
/// Trait for internode types used in a `MassTree`.
///
/// Abstracts over `InternodeNode` for generic tree operations.
/// Internode WIDTH is fixed at 15 (matching leaf WIDTH for optimal B+tree fanout).
///
/// # Note on Generic Parameter
///
/// Unlike leaf nodes, internodes don't store values - only `u64` keys and `*mut u8`
/// child pointers. The internode type is therefore non-generic, reducing code
/// monomorphization (one `InternodeNode` implementation regardless of slot type).
///
/// # Implementors
///
/// - `InternodeNode` (fixed WIDTH=15)
pub trait TreeInternode: Sized + Send + Sync + 'static {
    /// Node width (max number of children).
    const WIDTH: usize;

    // ========================================================================
    // Construction
    // ========================================================================

    /// Create a new internode with specified height.
    fn new_boxed(height: u32) -> Box<Self>;

    /// Create a new root internode with specified height.
    fn new_root_boxed(height: u32) -> Box<Self>;

    /// Create a new internode sibling for a split operation.
    ///
    /// The new internode is created with a **split-locked** version copied from the
    /// locked parent. This prevents other threads from locking the sibling until
    /// it is installed into the tree and its parent pointer is set.
    ///
    /// # Help-Along Protocol
    ///
    /// This is the internode equivalent of leaf `NodeVersion::new_for_split()`.
    /// The caller MUST call `version().unlock_for_split()` exactly once after:
    /// 1. The sibling is inserted into its parent (grandparent or new root)
    /// 2. The sibling's parent pointer is set
    ///
    /// # C++ Reference
    ///
    /// Matches `next_child->assign_version(*p)` in `masstree_split.hh:234`.
    ///
    /// # Safety
    ///
    /// The `parent_version` must be from a locked node (the parent being split).
    fn new_boxed_for_split(parent_version: &NodeVersion, height: u32) -> Box<Self>;

    // ========================================================================
    // Version / Locking
    // ========================================================================

    /// Get reference to node version (used for OCC validation and locking).
    fn version(&self) -> &NodeVersion;

    // ========================================================================
    // Structure
    // ========================================================================

    /// Get the height of this internode.
    fn height(&self) -> u32;

    /// Check if children are leaves (height == 0).
    fn children_are_leaves(&self) -> bool;

    /// Get number of keys.
    fn nkeys(&self) -> usize;

    /// Set number of keys.
    fn set_nkeys(&self, n: u8);

    /// Increment nkeys by 1.
    fn inc_nkeys(&self);

    /// Check if this internode is full.
    fn is_full(&self) -> bool;

    // ========================================================================
    // Keys
    // ========================================================================

    /// Get key at index (Acquire ordering).
    fn ikey(&self, idx: usize) -> u64;

    /// Get key at index using Relaxed ordering.
    ///
    /// # Safety Justification
    ///
    /// Callers must have already established ordering via `version.stable()`
    /// or equivalent fence. The Acquire fence from `stable()` synchronizes
    /// with writer's Release stores, making Relaxed loads safe.
    ///
    /// This is safe because:
    /// 1. Call sites do `stable()` which spins until `DIRTY_MASK == 0`
    /// 2. `stable()` issues `fence(Acquire)` before returning
    /// 3. The fence synchronizes with writer's `Release` stores
    /// 4. Therefore, Relaxed loads see fully-published values
    fn ikey_relaxed(&self, idx: usize) -> u64;

    /// Set key at index.
    fn set_ikey(&self, idx: usize, key: u64);

    /// Compare key at position with search key.
    fn compare_key(&self, search_ikey: u64, p: usize) -> Ordering;

    /// Find insert position for a key.
    fn find_insert_position(&self, insert_ikey: u64) -> usize;

    // ========================================================================
    // Children
    // ========================================================================

    /// Get child pointer at index.
    fn child(&self, idx: usize) -> *mut u8;

    /// Get child pointer with prefetch hint for the next likely child.
    ///
    /// Default implementation just calls `child()` without prefetching.
    /// Optimized implementations can prefetch the next child to hide latency.
    #[inline(always)]
    fn child_with_prefetch(&self, idx: usize, _nkeys: usize) -> *mut u8 {
        self.child(idx)
    }

    /// Get child pointer with depth-first prefetch for descent.
    ///
    /// Prefetches the target child's header and keys, optimizing
    /// for tree descent (going down) rather than sibling traversal.
    ///
    /// # Default Implementation
    ///
    /// Falls back to [`Self::child`] without prefetch.
    #[inline(always)]
    fn child_with_depth_prefetch(&self, idx: usize) -> *mut u8 {
        self.child(idx)
    }

    /// Get child pointer with full prefetch (all key cache lines).
    ///
    /// Prefetches all cache lines containing keys. Use when nodes
    /// are expected to be full.
    ///
    /// # Default Implementation
    ///
    /// Falls back to [`Self::child`] without prefetch.
    #[inline(always)]
    fn child_with_full_prefetch(&self, idx: usize) -> *mut u8 {
        self.child(idx)
    }

    /// Set child pointer at index.
    fn set_child(&self, idx: usize, child: *mut u8);

    /// Assign key and right child at position.
    fn assign(&self, p: usize, ikey: u64, right_child: *mut u8);

    /// Insert key and child at position, shifting existing entries.
    fn insert_key_and_child(&self, p: usize, new_ikey: u64, new_child: *mut u8);

    // ========================================================================
    // Navigation
    // ========================================================================

    /// Get parent pointer.
    fn parent(&self) -> *mut u8;

    /// Set parent pointer.
    fn set_parent(&self, parent: *mut u8);

    /// Check if this is a root node.
    fn is_root(&self) -> bool;

    // ========================================================================
    // Split Support
    // ========================================================================

    /// Shift entries from another internode.
    fn shift_from(&self, dst_pos: usize, src: &Self, src_pos: usize, count: usize);

    /// Split this internode into a new sibling while inserting a key/child.
    ///
    /// This method performs the split AND updates all children's parent pointers
    /// in `new_right` to point to `new_right_ptr`. This is critical for correctness:
    /// parent updates must happen inside `split_into` (before returning) to prevent
    /// races where a thread sees a child with a stale parent pointer.
    ///
    /// # Arguments
    ///
    /// * `new_right` - The new right sibling (pre-allocated, mutable reference)
    /// * `new_right_ptr` - Raw pointer to `new_right` for setting parent pointers
    /// * `insert_pos` - Position where the new key/child should be inserted
    /// * `insert_ikey` - The key to insert
    /// * `insert_child` - The child pointer to insert
    ///
    /// # Returns
    ///
    /// `(popup_key, insert_went_left)` where:
    /// - `popup_key` is the key that goes to the parent
    /// - `insert_went_left` is true if the insert went to the left sibling
    ///
    /// # Safety
    ///
    /// * `new_right_ptr` must point to `new_right`
    /// * The caller must hold the lock on `self`
    #[must_use = "popup_key must be inserted into parent node to complete the split"]
    fn split_into(
        &self,
        new_right: &mut Self,
        new_right_ptr: *mut Self,
        insert_pos: usize,
        insert_ikey: u64,
        insert_child: *mut u8,
    ) -> (u64, bool);
}
402
403// ============================================================================
404// TreeLeafNode Trait
405// ============================================================================
406
407/// Trait for leaf node types that can be used in a [`crate::MassTree`].
408///
409/// Abstracts over `LeafNode<S, WIDTH>` and `LeafNode24<S>`, enabling generic
410/// tree operations that work with both WIDTH=15 and WIDTH=24 nodes.
411///
412/// # Type Parameters
413///
414/// - `S`: The slot type (e.g., `LeafValue<V>` or `LeafValueIndex<V>`)
415///
416/// # Associated Types
417///
418/// - `Perm`: The permutation type for this leaf
419/// - `Internode`: The internode type for this tree variant
420///
421/// # Implementors
422///
423/// - `LeafNode<S, WIDTH>` for WIDTH in 1..=15
424/// - `LeafNode24<S>` for WIDTH=24
425pub trait TreeLeafNode<S: ValueSlot>: Sized + Send + Sync + 'static {
426 /// The permutation type for this leaf.
427 type Perm: TreePermutation;
428
429 /// The internode type for this tree variant.
430 type Internode: TreeInternode;
431
432 /// Node width (number of slots).
433 const WIDTH: usize;
434
435 /// Split threshold (trigger split when size >= this).
436 ///
437 /// Default is 80% of WIDTH to reduce split frequency on hot keys.
438 /// This leaves headroom for inserts before triggering a split.
439 ///
440 /// - WIDTH=15: threshold=12 (80%)
441 /// - WIDTH=24: threshold=19 (79%)
442 const SPLIT_THRESHOLD: usize;
443
444 // ========================================================================
445 // Construction
446 // ========================================================================
447
448 /// Create a new leaf node (heap-allocated).
449 fn new_boxed() -> Box<Self>;
450
451 /// Create a new root leaf node (heap-allocated).
452 fn new_root_boxed() -> Box<Self>;
453
454 /// Create a new leaf node configured as a layer root.
455 ///
456 /// The returned node has:
457 /// - `is_root` flag set via `version.mark_root()`
458 /// - `parent` pointer set to null
459 ///
460 /// Layer roots are used when creating sublayers for keys longer than 8 bytes.
461 /// When two keys share the same 8-byte ikey but have different suffixes,
462 /// a new layer is created to distinguish them by their next 8-byte chunk.
463 fn new_layer_root_boxed() -> Box<Self>;
464
465 // ========================================================================
466 // NodeVersion Operations
467 // ========================================================================
468
469 /// Get a reference to the node's version.
470 ///
471 /// Used for optimistic concurrency control (OCC) and locking.
472 fn version(&self) -> &NodeVersion;
473
474 // ========================================================================
475 // Permutation Operations
476 // ========================================================================
477
478 /// Load the current permutation with Acquire ordering.
479 fn permutation(&self) -> Self::Perm;
480
481 /// Store a new permutation with Release ordering.
482 fn set_permutation(&self, perm: Self::Perm);
483
484 /// Load raw permutation value with Acquire ordering.
485 ///
486 /// Used for freeze detection without constructing a Permuter.
487 fn permutation_raw(&self) -> <Self::Perm as TreePermutation>::Raw;
488
489 // ========================================================================
490 // Key Operations
491 // ========================================================================
492
493 /// Get ikey at physical slot using Acquire ordering.
494 ///
495 /// # Panics
496 ///
497 /// Debug-panics if `slot >= WIDTH`.
498 fn ikey(&self, slot: usize) -> u64;
499
500 /// Get ikey at physical slot using Relaxed ordering.
501 ///
502 /// # Safety Justification
503 ///
504 /// Safe to use when caller has already loaded permutation with Acquire
505 /// ordering and will validate with OCC at the end of the read operation.
506 /// Avoids redundant Acquire fences on each ikey load.
507 ///
508 /// # Panics
509 ///
510 /// Debug-panics if `slot >= WIDTH`.
511 fn ikey_relaxed(&self, slot: usize) -> u64;
512
513 /// Set ikey at physical slot.
514 ///
515 /// # Panics
516 ///
517 /// Debug-panics if `slot >= WIDTH`.
518 fn set_ikey(&self, slot: usize, ikey: u64);
519
520 /// Get ikey bound (slot 0's ikey for B-link navigation).
521 ///
522 /// The `ikey_bound` is the smallest ikey in this leaf and is used
523 /// for navigating to the correct sibling during splits.
524 fn ikey_bound(&self) -> u64;
525
526 /// Find all physical slots with matching ikey, returning a bitmask.
527 ///
528 /// Returns a `u32` where bit `i` is set if `self.ikey(i) == target_ikey`.
529 /// Used for SIMD-accelerated key search.
530 ///
531 /// The default implementation uses a scalar loop. Implementations may
532 /// override with SIMD for better performance.
533 #[inline]
534 fn find_ikey_matches(&self, target_ikey: u64) -> u32 {
535 let mut mask: u32 = 0;
536 for slot in 0..Self::WIDTH {
537 if self.ikey(slot) == target_ikey {
538 mask |= 1 << slot;
539 }
540 }
541 mask
542 }
543
544 /// Get keylenx at physical slot.
545 ///
546 /// Values:
547 /// - 0-8: inline key length
/// - 64 (`KSUF_KEYLENX`): has suffix
/// - >=128 (`LAYER_KEYLENX`): is layer pointer
550 fn keylenx(&self, slot: usize) -> u8;
551
552 /// Set keylenx at physical slot.
553 fn set_keylenx(&self, slot: usize, keylenx: u8);
554
555 /// Check if slot contains a layer pointer.
556 ///
557 /// A layer pointer indicates this slot descends into a sublayer
558 /// for keys longer than 8 bytes at this level.
559 fn is_layer(&self, slot: usize) -> bool;
560
561 /// Check if slot has a suffix.
562 fn has_ksuf(&self, slot: usize) -> bool;
563
564 // ========================================================================
565 // Value Operations
566 // ========================================================================
567
568 /// Load value pointer at slot.
569 ///
570 /// Returns raw pointer to either an `Arc<V>` (value mode) or
571 /// a sublayer root node (layer mode).
572 fn leaf_value_ptr(&self, slot: usize) -> *mut u8;
573
574 /// Store value pointer at slot.
575 fn set_leaf_value_ptr(&self, slot: usize, ptr: *mut u8);
576
577 /// Update an existing value in place.
578 ///
579 /// This is an optimization for updates where the slot already contains
580 /// a value. For inline storage, this skips the sentinel store since
581 /// the sentinel is already in place.
582 ///
583 /// Default implementation calls `set_leaf_value_ptr`.
584 #[inline(always)]
585 fn update_leaf_value_in_place(&self, slot: usize, ptr: *mut u8) {
586 self.set_leaf_value_ptr(slot, ptr);
587 }
588
589 /// CAS value pointer at slot.
590 ///
591 /// Used in CAS insert path to atomically claim a slot.
592 ///
593 /// # Errors
594 ///
595 /// Returns `Err(actual)` containing the actual pointer value if the CAS
596 /// failed due to a concurrent modification (the slot's current value
597 /// did not match `expected`).
598 fn cas_slot_value(
599 &self,
600 slot: usize,
601 expected: *mut u8,
602 new_value: *mut u8,
603 ) -> Result<(), *mut u8>;
604
605 // ========================================================================
606 // Slot Clearing (for gc_layer)
607 // ========================================================================
608
609 /// Clear a slot completely, removing any value or layer pointer.
610 ///
611 /// Used by `gc_layer` when cleaning up an empty sublayer.
612 /// The parent leaf's slot that pointed to the sublayer is cleared.
613 ///
614 /// # Safety
615 ///
616 /// The caller must ensure:
617 /// - The leaf is locked
618 /// - The slot is valid (0..WIDTH)
619 /// - Any value/layer at this slot has been or will be properly retired
620 fn clear_slot(&self, slot: usize);
621
622 /// Clear a slot and update permutation.
623 ///
624 /// This is a convenience method that:
625 /// 1. Clears the slot contents
626 /// 2. Removes the slot from the permutation
627 ///
628 /// # Safety
629 ///
630 /// The caller must ensure the leaf is locked.
631 fn clear_slot_and_permutation(&self, slot: usize);
632
633 // ========================================================================
634 // Size Operations
635 // ========================================================================
636
    /// Get number of keys in this leaf.
    ///
    /// Equivalent to `self.permutation().size()`; the permutation is loaded
    /// with Acquire ordering (see [`Self::permutation`]).
    #[inline(always)]
    fn size(&self) -> usize {
        self.permutation().size()
    }
642
    /// Check if leaf is empty.
    ///
    /// Goes through [`Self::size`], so an implementation that overrides
    /// `size` also affects this method.
    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.size() == 0
    }
648
    /// Check if leaf is full (`size() >= WIDTH`).
    ///
    /// Note: splits are normally triggered earlier, once `size()` reaches
    /// [`Self::SPLIT_THRESHOLD`], so a leaf rarely reaches this state.
    #[inline(always)]
    fn is_full(&self) -> bool {
        self.size() >= Self::WIDTH
    }
654
655 // ========================================================================
656 // Navigation (B-link tree pointers)
657 // ========================================================================
658
659 /// Get next leaf pointer (with mark bit cleared).
660 ///
661 /// The next pointer may be marked during splits. This method
662 /// returns the clean pointer for following the linked list.
663 fn safe_next(&self) -> *mut Self;
664
665 /// Check if next pointer is marked.
666 ///
667 /// A marked next pointer indicates a split is in progress.
668 fn next_is_marked(&self) -> bool;
669
670 /// Set next leaf pointer.
671 fn set_next(&self, next: *mut Self);
672
673 /// Mark the next pointer (during split).
674 fn mark_next(&self);
675
676 /// Unmark the next pointer.
677 fn unmark_next(&self);
678
679 /// Get previous leaf pointer.
680 fn prev(&self) -> *mut Self;
681
682 /// Set previous leaf pointer.
683 fn set_prev(&self, prev: *mut Self);
684
685 /// Unlink this leaf from the B-link doubly-linked chain.
686 ///
687 /// Used when removing an empty leaf from the tree.
688 ///
689 /// # Safety
690 ///
691 /// - Caller must hold the version lock on this leaf
692 /// - `self.prev()` must be non-null (not the leftmost leaf)
693 /// - The prev and next pointers must be valid leaves
694 unsafe fn unlink_from_chain(&self);
695
696 /// Get parent internode pointer.
697 fn parent(&self) -> *mut u8;
698
699 /// Set parent internode pointer.
700 fn set_parent(&self, parent: *mut u8);
701
702 // ========================================================================
703 // Slot Assignment Helpers
704 // ========================================================================
705
706 /// Check if slot 0 can be reused for a new key.
707 ///
708 /// Slot 0 stores `ikey_bound()` which must be preserved if this
709 /// leaf has a predecessor (prev != null). Slot 0 can only be
710 /// reused if the new key has the same ikey as the current bound.
711 fn can_reuse_slot0(&self, new_ikey: u64) -> bool;
712
713 // ========================================================================
714 // CAS Insert Support
715 // ========================================================================
716
717 /// Store key metadata (`ikey`, `keylenx`) for a CAS insert attempt.
718 ///
719 /// # Safety
720 ///
721 /// - The caller must have successfully claimed the slot via `cas_slot_value` and ensured
722 /// the slot still belongs to the CAS attempt (i.e. `leaf_values[slot]` still equals the
723 /// claimed pointer).
724 ///
725 /// Note: writing key metadata *before* claiming the slot is not safe in this design because
726 /// multiple concurrent CAS attempts can overwrite each other's metadata before publish.
727 unsafe fn store_key_data_for_cas(&self, slot: usize, ikey: u64, keylenx: u8);
728
729 /// Load the raw slot value pointer atomically.
730 ///
731 /// Used to verify slot ownership after CAS claim.
732 fn load_slot_value(&self, slot: usize) -> *mut u8;
733
734 /// Get the raw next pointer (may be marked).
735 ///
736 /// Returns the next pointer without unmarking. Use to check
737 /// if a split is in progress (marked) or get the raw value.
738 fn next_raw(&self) -> *mut Self;
739
740 /// Wait for an in-progress split to complete.
741 ///
742 /// Spins until the next pointer is unmarked and version is stable.
743 fn wait_for_split(&self);
744
745 // ========================================================================
746 // Split Operations
747 // ========================================================================
748
749 /// Calculate the optimal split point.
750 ///
751 /// # Arguments
752 ///
753 /// * `insert_pos` - Logical position where new key will be inserted
754 /// * `insert_ikey` - The key being inserted
755 ///
756 /// # Returns
757 ///
758 /// `Some(SplitPoint)` with position and split key, or `None` if split
759 /// is not possible (e.g., empty leaf).
760 fn calculate_split_point(&self, insert_pos: usize, insert_ikey: u64) -> Option<SplitPoint>;
761
762 /// Split this leaf at `split_pos` using a pre-allocated target.
763 ///
764 /// Moves entries from `split_pos..size` to `new_leaf_ptr`.
765 /// The caller is responsible for allocating and tracking the new leaf.
766 ///
767 /// # Returns
768 ///
769 /// `(split_ikey, insert_target)` tuple where:
770 /// - `split_ikey` is the first key of the new leaf (separator for parent)
771 /// - `insert_target` indicates which leaf should receive the new key
772 ///
773 /// # Safety
774 ///
775 /// - Caller must hold the leaf lock (if concurrent)
776 /// - `new_leaf_ptr` must point to valid, initialized leaf memory
777 /// - The new leaf should be freshly allocated (empty) with split-locked version
778 /// - `guard` must be valid
779 unsafe fn split_into_preallocated(
780 &self,
781 split_pos: usize,
782 new_leaf_ptr: *mut Self,
783 guard: &seize::LocalGuard<'_>,
784 ) -> (u64, InsertTarget);
785
786 /// Move ALL entries to a new right leaf.
787 ///
788 /// Used for the edge case where `split_pos == 0` in post-insert coordinates.
789 /// The original leaf becomes empty, and all entries move to the new leaf.
790 ///
791 /// # Safety
792 ///
793 /// Same requirements as `split_into_preallocated`.
794 unsafe fn split_all_to_right_preallocated(
795 &self,
796 new_leaf_ptr: *mut Self,
797 guard: &seize::LocalGuard<'_>,
798 ) -> (u64, InsertTarget);
799
800 // ========================================================================
801 // Sibling Link Helper (for split)
802 // ========================================================================
803
804 /// Link this leaf to a new sibling (B-link tree threading).
805 ///
806 /// Sets up the doubly-linked list: `self.next = new_sibling`,
807 /// `new_sibling.prev = self`, and if there was an old next,
808 /// updates `old_next.prev = new_sibling`.
809 ///
810 /// # Safety
811 ///
812 /// - `new_sibling` must be a valid pointer to a freshly allocated leaf
813 /// - Caller must hold the leaf lock
814 unsafe fn link_sibling(&self, new_sibling: *mut Self);
815
816 // ========================================================================
817 // Suffix Operations (for split)
818 // ========================================================================
819
820 /// Get suffix at slot (if any).
821 ///
822 /// Returns `None` if no suffix is stored at this slot.
823 fn ksuf(&self, slot: usize) -> Option<&[u8]>;
824
825 /// Assign a suffix to a slot (normal operation).
826 ///
827 /// Uses the permutation to determine which slots are active.
828 /// This is the hot path for normal inserts/updates.
829 ///
830 /// # Safety
831 ///
832 /// - Caller must hold the leaf lock
833 /// - Slot must be valid
834 unsafe fn assign_ksuf(&self, slot: usize, suffix: &[u8], guard: &seize::LocalGuard<'_>);
835
836 /// Assign a suffix to a slot during node initialization (e.g., splits).
837 ///
838 /// Unlike `assign_ksuf`, this assumes slots `0..slot` are already filled
839 /// sequentially and doesn't rely on the permutation. Used during split
840 /// operations when the new node's permutation hasn't been set up yet.
841 ///
842 /// This matches C++ `assign_ksuf`'s `initializing=true` path in
843 /// `masstree_struct.hh:728`.
844 ///
845 /// # Safety
846 ///
847 /// - Caller must hold the leaf lock
848 /// - Slot must be valid
849 /// - Slots 0..slot must already be filled sequentially
850 unsafe fn assign_ksuf_init(&self, slot: usize, suffix: &[u8], guard: &seize::LocalGuard<'_>);
851
852 /// Clear suffix at slot.
853 ///
854 /// # Safety
855 ///
856 /// - Caller must hold the leaf lock
857 unsafe fn clear_ksuf(&self, slot: usize, guard: &seize::LocalGuard<'_>);
858
859 /// Take ownership of the value pointer at slot (for moving during split).
860 ///
861 /// Returns the pointer and clears the slot. Used when moving entries
862 /// between leaves during a split.
863 fn take_leaf_value_ptr(&self, slot: usize) -> *mut u8;
864
865 // ========================================================================
866 // Suffix Comparison Operations
867 // ========================================================================
868
869 /// Check if a slot's suffix equals the given suffix.
870 ///
871 /// Returns `false` if:
872 /// - Slot has no suffix (`keylenx != KSUF_KEYLENX`)
873 /// - Suffix bag is null
874 /// - Suffixes don't match
875 ///
876 /// # Panics
877 ///
878 /// Panics in debug mode if `slot >= WIDTH`.
879 #[must_use]
880 fn ksuf_equals(&self, slot: usize, suffix: &[u8]) -> bool;
881
882 /// Compare a slot's suffix with the given suffix.
883 ///
884 /// Returns `None` if the slot has no suffix.
885 /// Returns `Some(Ordering)` if comparison is possible.
886 ///
887 /// # Panics
888 ///
889 /// Panics in debug mode if `slot >= WIDTH`.
890 #[must_use]
891 fn ksuf_compare(&self, slot: usize, suffix: &[u8]) -> Option<Ordering>;
892
893 /// Get the suffix for a slot, or an empty slice if none.
894 ///
895 /// Convenience wrapper around `ksuf()`.
896 ///
897 /// # Panics
898 ///
899 /// Panics in debug mode if `slot >= WIDTH`.
900 #[must_use]
901 #[inline(always)]
902 fn ksuf_or_empty(&self, slot: usize) -> &[u8] {
903 self.ksuf(slot).unwrap_or(&[])
904 }
905
    /// Check if a slot's key (ikey + suffix) matches the given full key.
    ///
    /// This compares both the 8-byte ikey and the suffix (if any); see
    /// [`Self::ksuf_equals`] for a suffix-only comparison and
    /// [`Self::ksuf_match_result`] for the layer-aware variant.
    ///
    /// # Arguments
    ///
    /// * `slot` - Physical slot index
    /// * `ikey` - The 8-byte key to compare
    /// * `suffix` - The suffix to compare (bytes after the first 8)
    ///
    /// # Returns
    ///
    /// `true` if both ikey and suffix match.
    ///
    /// # Panics
    ///
    /// Panics in debug mode if `slot >= WIDTH`.
    #[must_use]
    fn ksuf_matches(&self, slot: usize, ikey: u64, suffix: &[u8]) -> bool;
925
    /// Check if a slot matches the given key parameters, with layer detection.
    ///
    /// This is the layer-aware version of [`Self::ksuf_matches`] that returns
    /// detailed match information needed for layer traversal.
    ///
    /// # Arguments
    ///
    /// * `slot` - Physical slot index
    /// * `keylenx` - The keylenx of the search key (0-8 for inline, `KSUF_KEYLENX` for suffix)
    /// * `suffix` - The suffix bytes to match (empty if inline key)
    ///
    /// # Returns
    ///
    /// * `1` - Exact match (ikey, keylenx, and suffix all match)
    /// * `0` - Same ikey but different key (keylenx or suffix mismatch)
    /// * `-8` - Slot is a layer pointer; caller should shift key by 8 bytes and descend
    ///
    /// # Note
    ///
    /// The ikey is assumed to already match: the caller must check
    /// `leaf.ikey(slot) == ikey` before calling this method, otherwise the
    /// result is meaningless.
    ///
    /// # Panics
    ///
    /// Panics in debug mode if `slot >= WIDTH`.
    #[must_use]
    fn ksuf_match_result(&self, slot: usize, keylenx: u8, suffix: &[u8]) -> i32;
953
    // ========================================================================
    // Cache Optimization
    // ========================================================================

    /// Prefetch the leaf node's data into cache.
    ///
    /// Brings the node's key arrays (`ikey0`, `keylenx`) and value pointers
    /// (`leaf_values`) into CPU cache before they're accessed, reducing memory
    /// latency during sequential scanning. Prefetching is purely a performance
    /// hint; correctness never depends on it.
    ///
    /// # C++ Reference
    ///
    /// Matches C++ `leaf::prefetch()` pattern from `masstree_scan.hh:195, 299`.
    fn prefetch(&self);
968
    /// Prefetch the ikey at the given slot into CPU cache.
    ///
    /// Used during linear search to hide memory latency by prefetching
    /// future ikeys while processing current ones. Like [`Self::prefetch`],
    /// this is a performance hint only.
    ///
    /// # Arguments
    ///
    /// * `slot` - Physical slot index (0..WIDTH)
    ///
    /// # Default Implementation
    ///
    /// No-op. Implementations may override with actual prefetch.
    #[inline(always)]
    fn prefetch_ikey(&self, _slot: usize) {
        // Intentionally empty: prefetch is optional, so the default does nothing.
    }
985
    /// Prefetch permutation and ikeys for point lookups.
    ///
    /// Lighter-weight than [`Self::prefetch`] - only fetches cache lines
    /// needed for the search loop. Call before `version.stable()` to
    /// hide memory latency during version spin-wait.
    ///
    /// # Default Implementation
    ///
    /// No-op. Implementations may override with actual prefetch.
    #[inline(always)]
    fn prefetch_for_search(&self) {
        // Intentionally empty: prefetch is optional, so the default does nothing.
    }
999
1000 /// Prefetch for search, adapting to node size.
1001 ///
1002 /// Avoids unnecessary prefetch instructions for small nodes by
1003 /// only prefetching cache lines that contain valid keys.
1004 ///
1005 /// # Arguments
1006 ///
1007 /// * `size` - Number of keys in the node
1008 ///
1009 /// # Default Implementation
1010 ///
1011 /// Falls back to unconditional [`Self::prefetch_for_search`].
1012 #[inline(always)]
1013 fn prefetch_for_search_adaptive(&self, size: usize) {
1014 let _ = size;
1015 self.prefetch_for_search();
1016 }
1017
    // ========================================================================
    // Modification State (modstate) Operations
    // ========================================================================

    /// Get the modification state.
    ///
    /// Returns one of:
    /// - `0` (`MODSTATE_INSERT`): Normal insert mode
    /// - `1` (`MODSTATE_REMOVE`): Node is being removed
    /// - `2` (`MODSTATE_DELETED_LAYER`): Layer has been garbage collected
    ///
    /// NOTE(review): [`Self::is_empty_state`] refers to a `MODSTATE_EMPTY`
    /// value that is not listed here — confirm the full set of states and
    /// keep this list in sync with the constant definitions.
    ///
    /// # C++ Reference
    ///
    /// Matches `leaf::modstate_` in `masstree_struct.hh:264-270`.
    fn modstate(&self) -> u8;
1033
    /// Set the modification state.
    ///
    /// `state` should be one of the `MODSTATE_*` values described on
    /// [`Self::modstate`]; the trait itself implies no validation.
    fn set_modstate(&self, state: u8);
1036
    /// Check if this layer has been deleted (garbage collected).
    ///
    /// This is distinct from `version.is_deleted()`:
    /// - `is_deleted()` means the node itself is removed from the tree
    /// - `deleted_layer()` means the sublayer this node was root of has been gc'd
    ///
    /// When `deleted_layer()` is true, readers should reset their key position
    /// and retry from the main tree root. The flag is set via
    /// [`Self::mark_deleted_layer`].
    ///
    /// # C++ Reference
    ///
    /// Matches `leaf::deleted_layer()` in `masstree_struct.hh:456-458`.
    fn deleted_layer(&self) -> bool;
1050
    /// Mark this layer as deleted (for `gc_layer`).
    ///
    /// Called when garbage collecting an empty sublayer; observed by
    /// readers through [`Self::deleted_layer`].
    fn mark_deleted_layer(&self);

    /// Mark this node as being in remove mode.
    ///
    /// Called at the start of a remove operation; observed through
    /// [`Self::is_removing`].
    fn mark_remove(&self);

    /// Check if this node is in remove mode.
    ///
    /// Presumably equivalent to `modstate() == MODSTATE_REMOVE` —
    /// NOTE(review): confirm against the implementations.
    fn is_removing(&self) -> bool;
1063
    // ========================================================================
    // Empty State (for lazy coalescing)
    // ========================================================================

    /// Check if this leaf is in empty state (modstate == `MODSTATE_EMPTY`).
    ///
    /// Empty state means all keys were removed and the leaf is available
    /// for reuse by insert or cleanup by background coalescing.
    ///
    /// Note: Use `is_empty()` (inherited from trait) to check if permutation
    /// size is 0. Use `is_empty_state()` to check the modstate flag.
    fn is_empty_state(&self) -> bool;

    /// Mark this leaf as empty (all keys removed).
    ///
    /// Called when the last key is removed from a leaf; cleared again by
    /// [`Self::clear_empty_state`] when the leaf is reused.
    fn mark_empty(&self);

    /// Clear empty state, returning to normal insert mode.
    ///
    /// Called when an empty leaf is being reused for a new insert.
    fn clear_empty_state(&self);
}
1087
1088// =============================================================================
1089// LayerCapableLeaf Trait
1090// =============================================================================
1091
1092/// Extension trait for Arc-mode leaves that support layer creation.
1093///
1094/// This trait adds layer-specific operations needed for handling suffix conflicts
1095/// in keys longer than 8 bytes. It is separate from [`TreeLeafNode`] because the
1096/// methods are specific to `LeafValue<V>` mode (Arc-wrapped values).
1097///
1098/// # When Layer Creation Occurs
1099///
1100/// Layer creation is triggered when:
1101/// 1. Two keys share the same 8-byte ikey
1102/// 2. Both have suffixes (bytes beyond the first 8)
1103/// 3. The suffixes differ
1104/// 4. Neither slot is already a layer pointer
1105///
1106/// This is the "Conflict" case in `InsertSearchResultGeneric`.
1107///
1108/// # Implementors
1109///
1110/// - `LeafNode24<LeafValue<V>>`
1111pub trait LayerCapableLeaf<S: ValueSlot>: TreeLeafNode<S>
1112where
1113 S::Value: Send + Sync + 'static,
1114 S::Output: Send + Sync,
1115{
1116 /// Try to clone the Arc value from a slot.
1117 ///
1118 /// Returns `None` if:
1119 /// - Slot is empty (null pointer)
1120 /// - Slot contains a layer pointer (`keylenx >= LAYER_KEYLENX`)
1121 ///
1122 /// # Safety Considerations
1123 ///
1124 /// This method is safe to call, but the caller should:
1125 /// - Hold the node lock (for write operations), OR
1126 /// - Have validated the version (for read operations)
1127 ///
1128 /// The returned `Arc<V>` is a new strong reference; the original
1129 /// slot's reference count is incremented.
1130 ///
1131 /// # Panics
1132 ///
1133 /// Panics in debug mode if `slot >= WIDTH`.
1134 fn try_clone_output(&self, slot: usize) -> Option<S::Output>;
1135
1136 /// Assign a slot from a Key iterator with an Arc value.
1137 ///
1138 /// This method sets up a slot with:
1139 /// - `ikey` from `key.ikey()`
1140 /// - `keylenx` computed from `key.has_suffix()`:
1141 /// - If `key.has_suffix()`: `KSUF_KEYLENX` (64)
1142 /// - Otherwise: `key.current_len().min(8)` (0-8)
1143 /// - Value pointer from `Arc::into_raw(value)`
1144 /// - Suffix data via `assign_ksuf()` if `key.has_suffix()`
1145 ///
1146 /// # Arguments
1147 ///
1148 /// * `slot` - Physical slot index (0..WIDTH)
1149 /// * `key` - The key containing ikey and suffix information
1150 /// * `value` - The Arc-wrapped value. Must be `Some`; `None` will panic.
1151 /// * `guard` - Seize guard for deferred suffix bag retirement
1152 ///
1153 /// # Safety
1154 ///
1155 /// - Caller must hold the node lock
1156 /// - `guard` must come from this tree's collector
1157 /// - Slot must be unoccupied or caller must handle cleanup of old value
1158 ///
1159 /// # Panics
1160 ///
1161 /// - Panics if `value` is `None` (use layer pointer setup methods instead)
1162 /// - Panics in debug mode if `slot >= WIDTH`
1163 unsafe fn assign_from_key_arc(
1164 &self,
1165 slot: usize,
1166 key: &Key<'_>,
1167 value: Option<S::Output>,
1168 guard: &LocalGuard<'_>,
1169 );
1170}
1171
1172// ============================================================================
1173// Tests
1174// ============================================================================
1175
1176#[cfg(test)]
1177mod unit_tests;