use arrayvec::ArrayVec;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::ops::{Bound, RangeBounds};
const LEAF_CAP: usize = 64;
const INNER_CAP: usize = 64;
const INNER_CHILDREN: usize = INNER_CAP + 1;
const MIN_LEAF: usize = LEAF_CAP / 4;
const MIN_INNER: usize = INNER_CAP / 4;
const NONE: u32 = u32::MAX;
const MAX_DEPTH: usize = 20;
fn av_insert<T, const N: usize>(av: &mut ArrayVec<T, N>, idx: usize, val: T) {
av.insert(idx, val);
}
fn av_split_off<T, const N: usize>(av: &mut ArrayVec<T, N>, at: usize) -> ArrayVec<T, N> {
let mut right = ArrayVec::new();
for item in av.drain(at..) {
right.push(item);
}
right
}
fn av_append<T, const N: usize>(dst: &mut ArrayVec<T, N>, src: &mut ArrayVec<T, N>) {
for item in src.drain(..) {
dst.push(item);
}
}
struct LeafNode<K, V> {
keys: ArrayVec<K, LEAF_CAP>,
vals: ArrayVec<V, LEAF_CAP>,
prev: u32,
next: u32,
}
impl<K, V> LeafNode<K, V> {
fn new() -> Self {
Self {
keys: ArrayVec::new(),
vals: ArrayVec::new(),
prev: NONE,
next: NONE,
}
}
#[inline]
fn len(&self) -> usize {
self.keys.len()
}
#[inline]
fn is_full(&self) -> bool {
self.keys.is_full()
}
fn can_lend(&self) -> bool {
self.keys.len() > MIN_LEAF
}
}
impl<K: Ord, V> LeafNode<K, V> {
fn search_kv_pos<Q>(&self, k: &Q, v: &V) -> Result<usize, usize>
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
V: Ord,
{
let mut lo = 0usize;
let mut hi = self.len();
while lo < hi {
let mid = lo + (hi - lo) / 2;
match self.keys[mid]
.borrow()
.cmp(k)
.then_with(|| self.vals[mid].cmp(v))
{
Ordering::Less => lo = mid + 1,
Ordering::Greater => hi = mid,
Ordering::Equal => return Ok(mid),
}
}
Err(lo)
}
fn lower_bound_k<Q>(&self, k: &Q) -> usize
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
self.keys.partition_point(|sk| sk.borrow() < k)
}
fn upper_bound_k<Q>(&self, k: &Q) -> usize
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
self.keys.partition_point(|sk| sk.borrow() <= k)
}
}
struct InnerNode<K, V> {
keys: ArrayVec<K, INNER_CAP>,
vals: ArrayVec<V, INNER_CAP>,
children: ArrayVec<u32, INNER_CHILDREN>,
}
impl<K, V> InnerNode<K, V> {
fn new() -> Self {
Self {
keys: ArrayVec::new(),
vals: ArrayVec::new(),
children: ArrayVec::new(),
}
}
fn len(&self) -> usize {
self.keys.len()
}
fn is_full(&self) -> bool {
self.keys.is_full()
}
fn can_lend(&self) -> bool {
self.keys.len() > MIN_INNER
}
}
impl<K: Ord, V: Ord> InnerNode<K, V> {
fn find_child_kv<Q>(&self, k: &Q, v: &V) -> usize
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
let mut lo = 0usize;
let mut hi = self.len();
while lo < hi {
let mid = lo + (hi - lo) / 2;
match self.keys[mid]
.borrow()
.cmp(k)
.then_with(|| self.vals[mid].cmp(v))
{
Ordering::Less | Ordering::Equal => lo = mid + 1,
Ordering::Greater => hi = mid,
}
}
lo
}
fn find_child_k_lower<Q>(&self, k: &Q) -> usize
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
self.keys.partition_point(|sk| sk.borrow() < k)
}
}
#[derive(Clone, Copy)]
struct PathEntry {
node: u32,
child_pos: usize,
}
struct Path {
entries: [PathEntry; MAX_DEPTH],
len: usize,
}
impl Path {
fn new() -> Self {
Self {
entries: [PathEntry {
node: 0,
child_pos: 0,
}; MAX_DEPTH],
len: 0,
}
}
fn push(&mut self, node: u32, child_pos: usize) {
self.entries[self.len] = PathEntry { node, child_pos };
self.len += 1;
}
}
pub struct BPlusIndex<K, V> {
leaves: Vec<LeafNode<K, V>>,
inners: Vec<InnerNode<K, V>>,
free_leaves: Vec<u32>,
free_inners: Vec<u32>,
root: u32,
height: u32,
len: usize,
first_leaf: u32,
}
impl<K: Ord + Clone, V: Ord + Clone> BPlusIndex<K, V> {
pub fn new() -> Self {
let mut idx = Self {
leaves: Vec::new(),
inners: Vec::new(),
free_leaves: Vec::new(),
free_inners: Vec::new(),
root: 0,
height: 0,
len: 0,
first_leaf: 0,
};
idx.alloc_leaf(); idx
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
fn alloc_leaf(&mut self) -> u32 {
if let Some(id) = self.free_leaves.pop() {
self.leaves[id as usize] = LeafNode::new();
id
} else {
let id = self.leaves.len() as u32;
self.leaves.push(LeafNode::new());
id
}
}
fn free_leaf(&mut self, id: u32) {
self.free_leaves.push(id);
}
fn alloc_inner(&mut self) -> u32 {
if let Some(id) = self.free_inners.pop() {
self.inners[id as usize] = InnerNode::new();
id
} else {
let id = self.inners.len() as u32;
self.inners.push(InnerNode::new());
id
}
}
fn free_inner(&mut self, id: u32) {
self.free_inners.push(id);
}
#[inline]
fn leaf(&self, id: u32) -> &LeafNode<K, V> {
&self.leaves[id as usize]
}
#[inline]
fn leaf_mut(&mut self, id: u32) -> &mut LeafNode<K, V> {
&mut self.leaves[id as usize]
}
#[inline]
fn inner(&self, id: u32) -> &InnerNode<K, V> {
&self.inners[id as usize]
}
#[inline]
fn inner_mut(&mut self, id: u32) -> &mut InnerNode<K, V> {
&mut self.inners[id as usize]
}
fn descend_kv<Q>(&self, k: &Q, v: &V) -> (u32, Path)
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
let mut path = Path::new();
if self.height == 0 {
return (self.root, path);
}
let mut node = self.root;
for level in (1..=self.height).rev() {
let inner = self.inner(node);
let child_pos = inner.find_child_kv(k, v);
path.push(node, child_pos);
node = inner.children[child_pos];
if level == 1 {
return (node, path);
}
}
unreachable!()
}
fn descend_k<Q>(&self, k: &Q) -> u32
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
if self.height == 0 {
return self.root;
}
let mut node = self.root;
for level in (1..=self.height).rev() {
let inner = self.inner(node);
let child_pos = inner.find_child_k_lower(k);
node = inner.children[child_pos];
if level == 1 {
return node;
}
}
unreachable!()
}
pub fn insert(&mut self, key: K, val: V) {
let (leaf_id, path) = self.descend_kv(&key, &val);
let leaf = self.leaf(leaf_id);
let pos = match leaf.search_kv_pos(&key, &val) {
Ok(_) => return, Err(p) => p,
};
if !self.leaf(leaf_id).is_full() {
let leaf = self.leaf_mut(leaf_id);
av_insert(&mut leaf.keys, pos, key);
av_insert(&mut leaf.vals, pos, val);
self.len += 1;
return;
}
let (right_id, sep_k, sep_v) = self.split_leaf(leaf_id, pos, key, val);
self.propagate_split(path, sep_k, sep_v, right_id);
self.len += 1;
}
fn split_leaf(&mut self, leaf_id: u32, pos: usize, key: K, val: V) -> (u32, K, V) {
let split = LEAF_CAP / 2;
let mut right_keys = av_split_off(&mut self.leaf_mut(leaf_id).keys, split);
let mut right_vals = av_split_off(&mut self.leaf_mut(leaf_id).vals, split);
if pos <= split {
let leaf = self.leaf_mut(leaf_id);
av_insert(&mut leaf.keys, pos, key);
av_insert(&mut leaf.vals, pos, val);
} else {
av_insert(&mut right_keys, pos - split, key);
av_insert(&mut right_vals, pos - split, val);
}
let sep_k = right_keys[0].clone();
let sep_v = right_vals[0].clone();
let right_id = self.alloc_leaf();
let old_next = self.leaf(leaf_id).next;
let right = self.leaf_mut(right_id);
right.keys = right_keys;
right.vals = right_vals;
right.prev = leaf_id;
right.next = old_next;
self.leaf_mut(leaf_id).next = right_id;
if old_next != NONE {
self.leaf_mut(old_next).prev = right_id;
}
(right_id, sep_k, sep_v)
}
fn propagate_split(&mut self, path: Path, mut sep_k: K, mut sep_v: V, mut right_child: u32) {
for i in (0..path.len).rev() {
let inner_id = path.entries[i].node;
let pos = self.inner(inner_id).find_child_kv(&sep_k, &sep_v);
if !self.inner(inner_id).is_full() {
let inner = self.inner_mut(inner_id);
av_insert(&mut inner.keys, pos, sep_k);
av_insert(&mut inner.vals, pos, sep_v);
av_insert(&mut inner.children, pos + 1, right_child);
return;
}
let (new_right, med_k, med_v) =
self.split_inner(inner_id, pos, sep_k, sep_v, right_child);
sep_k = med_k;
sep_v = med_v;
right_child = new_right;
}
self.grow_root(sep_k, sep_v, right_child);
}
fn split_inner(
&mut self,
inner_id: u32,
pos: usize,
key: K,
val: V,
child: u32,
) -> (u32, K, V) {
let split = INNER_CAP / 2;
let mut right_keys = av_split_off(&mut self.inner_mut(inner_id).keys, split + 1);
let mut right_vals = av_split_off(&mut self.inner_mut(inner_id).vals, split + 1);
let mut right_children = av_split_off(&mut self.inner_mut(inner_id).children, split + 1);
let med_k = self.inner_mut(inner_id).keys.remove(split);
let med_v = self.inner_mut(inner_id).vals.remove(split);
if pos <= split {
let inner = self.inner_mut(inner_id);
av_insert(&mut inner.keys, pos, key);
av_insert(&mut inner.vals, pos, val);
av_insert(&mut inner.children, pos + 1, child);
} else {
let rp = pos - split - 1;
av_insert(&mut right_keys, rp, key);
av_insert(&mut right_vals, rp, val);
av_insert(&mut right_children, rp + 1, child);
}
let right_id = self.alloc_inner();
let right = self.inner_mut(right_id);
right.keys = right_keys;
right.vals = right_vals;
right.children = right_children;
(right_id, med_k, med_v)
}
fn grow_root(&mut self, sep_k: K, sep_v: V, right_child: u32) {
let old_root = self.root;
let new_root = self.alloc_inner();
let root = self.inner_mut(new_root);
root.keys.push(sep_k);
root.vals.push(sep_v);
root.children.push(old_root);
root.children.push(right_child);
self.root = new_root;
self.height += 1;
}
pub fn delete<Q>(&mut self, key: &Q, val: &V) -> bool
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
let (leaf_id, path) = self.descend_kv(key, val);
let pos = match self.leaf(leaf_id).search_kv_pos(key, val) {
Ok(p) => p,
Err(_) => return false,
};
self.leaf_mut(leaf_id).keys.remove(pos);
self.leaf_mut(leaf_id).vals.remove(pos);
self.len -= 1;
if self.height == 0 {
return true;
}
if self.leaf(leaf_id).len() >= MIN_LEAF {
return true;
}
self.rebalance_leaf(leaf_id, &path);
true
}
fn rebalance_leaf(&mut self, leaf_id: u32, path: &Path) {
let parent_entry = path.entries[path.len - 1];
let parent_id = parent_entry.node;
let child_pos = parent_entry.child_pos;
if child_pos + 1 < self.inner(parent_id).children.len() {
let right_id = self.inner(parent_id).children[child_pos + 1];
if self.leaf(right_id).can_lend() {
let rk = self.leaf_mut(right_id).keys.remove(0);
let rv = self.leaf_mut(right_id).vals.remove(0);
self.leaf_mut(leaf_id).keys.push(rk);
self.leaf_mut(leaf_id).vals.push(rv);
let new_sep_k = self.leaf(right_id).keys[0].clone();
let new_sep_v = self.leaf(right_id).vals[0].clone();
self.inner_mut(parent_id).keys[child_pos] = new_sep_k;
self.inner_mut(parent_id).vals[child_pos] = new_sep_v;
return;
}
}
if child_pos > 0 {
let left_id = self.inner(parent_id).children[child_pos - 1];
if self.leaf(left_id).can_lend() {
let lk = self.leaf_mut(left_id).keys.pop().unwrap();
let lv = self.leaf_mut(left_id).vals.pop().unwrap();
av_insert(&mut self.leaf_mut(leaf_id).keys, 0, lk);
av_insert(&mut self.leaf_mut(leaf_id).vals, 0, lv);
let new_sep_k = self.leaf(leaf_id).keys[0].clone();
let new_sep_v = self.leaf(leaf_id).vals[0].clone();
self.inner_mut(parent_id).keys[child_pos - 1] = new_sep_k;
self.inner_mut(parent_id).vals[child_pos - 1] = new_sep_v;
return;
}
}
if child_pos + 1 < self.inner(parent_id).children.len() {
let right_id = self.inner(parent_id).children[child_pos + 1];
self.merge_leaves(leaf_id, right_id);
self.inner_mut(parent_id).keys.remove(child_pos);
self.inner_mut(parent_id).vals.remove(child_pos);
self.inner_mut(parent_id).children.remove(child_pos + 1);
self.free_leaf(right_id);
} else {
let left_id = self.inner(parent_id).children[child_pos - 1];
self.merge_leaves(left_id, leaf_id);
self.inner_mut(parent_id).keys.remove(child_pos - 1);
self.inner_mut(parent_id).vals.remove(child_pos - 1);
self.inner_mut(parent_id).children.remove(child_pos);
self.free_leaf(leaf_id);
}
self.maybe_shrink_or_rebalance_inner(parent_id, path, path.len - 1);
}
fn merge_leaves(&mut self, left_id: u32, right_id: u32) {
let right = self.leaf_mut(right_id);
let mut rk = std::mem::replace(&mut right.keys, ArrayVec::new());
let mut rv = std::mem::replace(&mut right.vals, ArrayVec::new());
let right_next = right.next;
let left = self.leaf_mut(left_id);
av_append(&mut left.keys, &mut rk);
av_append(&mut left.vals, &mut rv);
left.next = right_next;
if right_next != NONE {
self.leaf_mut(right_next).prev = left_id;
}
}
fn maybe_shrink_or_rebalance_inner(&mut self, inner_id: u32, path: &Path, path_idx: usize) {
if inner_id == self.root {
if self.inner(inner_id).keys.is_empty() {
let only_child = self.inner(inner_id).children[0];
self.free_inner(inner_id);
self.root = only_child;
self.height -= 1;
if self.height == 0 {
self.first_leaf = self.root;
}
}
return;
}
if self.inner(inner_id).len() >= MIN_INNER {
return;
}
self.rebalance_inner(inner_id, path, path_idx);
}
fn rebalance_inner(&mut self, inner_id: u32, path: &Path, path_idx: usize) {
if path_idx == 0 {
return;
}
let parent_entry = path.entries[path_idx - 1];
let parent_id = parent_entry.node;
let child_pos = parent_entry.child_pos;
if child_pos + 1 < self.inner(parent_id).children.len() {
let right_id = self.inner(parent_id).children[child_pos + 1];
if self.inner(right_id).can_lend() {
let sep_k = self.inner_mut(parent_id).keys.remove(child_pos);
let sep_v = self.inner_mut(parent_id).vals.remove(child_pos);
let rk = self.inner_mut(right_id).keys.remove(0);
let rv = self.inner_mut(right_id).vals.remove(0);
let rc = self.inner_mut(right_id).children.remove(0);
av_insert(&mut self.inner_mut(parent_id).keys, child_pos, rk);
av_insert(&mut self.inner_mut(parent_id).vals, child_pos, rv);
self.inner_mut(inner_id).keys.push(sep_k);
self.inner_mut(inner_id).vals.push(sep_v);
self.inner_mut(inner_id).children.push(rc);
return;
}
}
if child_pos > 0 {
let left_id = self.inner(parent_id).children[child_pos - 1];
if self.inner(left_id).can_lend() {
let sep_k = self.inner_mut(parent_id).keys.remove(child_pos - 1);
let sep_v = self.inner_mut(parent_id).vals.remove(child_pos - 1);
let lk = self.inner_mut(left_id).keys.pop().unwrap();
let lv = self.inner_mut(left_id).vals.pop().unwrap();
let lc = self.inner_mut(left_id).children.pop().unwrap();
av_insert(&mut self.inner_mut(parent_id).keys, child_pos - 1, lk);
av_insert(&mut self.inner_mut(parent_id).vals, child_pos - 1, lv);
av_insert(&mut self.inner_mut(inner_id).keys, 0, sep_k);
av_insert(&mut self.inner_mut(inner_id).vals, 0, sep_v);
av_insert(&mut self.inner_mut(inner_id).children, 0, lc);
return;
}
}
if child_pos + 1 < self.inner(parent_id).children.len() {
let right_id = self.inner(parent_id).children[child_pos + 1];
let sep_k = self.inner_mut(parent_id).keys.remove(child_pos);
let sep_v = self.inner_mut(parent_id).vals.remove(child_pos);
self.inner_mut(parent_id).children.remove(child_pos + 1);
self.inner_mut(inner_id).keys.push(sep_k);
self.inner_mut(inner_id).vals.push(sep_v);
let right = self.inner_mut(right_id);
let mut rk = std::mem::replace(&mut right.keys, ArrayVec::new());
let mut rv = std::mem::replace(&mut right.vals, ArrayVec::new());
let mut rc = std::mem::replace(&mut right.children, ArrayVec::new());
av_append(&mut self.inner_mut(inner_id).keys, &mut rk);
av_append(&mut self.inner_mut(inner_id).vals, &mut rv);
av_append(&mut self.inner_mut(inner_id).children, &mut rc);
self.free_inner(right_id);
} else {
let left_id = self.inner(parent_id).children[child_pos - 1];
let sep_k = self.inner_mut(parent_id).keys.remove(child_pos - 1);
let sep_v = self.inner_mut(parent_id).vals.remove(child_pos - 1);
self.inner_mut(parent_id).children.remove(child_pos);
self.inner_mut(left_id).keys.push(sep_k);
self.inner_mut(left_id).vals.push(sep_v);
let cur = self.inner_mut(inner_id);
let mut ck = std::mem::replace(&mut cur.keys, ArrayVec::new());
let mut cv = std::mem::replace(&mut cur.vals, ArrayVec::new());
let mut cc = std::mem::replace(&mut cur.children, ArrayVec::new());
av_append(&mut self.inner_mut(left_id).keys, &mut ck);
av_append(&mut self.inner_mut(left_id).vals, &mut cv);
av_append(&mut self.inner_mut(left_id).children, &mut cc);
self.free_inner(inner_id);
}
self.maybe_shrink_or_rebalance_inner(parent_id, path, path_idx - 1);
}
pub fn update<Q>(&mut self, old_key: &Q, new_key: K, val: V)
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
self.delete(old_key, &val);
self.insert(new_key, val);
}
pub fn lookup_unique<Q>(&self, key: &Q) -> Option<&V>
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
let leaf_id = self.descend_k(key);
let leaf = self.leaf(leaf_id);
let pos = leaf.lower_bound_k(key);
if pos < leaf.len() && leaf.keys[pos].borrow() == key {
return Some(&leaf.vals[pos]);
}
if leaf.next != NONE {
let next = self.leaf(leaf.next);
if !next.keys.is_empty() && next.keys[0].borrow() == key {
return Some(&next.vals[0]);
}
}
None
}
pub fn lookup_range<'a, 'b, Q>(&'a self, key: &'b Q) -> LookupRange<'a, 'b, Q, K, V>
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
let leaf_id = self.descend_k(key);
let leaf = self.leaf(leaf_id);
let pos = leaf.lower_bound_k(key);
LookupRange {
tree: self,
key,
leaf: leaf_id,
pos,
}
}
pub fn range<R: RangeBounds<K>>(&self, bounds: R) -> RangeIter<'_, K, V> {
let (leaf, pos) = match bounds.start_bound() {
Bound::Unbounded => (self.first_leaf, 0),
Bound::Included(k) => {
let lid = self.descend_k(k);
let l = self.leaf(lid);
(lid, l.lower_bound_k(k))
}
Bound::Excluded(k) => {
let lid = self.descend_k(k);
let l = self.leaf(lid);
(lid, l.upper_bound_k(k))
}
};
let end = match bounds.end_bound() {
Bound::Unbounded => RangeEnd::Unbounded,
Bound::Included(k) => RangeEnd::Included(k.clone()),
Bound::Excluded(k) => RangeEnd::Excluded(k.clone()),
};
RangeIter {
tree: self,
leaf,
pos,
end,
}
}
pub fn iter(&self) -> RangeIter<'_, K, V> {
RangeIter {
tree: self,
leaf: self.first_leaf,
pos: 0,
end: RangeEnd::Unbounded,
}
}
}
impl<K: Ord + Clone, V: Ord + Clone> Default for BPlusIndex<K, V> {
fn default() -> Self {
Self::new()
}
}
pub struct LookupRange<'a, 'b, Q, K, V>
where
K: Borrow<Q> + Ord,
Q: Ord + ?Sized,
{
tree: &'a BPlusIndex<K, V>,
key: &'b Q,
leaf: u32,
pos: usize,
}
impl<'a, 'b, Q, K, V> Iterator for LookupRange<'a, 'b, Q, K, V>
where
K: Borrow<Q> + Ord + Clone,
Q: Ord + ?Sized,
V: Ord + Clone,
{
type Item = &'a V;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.leaf == NONE {
return None;
}
let leaf = self.tree.leaf(self.leaf);
if self.pos < leaf.len() {
if leaf.keys[self.pos].borrow() == self.key {
let v = &leaf.vals[self.pos];
self.pos += 1;
return Some(v);
}
return None;
}
self.leaf = leaf.next;
self.pos = 0;
}
}
}
enum RangeEnd<K> {
Unbounded,
Included(K),
Excluded(K),
}
pub struct RangeIter<'a, K, V> {
tree: &'a BPlusIndex<K, V>,
leaf: u32,
pos: usize,
end: RangeEnd<K>,
}
impl<'a, K: Ord + Clone, V: Ord + Clone> Iterator for RangeIter<'a, K, V> {
type Item = (&'a K, &'a V);
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.leaf == NONE {
return None;
}
let leaf = self.tree.leaf(self.leaf);
if self.pos < leaf.len() {
let k = &leaf.keys[self.pos];
match &self.end {
RangeEnd::Excluded(end) if k >= end => return None,
RangeEnd::Included(end) if k > end => return None,
_ => {}
}
let v = &leaf.vals[self.pos];
self.pos += 1;
return Some((k, v));
}
self.leaf = leaf.next;
self.pos = 0;
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
use std::fmt::Debug;
impl<K: Ord + Clone + Debug, V: Ord + Clone + Debug> BPlusIndex<K, V> {
fn validate(&self) {
self.validate_len();
if self.height == 0 {
self.validate_root_leaf();
} else {
self.validate_inner_recursive(self.root, self.height, None, None);
}
self.validate_leaf_chain();
self.validate_first_leaf();
}
fn validate_len(&self) {
let actual: usize = self.iter().count();
assert_eq!(
actual, self.len,
"len mismatch: stored {} but iter yields {}",
self.len, actual
);
}
fn validate_root_leaf(&self) {
let leaf = self.leaf(self.root);
assert!(
leaf.keys.len() == leaf.vals.len(),
"root leaf: keys.len != vals.len"
);
self.validate_leaf_sorted(self.root);
}
fn validate_leaf_sorted(&self, id: u32) {
let leaf = self.leaf(id);
for i in 1..leaf.len() {
let cmp = leaf.keys[i - 1]
.cmp(&leaf.keys[i])
.then_with(|| leaf.vals[i - 1].cmp(&leaf.vals[i]));
assert!(
cmp == Ordering::Less,
"leaf {}: entries not strictly sorted at position {}: ({:?},{:?}) vs ({:?},{:?})",
id,
i,
leaf.keys[i - 1],
leaf.vals[i - 1],
leaf.keys[i],
leaf.vals[i]
);
}
}
fn validate_inner_recursive(
&self,
node_id: u32,
level: u32,
lower: Option<(&K, &V)>,
upper: Option<(&K, &V)>,
) {
let inner = self.inner(node_id);
assert_eq!(
inner.children.len(),
inner.keys.len() + 1,
"inner {}: children.len ({}) != keys.len ({}) + 1",
node_id,
inner.children.len(),
inner.keys.len()
);
for i in 1..inner.len() {
let cmp = inner.keys[i - 1]
.cmp(&inner.keys[i])
.then_with(|| inner.vals[i - 1].cmp(&inner.vals[i]));
assert!(
cmp == Ordering::Less,
"inner {}: separators not sorted at position {}",
node_id,
i
);
}
if node_id != self.root {
assert!(
inner.len() >= MIN_INNER,
"inner {}: underflow, len {} < MIN_INNER {}",
node_id,
inner.len(),
MIN_INNER
);
}
for ci in 0..inner.children.len() {
let child_lower = if ci == 0 {
lower
} else {
Some((&inner.keys[ci - 1], &inner.vals[ci - 1]))
};
let child_upper = if ci == inner.keys.len() {
upper
} else {
Some((&inner.keys[ci], &inner.vals[ci]))
};
if level == 1 {
let leaf_id = inner.children[ci];
let leaf = self.leaf(leaf_id);
if self.len > 0 {
let total_leaves = self.iter().count();
if total_leaves > LEAF_CAP {
assert!(
leaf.len() >= MIN_LEAF,
"leaf {}: underflow, len {} < MIN_LEAF {}",
leaf_id,
leaf.len(),
MIN_LEAF
);
}
}
self.validate_leaf_sorted(leaf_id);
for i in 0..leaf.len() {
if let Some((lk, lv)) = child_lower {
let cmp = leaf.keys[i].cmp(lk).then_with(|| leaf.vals[i].cmp(lv));
assert!(
cmp != Ordering::Less,
"leaf {}: entry ({:?},{:?}) below lower bound ({:?},{:?})",
leaf_id,
leaf.keys[i],
leaf.vals[i],
lk,
lv
);
}
if let Some((uk, uv)) = child_upper {
let cmp = leaf.keys[i].cmp(uk).then_with(|| leaf.vals[i].cmp(uv));
assert!(
cmp == Ordering::Less,
"leaf {}: entry ({:?},{:?}) not below upper bound ({:?},{:?})",
leaf_id,
leaf.keys[i],
leaf.vals[i],
uk,
uv
);
}
}
} else {
self.validate_inner_recursive(
inner.children[ci],
level - 1,
child_lower,
child_upper,
);
}
}
}
fn validate_leaf_chain(&self) {
let mut visited = HashSet::new();
let mut leaf_id = self.first_leaf;
let mut prev_id = NONE;
let mut total_entries = 0usize;
while leaf_id != NONE {
assert!(
visited.insert(leaf_id),
"leaf chain: cycle detected at leaf {}",
leaf_id
);
let leaf = self.leaf(leaf_id);
assert_eq!(
leaf.prev, prev_id,
"leaf {}: prev is {} but expected {}",
leaf_id, leaf.prev, prev_id
);
if prev_id != NONE && leaf.len() > 0 {
let prev_leaf = self.leaf(prev_id);
if prev_leaf.len() > 0 {
let prev_last_k = &prev_leaf.keys[prev_leaf.len() - 1];
let prev_last_v = &prev_leaf.vals[prev_leaf.len() - 1];
let cur_first_k = &leaf.keys[0];
let cur_first_v = &leaf.vals[0];
let cmp = prev_last_k
.cmp(cur_first_k)
.then_with(|| prev_last_v.cmp(cur_first_v));
assert!(
cmp == Ordering::Less,
"leaf chain: last entry of leaf {} ({:?},{:?}) >= first entry of leaf {} ({:?},{:?})",
prev_id,
prev_last_k,
prev_last_v,
leaf_id,
cur_first_k,
cur_first_v
);
}
}
total_entries += leaf.len();
prev_id = leaf_id;
leaf_id = leaf.next;
}
assert_eq!(
total_entries, self.len,
"leaf chain: total entries {} != stored len {}",
total_entries, self.len
);
}
fn validate_first_leaf(&self) {
if self.height == 0 {
assert_eq!(
self.first_leaf, self.root,
"height 0 but first_leaf != root"
);
} else {
let mut node = self.root;
for level in (1..=self.height).rev() {
let inner = self.inner(node);
node = inner.children[0];
if level == 1 {
assert_eq!(
self.first_leaf, node,
"first_leaf {} != leftmost leaf {}",
self.first_leaf, node
);
return;
}
}
}
}
}
fn insert_and_validate(idx: &mut BPlusIndex<i32, u64>, k: i32, v: u64) {
idx.insert(k, v);
idx.validate();
}
fn delete_and_validate(idx: &mut BPlusIndex<i32, u64>, k: &i32, v: &u64) -> bool {
let result = idx.delete(k, v);
idx.validate();
result
}
#[test]
fn empty_tree() {
let idx = BPlusIndex::<i32, u64>::new();
assert_eq!(idx.len(), 0);
assert!(idx.is_empty());
assert_eq!(idx.lookup_unique(&0), None);
assert_eq!(idx.lookup_range(&0).count(), 0);
assert_eq!(idx.range(..).count(), 0);
assert_eq!(idx.range(0..10).count(), 0);
assert_eq!(idx.iter().count(), 0);
idx.validate();
}
#[test]
fn empty_tree_delete() {
let mut idx = BPlusIndex::<i32, u64>::new();
assert!(!idx.delete(&1, &1));
idx.validate();
}
#[test]
fn single_insert_delete() {
let mut idx = BPlusIndex::<i32, u64>::new();
insert_and_validate(&mut idx, 42, 1);
assert_eq!(idx.len(), 1);
assert_eq!(idx.lookup_unique(&42), Some(&1));
assert!(delete_and_validate(&mut idx, &42, &1));
assert_eq!(idx.len(), 0);
assert_eq!(idx.lookup_unique(&42), None);
}
#[test]
fn insert_duplicate_kv_is_noop() {
let mut idx = BPlusIndex::<i32, u64>::new();
insert_and_validate(&mut idx, 5, 100);
insert_and_validate(&mut idx, 5, 100);
assert_eq!(idx.len(), 1);
}
#[test]
fn non_unique_keys_small() {
let mut idx = BPlusIndex::<i32, u64>::new();
insert_and_validate(&mut idx, 5, 100);
insert_and_validate(&mut idx, 5, 200);
insert_and_validate(&mut idx, 5, 300);
insert_and_validate(&mut idx, 10, 400);
let vals: Vec<_> = idx.lookup_range(&5).copied().collect();
assert_eq!(vals, vec![100, 200, 300]);
assert_eq!(idx.lookup_unique(&5), Some(&100)); }
#[test]
fn non_unique_keys_spanning_leaves() {
let mut idx = BPlusIndex::<i32, u64>::new();
for v in 0..200u64 {
insert_and_validate(&mut idx, 42, v);
}
assert_eq!(idx.len(), 200);
let vals: Vec<_> = idx.lookup_range(&42).copied().collect();
assert_eq!(vals, (0..200u64).collect::<Vec<_>>());
for v in (0..200u64).step_by(2) {
assert!(delete_and_validate(&mut idx, &42, &v));
}
assert_eq!(idx.len(), 100);
let vals: Vec<_> = idx.lookup_range(&42).copied().collect();
let expected: Vec<u64> = (0..200u64).filter(|v| v % 2 != 0).collect();
assert_eq!(vals, expected);
}
#[test]
fn insert_exactly_leaf_cap() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..LEAF_CAP as i32 {
insert_and_validate(&mut idx, i, i as u64);
}
assert_eq!(idx.len(), LEAF_CAP);
assert_eq!(idx.height, 0); }
#[test]
fn insert_triggers_first_split() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..=LEAF_CAP as i32 {
insert_and_validate(&mut idx, i, i as u64);
}
assert_eq!(idx.len(), LEAF_CAP + 1);
assert_eq!(idx.height, 1); }
#[test]
fn insert_triggers_inner_split() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = LEAF_CAP * INNER_CAP + 100;
for i in 0..n as i32 {
idx.insert(i, i as u64);
}
idx.validate();
assert!(idx.height >= 2, "expected height >= 2, got {}", idx.height);
}
#[test]
fn delete_nonexistent_key() {
let mut idx = BPlusIndex::<i32, u64>::new();
insert_and_validate(&mut idx, 1, 10);
assert!(!idx.delete(&999, &10));
assert!(!idx.delete(&1, &999));
assert_eq!(idx.len(), 1);
idx.validate();
}
#[test]
fn delete_triggers_borrow_from_right() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..LEAF_CAP as i32 + 10 {
idx.insert(i, i as u64);
}
idx.validate();
for i in 0..LEAF_CAP as i32 / 2 {
delete_and_validate(&mut idx, &i, &(i as u64));
}
}
#[test]
fn delete_triggers_borrow_from_left() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..LEAF_CAP as i32 + 10 {
idx.insert(i, i as u64);
}
idx.validate();
for i in (LEAF_CAP as i32..LEAF_CAP as i32 + 10).rev() {
delete_and_validate(&mut idx, &i, &(i as u64));
}
}
#[test]
fn delete_triggers_leaf_merge() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..LEAF_CAP as i32 + 2 {
idx.insert(i, i as u64);
}
idx.validate();
assert_eq!(idx.height, 1);
for i in (0..LEAF_CAP as i32 + 2).rev() {
delete_and_validate(&mut idx, &i, &(i as u64));
if idx.len() <= MIN_LEAF {
break;
}
}
assert_eq!(idx.height, 0, "tree should have shrunk back to single leaf");
}
#[test]
fn delete_all_forward() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = 500;
for i in 0..n {
idx.insert(i, i as u64);
}
for i in 0..n {
assert!(delete_and_validate(&mut idx, &i, &(i as u64)));
}
assert_eq!(idx.len(), 0);
}
#[test]
fn delete_all_reverse() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = 500;
for i in 0..n {
idx.insert(i, i as u64);
}
for i in (0..n).rev() {
assert!(delete_and_validate(&mut idx, &i, &(i as u64)));
}
assert_eq!(idx.len(), 0);
}
#[test]
fn delete_all_from_middle_out() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = 500i32;
for i in 0..n {
idx.insert(i, i as u64);
}
let mid = n / 2;
for d in 0..mid {
delete_and_validate(&mut idx, &(mid + d), &((mid + d) as u64));
if mid - d > 0 {
delete_and_validate(&mut idx, &(mid - d - 1), &((mid - d - 1) as u64));
}
}
assert_eq!(idx.len(), 0);
}
#[test]
fn delete_cascade_inner_rebalance() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = 5000;
for i in 0..n {
idx.insert(i, i as u64);
}
idx.validate();
let initial_height = idx.height;
assert!(initial_height >= 2);
for i in 0..(n * 9 / 10) {
assert!(delete_and_validate(&mut idx, &i, &(i as u64)));
}
assert!(idx.height < initial_height);
}
#[test]
fn lookup_unique_all() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..200 {
idx.insert(i, i as u64 * 10);
}
for i in 0..200 {
assert_eq!(idx.lookup_unique(&i), Some(&(i as u64 * 10)));
}
assert_eq!(idx.lookup_unique(&999), None);
assert_eq!(idx.lookup_unique(&-1), None);
}
#[test]
fn lookup_unique_boundary_between_leaves() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..200 {
idx.insert(i, i as u64);
}
for i in 0..200 {
assert_eq!(
idx.lookup_unique(&i),
Some(&(i as u64)),
"lookup_unique failed for key {}",
i
);
}
}
#[test]
fn lookup_range_empty_result() {
let mut idx = BPlusIndex::<i32, u64>::new();
idx.insert(5, 100);
idx.insert(10, 200);
assert_eq!(idx.lookup_range(&7).count(), 0);
}
#[test]
fn lookup_range_key_not_in_tree() {
let idx = BPlusIndex::<i32, u64>::new();
assert_eq!(idx.lookup_range(&42).count(), 0);
}
#[test]
fn range_exclusive_both_ends() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..100 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(10..20).map(|(k, _)| *k).collect();
assert_eq!(vals, (10..20).collect::<Vec<_>>());
}
#[test]
fn range_inclusive_both_ends() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..100 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(10..=20).map(|(k, _)| *k).collect();
assert_eq!(vals, (10..=20).collect::<Vec<_>>());
}
#[test]
fn range_unbounded_start() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..50 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(..10).map(|(k, _)| *k).collect();
assert_eq!(vals, (0..10).collect::<Vec<_>>());
}
#[test]
fn range_unbounded_end() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..50 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(40..).map(|(k, _)| *k).collect();
assert_eq!(vals, (40..50).collect::<Vec<_>>());
}
#[test]
fn range_fully_unbounded() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..50 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(..).map(|(k, _)| *k).collect();
assert_eq!(vals, (0..50).collect::<Vec<_>>());
}
#[test]
fn range_empty_result() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..50 {
idx.insert(i, i as u64);
}
assert_eq!(idx.range(100..200).count(), 0);
assert_eq!(idx.range(-10..-5).count(), 0);
}
#[test]
fn range_on_empty_tree() {
let idx = BPlusIndex::<i32, u64>::new();
assert_eq!(idx.range(0..100).count(), 0);
assert_eq!(idx.range(..).count(), 0);
}
#[test]
fn range_spanning_multiple_leaves() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..500 {
idx.insert(i, i as u64);
}
let vals: Vec<_> = idx.range(100..400).map(|(k, _)| *k).collect();
assert_eq!(vals, (100..400).collect::<Vec<_>>());
}
#[test]
fn range_with_non_unique_keys() {
let mut idx = BPlusIndex::<i32, u64>::new();
for k in 0..10 {
for v in 0..5u64 {
idx.insert(k, v);
}
}
let entries: Vec<_> = idx.range(3..7).collect();
assert_eq!(entries.len(), 4 * 5);
for (k, _) in &entries {
assert!(**k >= 3 && **k < 7);
}
}
#[test]
fn iter_empty() {
let idx = BPlusIndex::<i32, u64>::new();
assert_eq!(idx.iter().count(), 0);
}
#[test]
fn iter_sorted_after_reverse_insert() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in (0..300).rev() {
idx.insert(i, i as u64);
}
idx.validate();
let keys: Vec<_> = idx.iter().map(|(k, _)| *k).collect();
assert_eq!(keys, (0..300).collect::<Vec<_>>());
}
#[test]
fn iter_sorted_after_random_order_insert() {
let mut idx = BPlusIndex::<i32, u64>::new();
let mut keys: Vec<i32> = (0..500).collect();
for i in (1..keys.len()).rev() {
let j = (i * 2654435761) % (i + 1);
keys.swap(i, j);
}
for k in &keys {
idx.insert(*k, *k as u64);
}
idx.validate();
let result: Vec<_> = idx.iter().map(|(k, _)| *k).collect();
assert_eq!(result, (0..500).collect::<Vec<_>>());
}
#[test]
fn update_moves_entry() {
let mut idx = BPlusIndex::<i32, u64>::new();
idx.insert(5, 42);
idx.update(&5, 10, 42);
assert_eq!(idx.lookup_unique(&5), None);
assert_eq!(idx.lookup_unique(&10), Some(&42));
idx.validate();
}
#[test]
fn update_nonexistent_key_still_inserts() {
let mut idx = BPlusIndex::<i32, u64>::new();
idx.insert(5, 42);
idx.update(&999, 10, 42); assert_eq!(idx.lookup_unique(&5), Some(&42));
assert_eq!(idx.lookup_unique(&10), Some(&42));
assert_eq!(idx.len(), 2);
idx.validate();
}
#[test]
fn node_recycling_after_delete_insert() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..500 {
idx.insert(i, i as u64);
}
let leaves_after_insert = idx.leaves.len();
for i in 0..500 {
idx.delete(&i, &(i as u64));
}
idx.validate();
let free_after_delete = idx.free_leaves.len();
assert!(free_after_delete > 0, "expected freed leaves");
for i in 0..500 {
idx.insert(i, i as u64);
}
idx.validate();
assert_eq!(
idx.leaves.len(),
leaves_after_insert,
"arena grew instead of recycling"
);
}
#[test]
fn stress_insert_delete_interleaved() {
let mut idx = BPlusIndex::<i32, u64>::new();
for i in 0..1000 {
idx.insert(i, i as u64);
}
idx.validate();
for i in (0..1000).step_by(2) {
assert!(idx.delete(&i, &(i as u64)));
}
idx.validate();
assert_eq!(idx.len(), 500);
for i in (1..1000).step_by(2) {
assert_eq!(idx.lookup_unique(&i), Some(&(i as u64)));
}
for i in (0..1000).step_by(2) {
idx.insert(i, i as u64);
}
idx.validate();
assert_eq!(idx.len(), 1000);
let keys: Vec<_> = idx.iter().map(|(k, _)| *k).collect();
assert_eq!(keys, (0..1000).collect::<Vec<_>>());
}
#[test]
fn stress_insert_delete_reinsert_cycles() {
let mut idx = BPlusIndex::<i32, u64>::new();
for cycle in 0..5 {
let base = cycle * 100;
for i in base..base + 100 {
insert_and_validate(&mut idx, i, i as u64);
}
for i in base..base + 50 {
assert!(delete_and_validate(&mut idx, &i, &(i as u64)));
}
}
assert_eq!(idx.len(), 250);
}
#[test]
fn stress_large_tree() {
let mut idx = BPlusIndex::<i32, u64>::new();
let n = 10_000i32;
for i in 0..n {
idx.insert(i, i as u64);
}
idx.validate();
assert_eq!(idx.len(), n as usize);
for i in (0..n).step_by(3) {
assert!(idx.delete(&i, &(i as u64)));
}
idx.validate();
for i in 0..n {
if i % 3 == 0 {
assert_eq!(idx.lookup_unique(&i), None);
} else {
assert_eq!(idx.lookup_unique(&i), Some(&(i as u64)));
}
}
}
#[test]
fn composite_key_range() {
let mut idx = BPlusIndex::<(u8, i32), u64>::new();
for i in 0..100 {
idx.insert((1, 1000 + i), i as u64);
}
for i in 0..50 {
idx.insert((2, 1000 + i), 100 + i as u64);
}
idx.validate();
let tickets: Vec<_> = idx.range((1, 1020)..(1, 1030)).map(|(_, v)| *v).collect();
assert_eq!(tickets, (20..30u64).collect::<Vec<_>>());
let mode2: Vec<_> = idx.range((2, 0)..(3, 0)).map(|(_, v)| *v).collect();
assert_eq!(mode2.len(), 50);
}
#[test]
fn all_same_key() {
let mut idx = BPlusIndex::<i32, u64>::new();
for v in 0..500u64 {
idx.insert(0, v);
}
idx.validate();
assert_eq!(idx.len(), 500);
let vals: Vec<_> = idx.lookup_range(&0).copied().collect();
assert_eq!(vals, (0..500u64).collect::<Vec<_>>());
for v in 0..500u64 {
assert!(delete_and_validate(&mut idx, &0, &v));
}
assert_eq!(idx.len(), 0);
}
}