1#[allow(unused_imports)]
17use rayon::prelude::*;
18
19pub const LEAF_MAX_KEYS: usize = 32;
22
23pub const BRANCH_MAX_KEYS: usize = 32;
25
26pub const MIN_KEYS: usize = LEAF_MAX_KEYS / 2;
28
29#[derive(Debug)]
34pub struct BTree {
35 root: Option<Box<Node>>,
36 len: usize,
37}
38
39#[derive(Debug)]
41enum Node {
42 Leaf(LeafNode),
44 Branch(BranchNode),
46}
47
48#[derive(Debug)]
50struct LeafNode {
51 keys: Vec<Vec<u8>>,
53 values: Vec<Vec<u8>>,
55}
56
57#[derive(Debug)]
59struct BranchNode {
60 keys: Vec<Vec<u8>>,
62 #[allow(clippy::vec_box)]
65 children: Vec<Box<Node>>,
66}
67
68impl BTree {
69 pub fn new() -> Self {
71 Self { root: None, len: 0 }
72 }
73
74 #[inline]
76 pub fn len(&self) -> usize {
77 self.len
78 }
79
80 #[inline]
82 pub fn is_empty(&self) -> bool {
83 self.len == 0
84 }
85
86 pub fn get(&self, key: &[u8]) -> Option<&[u8]> {
90 let root = self.root.as_ref()?;
91 Self::search_node(root, key)
92 }
93
94 pub fn insert(&mut self, key: Vec<u8>, value: Vec<u8>) -> Option<Vec<u8>> {
98 if self.root.is_none() {
99 self.root = Some(Box::new(Node::Leaf(LeafNode {
101 keys: vec![key],
102 values: vec![value],
103 })));
104 self.len = 1;
105 return None;
106 }
107
108 let root = self.root.take().unwrap();
109 let (new_root, old_value, split) = Self::insert_into_node(root, key, value);
110
111 self.root = Some(if let Some((median_key, right_child)) = split {
112 Box::new(Node::Branch(BranchNode {
114 keys: vec![median_key],
115 children: vec![new_root, right_child],
116 }))
117 } else {
118 new_root
119 });
120
121 if old_value.is_none() {
122 self.len += 1;
123 }
124
125 old_value
126 }
127
128 pub fn remove(&mut self, key: &[u8]) -> Option<Vec<u8>> {
132 let root = self.root.take()?;
133 let (new_root, removed_value) = Self::remove_from_node(root, key);
134
135 self.root = new_root;
136
137 if removed_value.is_some() {
138 self.len -= 1;
139 }
140
141 removed_value
142 }
143
144 pub fn insert_bulk<I>(&mut self, entries: I) -> usize
158 where
159 I: IntoIterator<Item = (Vec<u8>, Vec<u8>)>,
160 {
161 let entries: Vec<_> = entries.into_iter().collect();
162 if entries.is_empty() {
163 return 0;
164 }
165
166 let mut inserted_count = 0;
167 for (key, value) in entries {
168 if self.insert(key, value).is_none() {
169 inserted_count += 1;
170 }
171 }
172 inserted_count
173 }
174
175 pub fn insert_bulk_sorted(&mut self, entries: &[(Vec<u8>, Vec<u8>)]) -> usize {
193 if entries.is_empty() {
194 return 0;
195 }
196
197 let mut inserted_count = 0;
198 for (key, value) in entries {
199 if self.insert(key.clone(), value.clone()).is_none() {
200 inserted_count += 1;
201 }
202 }
203 inserted_count
204 }
205
206 fn search_node<'a>(node: &'a Node, key: &[u8]) -> Option<&'a [u8]> {
208 match node {
209 Node::Leaf(leaf) => {
210 match leaf.keys.binary_search_by(|k| k.as_slice().cmp(key)) {
212 Ok(idx) => Some(&leaf.values[idx]),
213 Err(_) => None,
214 }
215 }
216 Node::Branch(branch) => {
217 let child_idx = Self::find_child_index(&branch.keys, key);
219 Self::search_node(&branch.children[child_idx], key)
220 }
221 }
222 }
223
224 fn find_child_index(keys: &[Vec<u8>], key: &[u8]) -> usize {
226 match keys.binary_search_by(|k| k.as_slice().cmp(key)) {
227 Ok(idx) => idx + 1, Err(idx) => idx, }
230 }
231
232 #[allow(clippy::type_complexity)]
237 fn insert_into_node(
238 mut node: Box<Node>,
239 key: Vec<u8>,
240 value: Vec<u8>,
241 ) -> (Box<Node>, Option<Vec<u8>>, Option<(Vec<u8>, Box<Node>)>) {
242 match node.as_mut() {
243 Node::Leaf(leaf) => {
244 match leaf.keys.binary_search_by(|k| k.as_slice().cmp(&key)) {
246 Ok(idx) => {
247 let old_value = std::mem::replace(&mut leaf.values[idx], value);
249 (node, Some(old_value), None)
250 }
251 Err(idx) => {
252 leaf.keys.insert(idx, key);
254 leaf.values.insert(idx, value);
255
256 if leaf.keys.len() > LEAF_MAX_KEYS {
257 let split = Self::split_leaf(leaf);
259 (node, None, Some(split))
260 } else {
261 (node, None, None)
262 }
263 }
264 }
265 }
266 Node::Branch(branch) => {
267 let child_idx = Self::find_child_index(&branch.keys, &key);
269 let child = branch.children.remove(child_idx);
270
271 let (updated_child, old_value, child_split) =
272 Self::insert_into_node(child, key, value);
273
274 branch.children.insert(child_idx, updated_child);
275
276 if let Some((median_key, right_child)) = child_split {
277 branch.keys.insert(child_idx, median_key);
279 branch.children.insert(child_idx + 1, right_child);
280
281 if branch.keys.len() > BRANCH_MAX_KEYS {
282 let split = Self::split_branch(branch);
284 (node, old_value, Some(split))
285 } else {
286 (node, old_value, None)
287 }
288 } else {
289 (node, old_value, None)
290 }
291 }
292 }
293 }
294
295 fn split_leaf(leaf: &mut LeafNode) -> (Vec<u8>, Box<Node>) {
297 let mid = leaf.keys.len() / 2;
298
299 let right_keys = leaf.keys.split_off(mid);
300 let right_values = leaf.values.split_off(mid);
301
302 let median_key = right_keys[0].clone();
304
305 let right_node = Box::new(Node::Leaf(LeafNode {
306 keys: right_keys,
307 values: right_values,
308 }));
309
310 (median_key, right_node)
311 }
312
313 fn split_branch(branch: &mut BranchNode) -> (Vec<u8>, Box<Node>) {
315 let mid = branch.keys.len() / 2;
316
317 let median_key = branch.keys.remove(mid);
319 let right_keys = branch.keys.split_off(mid);
320 let right_children = branch.children.split_off(mid + 1);
321
322 let right_node = Box::new(Node::Branch(BranchNode {
323 keys: right_keys,
324 children: right_children,
325 }));
326
327 (median_key, right_node)
328 }
329
330 fn remove_from_node(mut node: Box<Node>, key: &[u8]) -> (Option<Box<Node>>, Option<Vec<u8>>) {
334 match node.as_mut() {
335 Node::Leaf(leaf) => match leaf.keys.binary_search_by(|k| k.as_slice().cmp(key)) {
336 Ok(idx) => {
337 leaf.keys.remove(idx);
338 let value = leaf.values.remove(idx);
339
340 if leaf.keys.is_empty() {
341 (None, Some(value))
342 } else {
343 (Some(node), Some(value))
344 }
345 }
346 Err(_) => (Some(node), None),
347 },
348 Node::Branch(branch) => {
349 let child_idx = Self::find_child_index(&branch.keys, key);
350 let child = branch.children.remove(child_idx);
351
352 let (updated_child, removed_value) = Self::remove_from_node(child, key);
353
354 match updated_child {
355 Some(child) => {
356 branch.children.insert(child_idx, child);
357
358 if Self::node_key_count(&branch.children[child_idx]) < MIN_KEYS {
360 Self::rebalance_child(branch, child_idx);
361 }
362 }
363 None => {
364 if child_idx > 0 {
366 branch.keys.remove(child_idx - 1);
367 } else if !branch.keys.is_empty() {
368 branch.keys.remove(0);
369 }
370 }
371 }
372
373 if branch.keys.is_empty() && branch.children.len() == 1 {
375 let only_child = branch.children.pop().unwrap();
376 (Some(only_child), removed_value)
377 } else if branch.children.is_empty() {
378 (None, removed_value)
379 } else {
380 (Some(node), removed_value)
381 }
382 }
383 }
384 }
385
386 fn node_key_count(node: &Node) -> usize {
388 match node {
389 Node::Leaf(leaf) => leaf.keys.len(),
390 Node::Branch(branch) => branch.keys.len(),
391 }
392 }
393
394 fn rebalance_child(branch: &mut BranchNode, child_idx: usize) {
396 if child_idx > 0 {
398 let left_count = Self::node_key_count(&branch.children[child_idx - 1]);
399 if left_count > MIN_KEYS {
400 Self::borrow_from_left(branch, child_idx);
401 return;
402 }
403 }
404
405 if child_idx < branch.children.len() - 1 {
407 let right_count = Self::node_key_count(&branch.children[child_idx + 1]);
408 if right_count > MIN_KEYS {
409 Self::borrow_from_right(branch, child_idx);
410 return;
411 }
412 }
413
414 if child_idx > 0 {
416 Self::merge_with_left(branch, child_idx);
417 } else if child_idx < branch.children.len() - 1 {
418 Self::merge_with_right(branch, child_idx);
419 }
420 }
421
422 fn borrow_from_left(branch: &mut BranchNode, child_idx: usize) {
424 let separator_idx = child_idx - 1;
425
426 let (left_slice, right_slice) = branch.children.split_at_mut(child_idx);
428 let left = left_slice.last_mut().unwrap();
429 let right = right_slice.first_mut().unwrap();
430
431 match (left.as_mut(), right.as_mut()) {
432 (Node::Leaf(left), Node::Leaf(right)) => {
433 let key = left.keys.pop().unwrap();
434 let value = left.values.pop().unwrap();
435 right.keys.insert(0, key.clone());
436 right.values.insert(0, value);
437 branch.keys[separator_idx] = key;
438 }
439 (Node::Branch(left), Node::Branch(right)) => {
440 let key = left.keys.pop().unwrap();
441 let child = left.children.pop().unwrap();
442 let separator = std::mem::replace(&mut branch.keys[separator_idx], key);
443 right.keys.insert(0, separator);
444 right.children.insert(0, child);
445 }
446 _ => unreachable!("mismatched node types"),
447 }
448 }
449
450 fn borrow_from_right(branch: &mut BranchNode, child_idx: usize) {
452 let separator_idx = child_idx;
453
454 let (left_slice, right_slice) = branch.children.split_at_mut(child_idx + 1);
456 let left = left_slice.last_mut().unwrap();
457 let right = right_slice.first_mut().unwrap();
458
459 match (left.as_mut(), right.as_mut()) {
460 (Node::Leaf(left), Node::Leaf(right)) => {
461 let key = right.keys.remove(0);
462 let value = right.values.remove(0);
463 left.keys.push(key);
464 left.values.push(value);
465 branch.keys[separator_idx] = right.keys[0].clone();
466 }
467 (Node::Branch(left), Node::Branch(right)) => {
468 let key = right.keys.remove(0);
469 let child = right.children.remove(0);
470 let separator = std::mem::replace(&mut branch.keys[separator_idx], key);
471 left.keys.push(separator);
472 left.children.push(child);
473 }
474 _ => unreachable!("mismatched node types"),
475 }
476 }
477
478 fn merge_with_left(branch: &mut BranchNode, child_idx: usize) {
480 let separator_idx = child_idx - 1;
481 let separator = branch.keys.remove(separator_idx);
482 let right = branch.children.remove(child_idx);
483
484 match (branch.children[child_idx - 1].as_mut(), *right) {
485 (Node::Leaf(left), Node::Leaf(right)) => {
486 left.keys.extend(right.keys);
487 left.values.extend(right.values);
488 }
489 (Node::Branch(left), Node::Branch(right)) => {
490 left.keys.push(separator);
491 left.keys.extend(right.keys);
492 left.children.extend(right.children);
493 }
494 _ => unreachable!("mismatched node types"),
495 }
496 }
497
498 fn merge_with_right(branch: &mut BranchNode, child_idx: usize) {
500 let separator = branch.keys.remove(child_idx);
501 let right = branch.children.remove(child_idx + 1);
502
503 match (branch.children[child_idx].as_mut(), *right) {
504 (Node::Leaf(left), Node::Leaf(right)) => {
505 left.keys.extend(right.keys);
506 left.values.extend(right.values);
507 }
508 (Node::Branch(left), Node::Branch(right)) => {
509 left.keys.push(separator);
510 left.keys.extend(right.keys);
511 left.children.extend(right.children);
512 }
513 _ => unreachable!("mismatched node types"),
514 }
515 }
516
517 pub fn iter(&self) -> BTreeIter<'_> {
519 BTreeIter::new(self.root.as_deref())
520 }
521
522 pub fn range<'a>(&'a self, start: Bound<'a>, end: Bound<'a>) -> BTreeRangeIter<'a> {
526 BTreeRangeIter::new(self, start, end)
527 }
528}
529
530impl Default for BTree {
531 fn default() -> Self {
532 Self::new()
533 }
534}
535
536pub struct BTreeIter<'a> {
538 stack: Vec<(&'a Node, usize)>,
540 current_leaf: Option<(&'a LeafNode, usize)>,
542}
543
544impl<'a> BTreeIter<'a> {
545 fn new(root: Option<&'a Node>) -> Self {
546 let mut iter = Self {
547 stack: Vec::new(),
548 current_leaf: None,
549 };
550
551 if let Some(node) = root {
552 iter.descend_to_leftmost(node);
553 }
554
555 iter
556 }
557
558 fn descend_to_leftmost(&mut self, mut node: &'a Node) {
560 loop {
561 match node {
562 Node::Leaf(leaf) => {
563 if !leaf.keys.is_empty() {
564 self.current_leaf = Some((leaf, 0));
565 }
566 break;
567 }
568 Node::Branch(branch) => {
569 self.stack.push((node, 0));
570 node = &branch.children[0];
571 }
572 }
573 }
574 }
575
576 fn advance_to_next_leaf(&mut self) {
578 self.current_leaf = None;
579
580 while let Some((node, mut idx)) = self.stack.pop() {
581 if let Node::Branch(branch) = node {
582 idx += 1;
583 if idx < branch.children.len() {
584 self.stack.push((node, idx));
585 self.descend_to_leftmost(&branch.children[idx]);
586 return;
587 }
588 }
589 }
590 }
591}
592
593impl<'a> Iterator for BTreeIter<'a> {
594 type Item = (&'a [u8], &'a [u8]);
595
596 fn next(&mut self) -> Option<Self::Item> {
597 let (leaf, idx) = self.current_leaf.as_mut()?;
598
599 let key = &leaf.keys[*idx];
600 let value = &leaf.values[*idx];
601
602 *idx += 1;
603 if *idx >= leaf.keys.len() {
604 self.advance_to_next_leaf();
605 }
606
607 Some((key.as_slice(), value.as_slice()))
608 }
609}
610
611#[derive(Debug, Clone)]
613pub enum Bound<'a> {
614 Unbounded,
616 Included(&'a [u8]),
618 Excluded(&'a [u8]),
620}
621
622pub struct BTreeRangeIter<'a> {
624 inner: BTreeIter<'a>,
626 start_bound: Bound<'a>,
628 end_bound: Bound<'a>,
630 started: bool,
632 finished: bool,
634}
635
636impl<'a> BTreeRangeIter<'a> {
637 pub fn new(tree: &'a BTree, start: Bound<'a>, end: Bound<'a>) -> Self {
639 Self {
640 inner: tree.iter(),
641 start_bound: start,
642 end_bound: end,
643 started: false,
644 finished: false,
645 }
646 }
647
648 #[inline]
650 fn is_at_or_past_start(&self, key: &[u8]) -> bool {
651 match &self.start_bound {
652 Bound::Unbounded => true,
653 Bound::Included(start) => key >= *start,
654 Bound::Excluded(start) => key > *start,
655 }
656 }
657
658 #[inline]
660 fn is_past_end(&self, key: &[u8]) -> bool {
661 match &self.end_bound {
662 Bound::Unbounded => false,
663 Bound::Included(end) => key > *end,
664 Bound::Excluded(end) => key >= *end,
665 }
666 }
667}
668
669impl<'a> Iterator for BTreeRangeIter<'a> {
670 type Item = (&'a [u8], &'a [u8]);
671
672 fn next(&mut self) -> Option<Self::Item> {
673 if self.finished {
674 return None;
675 }
676
677 loop {
678 let (key, value) = self.inner.next()?;
679
680 if !self.started {
682 if self.is_at_or_past_start(key) {
683 self.started = true;
684 } else {
685 continue;
686 }
687 }
688
689 if self.is_past_end(key) {
691 self.finished = true;
692 return None;
693 }
694
695 return Some((key, value));
696 }
697 }
698}
699
700#[cfg(test)]
701mod tests {
702 use super::*;
703
704 #[test]
705 fn test_btree_basic_crud() {
706 let mut tree = BTree::new();
707 assert!(tree.is_empty());
708
709 tree.insert(b"key".to_vec(), b"value".to_vec());
711 assert_eq!(tree.len(), 1);
712 assert_eq!(tree.get(b"key"), Some(b"value".as_slice()));
713
714 let old = tree.insert(b"key".to_vec(), b"new_value".to_vec());
716 assert_eq!(old, Some(b"value".to_vec()));
717 assert_eq!(tree.get(b"key"), Some(b"new_value".as_slice()));
718
719 let removed = tree.remove(b"key");
721 assert_eq!(removed, Some(b"new_value".to_vec()));
722 assert!(tree.is_empty());
723 assert!(tree.get(b"key").is_none());
724 }
725
726 #[test]
727 fn test_btree_ordering_and_iteration() {
728 let mut tree = BTree::new();
729
730 tree.insert(b"zebra".to_vec(), b"z".to_vec());
731 tree.insert(b"apple".to_vec(), b"a".to_vec());
732 tree.insert(b"mango".to_vec(), b"m".to_vec());
733
734 let pairs: Vec<_> = tree.iter().collect();
735 assert_eq!(pairs.len(), 3);
736 assert_eq!(pairs[0].0, b"apple");
737 assert_eq!(pairs[1].0, b"mango");
738 assert_eq!(pairs[2].0, b"zebra");
739 }
740
741 #[test]
742 fn test_btree_many_insertions_causes_splits() {
743 let mut tree = BTree::new();
744
745 for i in 0..200u32 {
746 let key = format!("key_{i:05}");
747 let value = format!("value_{i}");
748 tree.insert(key.into_bytes(), value.into_bytes());
749 }
750
751 assert_eq!(tree.len(), 200);
752
753 for i in 0..200u32 {
754 let key = format!("key_{i:05}");
755 let expected = format!("value_{i}");
756 assert_eq!(tree.get(key.as_bytes()), Some(expected.as_bytes()));
757 }
758
759 let pairs: Vec<_> = tree.iter().collect();
761 for (idx, (key, _)) in pairs.iter().enumerate() {
762 let expected_key = format!("key_{idx:05}");
763 assert_eq!(*key, expected_key.as_bytes());
764 }
765 }
766
767 #[test]
768 fn test_btree_interleaved_insert_delete() {
769 let mut tree = BTree::new();
770
771 for i in 0..100u32 {
772 tree.insert(format!("key{i:03}").into_bytes(), vec![i as u8]);
773 }
774
775 for i in (0..100u32).step_by(2) {
777 tree.remove(format!("key{i:03}").as_bytes());
778 }
779
780 assert_eq!(tree.len(), 50);
781
782 for i in (1..100u32).step_by(2) {
784 assert!(tree.get(format!("key{i:03}").as_bytes()).is_some());
785 }
786 }
787}