cobt/
cobt.rs

1/// Type alias for the result of a node split operation.
2/// Contains the promoted key, value, and the new right sibling node.
3type SplitResult<K, V> = Option<(K, V, Box<Node<K, V>>)>;
4
5/// A single node within a B-Tree structure.
6///
7/// Each node stores a collection of keys and their corresponding values,
8/// along with optional child nodes. The `is_leaf` flag indicates whether
9/// this node has any children.
10///
11/// # Type Parameters
12/// - `K`: The type of the keys stored in the node. Must be comparable.
13/// - `V`: The type of the values associated with each key.
14#[derive(Debug, PartialEq)]
15struct Node<K, V> {
16    /// Keys stored in sorted order for efficient searching.
17    keys: Vec<K>,
18    /// Values corresponding to each key.
19    values: Vec<V>,
20    /// Child nodes. Empty if this node is a leaf.
21    children: Vec<Node<K, V>>,
22    /// Indicates whether this node is a leaf (has no children).
23    is_leaf: bool,
24}
25
26impl<K: Ord + Clone, V: Clone> Node<K, V> {
27    /// Creates a new, empty B-Tree node.
28    ///
29    /// # Parameters
30    /// - `is_leaf`: A boolean indicating whether this node is a leaf.
31    ///
32    /// # Returns
33    /// A [`Node`] instance with no keys, values, or children.
34    ///
35    /// # Examples
36    /// ```ignore
37    /// let leaf: Node<i32, String> = Node::new(true);
38    /// assert!(leaf.is_leaf);
39    /// assert!(leaf.keys.is_empty());
40    /// ```
41    fn new(is_leaf: bool) -> Self {
42        Node {
43            keys: Vec::new(),
44            values: Vec::new(),
45            children: Vec::new(),
46            is_leaf,
47        }
48    }
49
50    /// Searches for a key within a node (and its descendants, if any)
51    ///
52    /// This method uses binary search to efficiently locate the given `key`
53    /// among the node's sorted keys.  If the key is not found and the node is
54    /// not a leaf, the search continues recursively in the appropriate child node.
55    ///
56    /// # Parameters
57    /// - `key`: A reference to the key being searched for.
58    ///
59    /// # Returns
60    /// - `Some(&V)` if the key exists in this node or one if its descendants.
61    /// - `None` if the key is not found anywhere below this node.
62    ///
63    /// # Examples
64    /// ```ignore
65    /// let mut node = Node::<i32, &str>::new(true);
66    /// node.keys.push(5);
67    /// node.values.push("five");
68    ///
69    /// assert_eq!(node.search(&5), Some(&"five"));
70    /// assert_eq!(node.search(&3), None);
71    /// ```
72    fn search(&self, key: &K) -> Option<&V> {
73        match self.keys.binary_search(key) {
74            Ok(i) => Some(&self.values[i]),
75            Err(i) => {
76                if self.is_leaf {
77                    None
78                } else {
79                    self.children[i].search(key)
80                }
81            }
82        }
83    }
84
85    /// Inserts a key-value pair into the B-Tree node.
86    ///
87    /// The insertion process depends on wether this node is a leaf:
88    /// - If the node **is a leaf**, the key and value are inserted directly into
89    ///   their respective vectors at the appropriate sorted position.
90    /// - If the node **is internal**, the insertion is delegated to the appropriate
91    ///   child node, and a split may occur if the child overflows.
92    ///
93    ///  After the insertion, if the number of keys exceeds the node's capacity threshold,
94    ///  the node is split. The threshold is determined dynamically as
95    ///  `ceil(sqrt(keys.len())) * 2`
96    ///
97    /// # Parameters
98    /// - `key`: The key to insert
99    /// - `value`: The value associated with the key.
100    ///
101    /// # Returns
102    /// - `Some((split_key, split_value, split_node))` if the node was split.
103    /// - `None` if no split occurred.
104    ///
105    /// # Panics
106    /// May panic if internal vector insertions exceed capacity limits (unlikely
107    /// under normal use).
108    ///
109    /// # Examples
110    /// ```ignore
111    /// let mut node = Node::<i32, &str>::new(true);
112    /// assert!(node.insert(10, "ten").is_none());
113    /// assert_eq!(node.keys, vec![10]);
114    ///
115    /// // Example split behavior (simplified)
116    /// for i in 0..10 {
117    ///     node.insert(i, "val");
118    /// }
119    /// ```
120    fn insert(&mut self, key: K, value: V) -> SplitResult<K, V> {
121        let pos = self.keys.binary_search(&key).unwrap_or_else(|i| i);
122
123        if self.is_leaf {
124            self.keys.insert(pos, key);
125            self.values.insert(pos, value);
126        } else if let Some((split_key, split_val, split_child)) =
127            self.children[pos].insert(key, value)
128        {
129            self.keys.insert(pos, split_key);
130            self.values.insert(pos, split_val);
131            self.children.insert(pos + 1, *split_child);
132        }
133
134        let threshold = (self.keys.len() as f64).sqrt().ceil() as usize;
135        if self.keys.len() > threshold * 2 {
136            self.split()
137        } else {
138            None
139        }
140    }
141
142    /// Splits the current node into two nodes when it exceeds its capacity.
143    ///
144    /// The node is divided around the median key:
145    /// - Keys and values **after** the median are moved into a newly created sibling node.
146    /// - The median key and its corresponding value are promoted to the parent node
147    ///   to serve as a separator between the two halves.
148    ///
149    /// If the node is internal (non-leaf), its child pointers are split in the same manner.
150    ///
151    /// # Returns
152    /// - `Some((split_key, split_value, new_node))` where:
153    ///   - `split_key` and `split_value` are promoted to the parent node.
154    ///   - `new_node` is the newly created right-hand node containing the upper half of the keys.
155    ///
156    /// # Panics
157    /// Panics if called on an empty node (since there is no median key to promote).
158    ///
159    /// # Examples
160    /// ```ignore
161    /// let mut node = Node::<i32, &str>::new(true);
162    /// for i in 0..6 {
163    ///     node.keys.push(i);
164    ///     node.values.push("val");
165    /// }
166    ///
167    /// let result = node.split();
168    /// assert!(result.is_some());
169    /// ```
170    fn split(&mut self) -> SplitResult<K, V> {
171        let mid = self.keys.len() / 2;
172
173        let mut new_node = Node::new(self.is_leaf);
174        new_node.keys = self.keys.split_off(mid + 1);
175        new_node.values = self.values.split_off(mid + 1);
176
177        if !self.is_leaf {
178            new_node.children = self.children.split_off(mid + 1);
179        }
180
181        let split_key = self.keys.pop().unwrap();
182        let split_value = self.values.pop().unwrap();
183
184        Some((split_key, split_value, Box::new(new_node)))
185    }
186}
187
188/// A cache-oblivious B-Tree for efficient ordered storage and retrieval.
189///
190/// This data structure is designed to maintain key–value pairs in sorted order
191/// while minimizing cache misses across different memory hierarchies. It adapts
192/// automatically to cache line sizes without requiring explicit block tuning,
193/// making it suitable for systems where memory access patterns are critical.
194///
195/// Internally, the tree is composed of recursively nested [`Node`] structures,
196/// each containing sorted keys, associated values, and (for internal nodes)
197/// references to child nodes. The tree automatically splits nodes when they
198/// exceed their capacity.
199///
200/// # Type Parameters
201/// - `K`: The type of the keys stored in the tree. Must implement [`Ord`] and [`Clone`].
202/// - `V`: The type of the values stored in the tree. Must implement [`Clone`].
203///
204/// # Fields
205/// - `root`: The root node of the tree.
206/// - `size`: The total number of key–value pairs stored in the tree.
207///
208/// # Examples
209/// ```
210/// use cobt::CacheObliviousBTree;
211///
212/// let mut tree = CacheObliviousBTree::<i32, &str>::new();
213/// tree.insert(42, "answer");
214/// assert_eq!(tree.search(&42), Some(&"answer"));
215/// assert_eq!(tree.len(), 1);
216/// ```
217///
218/// # References
219/// - [Frigo et al., *Cache-Oblivious B-Trees*, SIAM Journal on Computing, 2000]
220#[derive(Debug, PartialEq)]
221pub struct CacheObliviousBTree<K, V> {
222    /// The root node of the tree.
223    root: Box<Node<K, V>>,
224    /// The total number of key–value pairs in the tree.
225    size: usize,
226}
227
228impl<K: Ord + Clone, V: Clone> CacheObliviousBTree<K, V> {
229    /// Creates an empty [`CacheObliviousBTree`].
230    ///
231    /// Initializes the tree with a single empty root node marked as a leaf.
232    /// The resulting tree contains no keys or values and has a size of zero.
233    ///
234    /// # Returns
235    /// A new, empty [`CacheObliviousBTree`].
236    ///
237    /// # Examples
238    /// ```
239    /// use cobt::CacheObliviousBTree;
240    ///
241    /// let tree: CacheObliviousBTree<i32, &str> = CacheObliviousBTree::new();
242    /// assert_eq!(tree.len(), 0);
243    /// ```
244    pub fn new() -> Self {
245        CacheObliviousBTree {
246            root: Box::new(Node::new(true)),
247            size: 0,
248        }
249    }
250
251    /// Inserts a key–value pair into the tree.
252    ///
253    /// If inserting the new pair causes the root node to split, a new root is
254    /// created, and the promoted key becomes the separator between the two child nodes.
255    ///
256    /// # Parameters
257    /// - `key`: The key to insert into the tree.
258    /// - `value`: The value associated with the key.
259    ///
260    /// # Behavior
261    /// - If the key already exists, this implementation **does not** replace the
262    ///   existing value (keys are treated as unique).
263    /// - The size of the tree increases by one after each insertion.
264    ///
265    /// # Examples
266    /// ```
267    /// use cobt::CacheObliviousBTree;
268    ///
269    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
270    /// tree.insert(42, "answer");
271    /// assert_eq!(tree.len(), 1);
272    /// ```
273    pub fn insert(&mut self, key: K, value: V) {
274        if let Some((split_key, split_val, split_child)) = self.root.insert(key, value) {
275            let new_root = Node::new(false);
276            let old_root = std::mem::replace(&mut self.root, Box::new(new_root));
277
278            self.root.keys.push(split_key);
279            self.root.values.push(split_val);
280            self.root.children.push(*old_root);
281            self.root.children.push(*split_child);
282        }
283        self.size += 1;
284    }
285
286    /// Searches for a key in the tree and returns a reference to its value if found.
287    ///
288    /// This operation performs a recursive binary search through the tree,
289    /// starting at the root and descending into the appropriate child nodes.
290    ///
291    /// # Parameters
292    /// - `key`: A reference to the key to search for.
293    ///
294    /// # Returns
295    /// - `Some(&V)` if the key exists.
296    /// - `None` if the key is not found in the tree.
297    ///
298    /// # Examples
299    /// ```
300    /// use cobt::CacheObliviousBTree;
301    ///
302    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
303    /// tree.insert(1, "one");
304    /// assert_eq!(tree.search(&1), Some(&"one"));
305    /// assert_eq!(tree.search(&2), None);
306    /// ```
307    pub fn search(&self, key: &K) -> Option<&V> {
308        self.root.search(key)
309    }
310
311    /// Returns the total number of key–value pairs stored in the tree.
312    ///
313    /// # Examples
314    /// ```
315    /// use cobt::CacheObliviousBTree;
316    ///
317    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
318    /// tree.insert(10, "ten");
319    /// assert_eq!(tree.len(), 1);
320    /// ```
321    pub fn len(&self) -> usize {
322        self.size
323    }
324
325    /// Returns `true` if the tree contains no key–value pairs.
326    ///
327    /// # Examples
328    /// ```
329    /// use cobt::CacheObliviousBTree;
330    ///
331    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
332    /// assert!(tree.is_empty());
333    ///
334    /// tree.insert(1, "one");
335    /// assert!(!tree.is_empty());
336    /// ```
337    pub fn is_empty(&self) -> bool {
338        self.size == 0
339    }
340}
341
342impl<K: Ord + Clone, V: Clone> Default for CacheObliviousBTree<K, V> {
343    fn default() -> Self {
344        Self::new()
345    }
346}