cobt/cobt.rs
1/// Type alias for the result of a node split operation.
2/// Contains the promoted key, value, and the new right sibling node.
3type SplitResult<K, V> = Option<(K, V, Box<Node<K, V>>)>;
4
5/// A single node within a B-Tree structure.
6///
7/// Each node stores a collection of keys and their corresponding values,
8/// along with optional child nodes. The `is_leaf` flag indicates whether
9/// this node has any children.
10///
11/// # Type Parameters
12/// - `K`: The type of the keys stored in the node. Must be comparable.
13/// - `V`: The type of the values associated with each key.
14#[derive(Debug, PartialEq)]
15struct Node<K, V> {
16 /// Keys stored in sorted order for efficient searching.
17 keys: Vec<K>,
18 /// Values corresponding to each key.
19 values: Vec<V>,
20 /// Child nodes. Empty if this node is a leaf.
21 children: Vec<Node<K, V>>,
22 /// Indicates whether this node is a leaf (has no children).
23 is_leaf: bool,
24}
25
26impl<K: Ord + Clone, V: Clone> Node<K, V> {
27 /// Creates a new, empty B-Tree node.
28 ///
29 /// # Parameters
30 /// - `is_leaf`: A boolean indicating whether this node is a leaf.
31 ///
32 /// # Returns
33 /// A [`Node`] instance with no keys, values, or children.
34 ///
35 /// # Examples
36 /// ```ignore
37 /// let leaf: Node<i32, String> = Node::new(true);
38 /// assert!(leaf.is_leaf);
39 /// assert!(leaf.keys.is_empty());
40 /// ```
41 fn new(is_leaf: bool) -> Self {
42 Node {
43 keys: Vec::new(),
44 values: Vec::new(),
45 children: Vec::new(),
46 is_leaf,
47 }
48 }
49
50 /// Searches for a key within a node (and its descendants, if any)
51 ///
52 /// This method uses binary search to efficiently locate the given `key`
53 /// among the node's sorted keys. If the key is not found and the node is
54 /// not a leaf, the search continues recursively in the appropriate child node.
55 ///
56 /// # Parameters
57 /// - `key`: A reference to the key being searched for.
58 ///
59 /// # Returns
60 /// - `Some(&V)` if the key exists in this node or one if its descendants.
61 /// - `None` if the key is not found anywhere below this node.
62 ///
63 /// # Examples
64 /// ```ignore
65 /// let mut node = Node::<i32, &str>::new(true);
66 /// node.keys.push(5);
67 /// node.values.push("five");
68 ///
69 /// assert_eq!(node.search(&5), Some(&"five"));
70 /// assert_eq!(node.search(&3), None);
71 /// ```
72 fn search(&self, key: &K) -> Option<&V> {
73 match self.keys.binary_search(key) {
74 Ok(i) => Some(&self.values[i]),
75 Err(i) => {
76 if self.is_leaf {
77 None
78 } else {
79 self.children[i].search(key)
80 }
81 }
82 }
83 }
84
85 /// Inserts a key-value pair into the B-Tree node.
86 ///
87 /// The insertion process depends on wether this node is a leaf:
88 /// - If the node **is a leaf**, the key and value are inserted directly into
89 /// their respective vectors at the appropriate sorted position.
90 /// - If the node **is internal**, the insertion is delegated to the appropriate
91 /// child node, and a split may occur if the child overflows.
92 ///
93 /// After the insertion, if the number of keys exceeds the node's capacity threshold,
94 /// the node is split. The threshold is determined dynamically as
95 /// `ceil(sqrt(keys.len())) * 2`
96 ///
97 /// # Parameters
98 /// - `key`: The key to insert
99 /// - `value`: The value associated with the key.
100 ///
101 /// # Returns
102 /// - `Some((split_key, split_value, split_node))` if the node was split.
103 /// - `None` if no split occurred.
104 ///
105 /// # Panics
106 /// May panic if internal vector insertions exceed capacity limits (unlikely
107 /// under normal use).
108 ///
109 /// # Examples
110 /// ```ignore
111 /// let mut node = Node::<i32, &str>::new(true);
112 /// assert!(node.insert(10, "ten").is_none());
113 /// assert_eq!(node.keys, vec![10]);
114 ///
115 /// // Example split behavior (simplified)
116 /// for i in 0..10 {
117 /// node.insert(i, "val");
118 /// }
119 /// ```
120 fn insert(&mut self, key: K, value: V) -> SplitResult<K, V> {
121 let pos = self.keys.binary_search(&key).unwrap_or_else(|i| i);
122
123 if self.is_leaf {
124 self.keys.insert(pos, key);
125 self.values.insert(pos, value);
126 } else if let Some((split_key, split_val, split_child)) =
127 self.children[pos].insert(key, value)
128 {
129 self.keys.insert(pos, split_key);
130 self.values.insert(pos, split_val);
131 self.children.insert(pos + 1, *split_child);
132 }
133
134 let threshold = (self.keys.len() as f64).sqrt().ceil() as usize;
135 if self.keys.len() > threshold * 2 {
136 self.split()
137 } else {
138 None
139 }
140 }
141
142 /// Splits the current node into two nodes when it exceeds its capacity.
143 ///
144 /// The node is divided around the median key:
145 /// - Keys and values **after** the median are moved into a newly created sibling node.
146 /// - The median key and its corresponding value are promoted to the parent node
147 /// to serve as a separator between the two halves.
148 ///
149 /// If the node is internal (non-leaf), its child pointers are split in the same manner.
150 ///
151 /// # Returns
152 /// - `Some((split_key, split_value, new_node))` where:
153 /// - `split_key` and `split_value` are promoted to the parent node.
154 /// - `new_node` is the newly created right-hand node containing the upper half of the keys.
155 ///
156 /// # Panics
157 /// Panics if called on an empty node (since there is no median key to promote).
158 ///
159 /// # Examples
160 /// ```ignore
161 /// let mut node = Node::<i32, &str>::new(true);
162 /// for i in 0..6 {
163 /// node.keys.push(i);
164 /// node.values.push("val");
165 /// }
166 ///
167 /// let result = node.split();
168 /// assert!(result.is_some());
169 /// ```
170 fn split(&mut self) -> SplitResult<K, V> {
171 let mid = self.keys.len() / 2;
172
173 let mut new_node = Node::new(self.is_leaf);
174 new_node.keys = self.keys.split_off(mid + 1);
175 new_node.values = self.values.split_off(mid + 1);
176
177 if !self.is_leaf {
178 new_node.children = self.children.split_off(mid + 1);
179 }
180
181 let split_key = self.keys.pop().unwrap();
182 let split_value = self.values.pop().unwrap();
183
184 Some((split_key, split_value, Box::new(new_node)))
185 }
186}
187
188/// A cache-oblivious B-Tree for efficient ordered storage and retrieval.
189///
190/// This data structure is designed to maintain key–value pairs in sorted order
191/// while minimizing cache misses across different memory hierarchies. It adapts
192/// automatically to cache line sizes without requiring explicit block tuning,
193/// making it suitable for systems where memory access patterns are critical.
194///
195/// Internally, the tree is composed of recursively nested [`Node`] structures,
196/// each containing sorted keys, associated values, and (for internal nodes)
197/// references to child nodes. The tree automatically splits nodes when they
198/// exceed their capacity.
199///
200/// # Type Parameters
201/// - `K`: The type of the keys stored in the tree. Must implement [`Ord`] and [`Clone`].
202/// - `V`: The type of the values stored in the tree. Must implement [`Clone`].
203///
204/// # Fields
205/// - `root`: The root node of the tree.
206/// - `size`: The total number of key–value pairs stored in the tree.
207///
208/// # Examples
209/// ```
210/// use cobt::CacheObliviousBTree;
211///
212/// let mut tree = CacheObliviousBTree::<i32, &str>::new();
213/// tree.insert(42, "answer");
214/// assert_eq!(tree.search(&42), Some(&"answer"));
215/// assert_eq!(tree.len(), 1);
216/// ```
217///
218/// # References
219/// - [Frigo et al., *Cache-Oblivious B-Trees*, SIAM Journal on Computing, 2000]
220#[derive(Debug, PartialEq)]
221pub struct CacheObliviousBTree<K, V> {
222 /// The root node of the tree.
223 root: Box<Node<K, V>>,
224 /// The total number of key–value pairs in the tree.
225 size: usize,
226}
227
228impl<K: Ord + Clone, V: Clone> CacheObliviousBTree<K, V> {
229 /// Creates an empty [`CacheObliviousBTree`].
230 ///
231 /// Initializes the tree with a single empty root node marked as a leaf.
232 /// The resulting tree contains no keys or values and has a size of zero.
233 ///
234 /// # Returns
235 /// A new, empty [`CacheObliviousBTree`].
236 ///
237 /// # Examples
238 /// ```
239 /// use cobt::CacheObliviousBTree;
240 ///
241 /// let tree: CacheObliviousBTree<i32, &str> = CacheObliviousBTree::new();
242 /// assert_eq!(tree.len(), 0);
243 /// ```
244 pub fn new() -> Self {
245 CacheObliviousBTree {
246 root: Box::new(Node::new(true)),
247 size: 0,
248 }
249 }
250
251 /// Inserts a key–value pair into the tree.
252 ///
253 /// If inserting the new pair causes the root node to split, a new root is
254 /// created, and the promoted key becomes the separator between the two child nodes.
255 ///
256 /// # Parameters
257 /// - `key`: The key to insert into the tree.
258 /// - `value`: The value associated with the key.
259 ///
260 /// # Behavior
261 /// - If the key already exists, this implementation **does not** replace the
262 /// existing value (keys are treated as unique).
263 /// - The size of the tree increases by one after each insertion.
264 ///
265 /// # Examples
266 /// ```
267 /// use cobt::CacheObliviousBTree;
268 ///
269 /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
270 /// tree.insert(42, "answer");
271 /// assert_eq!(tree.len(), 1);
272 /// ```
273 pub fn insert(&mut self, key: K, value: V) {
274 if let Some((split_key, split_val, split_child)) = self.root.insert(key, value) {
275 let new_root = Node::new(false);
276 let old_root = std::mem::replace(&mut self.root, Box::new(new_root));
277
278 self.root.keys.push(split_key);
279 self.root.values.push(split_val);
280 self.root.children.push(*old_root);
281 self.root.children.push(*split_child);
282 }
283 self.size += 1;
284 }
285
286 /// Searches for a key in the tree and returns a reference to its value if found.
287 ///
288 /// This operation performs a recursive binary search through the tree,
289 /// starting at the root and descending into the appropriate child nodes.
290 ///
291 /// # Parameters
292 /// - `key`: A reference to the key to search for.
293 ///
294 /// # Returns
295 /// - `Some(&V)` if the key exists.
296 /// - `None` if the key is not found in the tree.
297 ///
298 /// # Examples
299 /// ```
300 /// use cobt::CacheObliviousBTree;
301 ///
302 /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
303 /// tree.insert(1, "one");
304 /// assert_eq!(tree.search(&1), Some(&"one"));
305 /// assert_eq!(tree.search(&2), None);
306 /// ```
307 pub fn search(&self, key: &K) -> Option<&V> {
308 self.root.search(key)
309 }
310
311 /// Returns the total number of key–value pairs stored in the tree.
312 ///
313 /// # Examples
314 /// ```
315 /// use cobt::CacheObliviousBTree;
316 ///
317 /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
318 /// tree.insert(10, "ten");
319 /// assert_eq!(tree.len(), 1);
320 /// ```
321 pub fn len(&self) -> usize {
322 self.size
323 }
324
325 /// Returns `true` if the tree contains no key–value pairs.
326 ///
327 /// # Examples
328 /// ```
329 /// use cobt::CacheObliviousBTree;
330 ///
331 /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
332 /// assert!(tree.is_empty());
333 ///
334 /// tree.insert(1, "one");
335 /// assert!(!tree.is_empty());
336 /// ```
337 pub fn is_empty(&self) -> bool {
338 self.size == 0
339 }
340}
341
342impl<K: Ord + Clone, V: Clone> Default for CacheObliviousBTree<K, V> {
343 fn default() -> Self {
344 Self::new()
345 }
346}