cobt 0.1.1

A Cache-Oblivious B-Tree implementation in Rust
Documentation
/// Type alias for the result of a node split operation.
/// Contains the promoted key, value, and the new right sibling node.
type SplitResult<K, V> = Option<(K, V, Box<Node<K, V>>)>;

/// A single node within a B-Tree structure.
///
/// Each node stores a collection of keys and their corresponding values,
/// along with optional child nodes. The `is_leaf` flag indicates whether
/// this node has any children.
///
/// # Type Parameters
/// - `K`: The type of the keys stored in the node. Must be comparable.
/// - `V`: The type of the values associated with each key.
#[derive(Debug, PartialEq)]
struct Node<K, V> {
    /// Keys stored in sorted order for efficient searching.
    keys: Vec<K>,
    /// Values corresponding to each key.
    values: Vec<V>,
    /// Child nodes. Empty if this node is a leaf.
    children: Vec<Node<K, V>>,
    /// Indicates whether this node is a leaf (has no children).
    is_leaf: bool,
}

impl<K: Ord + Clone, V: Clone> Node<K, V> {
    /// Creates a new, empty B-Tree node.
    ///
    /// # Parameters
    /// - `is_leaf`: A boolean indicating whether this node is a leaf.
    ///
    /// # Returns
    /// A [`Node`] instance with no keys, values, or children.
    ///
    /// # Examples
    /// ```ignore
    /// let leaf: Node<i32, String> = Node::new(true);
    /// assert!(leaf.is_leaf);
    /// assert!(leaf.keys.is_empty());
    /// ```
    fn new(is_leaf: bool) -> Self {
        Node {
            keys: Vec::new(),
            values: Vec::new(),
            children: Vec::new(),
            is_leaf,
        }
    }

    /// Searches for a key within a node (and its descendants, if any)
    ///
    /// This method uses binary search to efficiently locate the given `key`
    /// among the node's sorted keys.  If the key is not found and the node is
    /// not a leaf, the search continues recursively in the appropriate child node.
    ///
    /// # Parameters
    /// - `key`: A reference to the key being searched for.
    ///
    /// # Returns
    /// - `Some(&V)` if the key exists in this node or one if its descendants.
    /// - `None` if the key is not found anywhere below this node.
    ///
    /// # Examples
    /// ```ignore
    /// let mut node = Node::<i32, &str>::new(true);
    /// node.keys.push(5);
    /// node.values.push("five");
    ///
    /// assert_eq!(node.search(&5), Some(&"five"));
    /// assert_eq!(node.search(&3), None);
    /// ```
    fn search(&self, key: &K) -> Option<&V> {
        match self.keys.binary_search(key) {
            Ok(i) => Some(&self.values[i]),
            Err(i) => {
                if self.is_leaf {
                    None
                } else {
                    self.children[i].search(key)
                }
            }
        }
    }

    /// Inserts a key-value pair into the B-Tree node.
    ///
    /// The insertion process depends on wether this node is a leaf:
    /// - If the node **is a leaf**, the key and value are inserted directly into
    ///   their respective vectors at the appropriate sorted position.
    /// - If the node **is internal**, the insertion is delegated to the appropriate
    ///   child node, and a split may occur if the child overflows.
    ///
    ///  After the insertion, if the number of keys exceeds the node's capacity threshold,
    ///  the node is split. The threshold is determined dynamically as
    ///  `ceil(sqrt(keys.len())) * 2`
    ///
    /// # Parameters
    /// - `key`: The key to insert
    /// - `value`: The value associated with the key.
    ///
    /// # Returns
    /// - `Some((split_key, split_value, split_node))` if the node was split.
    /// - `None` if no split occurred.
    ///
    /// # Panics
    /// May panic if internal vector insertions exceed capacity limits (unlikely
    /// under normal use).
    ///
    /// # Examples
    /// ```ignore
    /// let mut node = Node::<i32, &str>::new(true);
    /// assert!(node.insert(10, "ten").is_none());
    /// assert_eq!(node.keys, vec![10]);
    ///
    /// // Example split behavior (simplified)
    /// for i in 0..10 {
    ///     node.insert(i, "val");
    /// }
    /// ```
    fn insert(&mut self, key: K, value: V) -> SplitResult<K, V> {
        let pos = self.keys.binary_search(&key).unwrap_or_else(|i| i);

        if self.is_leaf {
            self.keys.insert(pos, key);
            self.values.insert(pos, value);
        } else if let Some((split_key, split_val, split_child)) =
            self.children[pos].insert(key, value)
        {
            self.keys.insert(pos, split_key);
            self.values.insert(pos, split_val);
            self.children.insert(pos + 1, *split_child);
        }

        let threshold = (self.keys.len() as f64).sqrt().ceil() as usize;
        if self.keys.len() > threshold * 2 {
            self.split()
        } else {
            None
        }
    }

    /// Splits the current node into two nodes when it exceeds its capacity.
    ///
    /// The node is divided around the median key:
    /// - Keys and values **after** the median are moved into a newly created sibling node.
    /// - The median key and its corresponding value are promoted to the parent node
    ///   to serve as a separator between the two halves.
    ///
    /// If the node is internal (non-leaf), its child pointers are split in the same manner.
    ///
    /// # Returns
    /// - `Some((split_key, split_value, new_node))` where:
    ///   - `split_key` and `split_value` are promoted to the parent node.
    ///   - `new_node` is the newly created right-hand node containing the upper half of the keys.
    ///
    /// # Panics
    /// Panics if called on an empty node (since there is no median key to promote).
    ///
    /// # Examples
    /// ```ignore
    /// let mut node = Node::<i32, &str>::new(true);
    /// for i in 0..6 {
    ///     node.keys.push(i);
    ///     node.values.push("val");
    /// }
    ///
    /// let result = node.split();
    /// assert!(result.is_some());
    /// ```
    fn split(&mut self) -> SplitResult<K, V> {
        let mid = self.keys.len() / 2;

        let mut new_node = Node::new(self.is_leaf);
        new_node.keys = self.keys.split_off(mid + 1);
        new_node.values = self.values.split_off(mid + 1);

        if !self.is_leaf {
            new_node.children = self.children.split_off(mid + 1);
        }

        let split_key = self.keys.pop().unwrap();
        let split_value = self.values.pop().unwrap();

        Some((split_key, split_value, Box::new(new_node)))
    }
}

/// A cache-oblivious B-Tree for efficient ordered storage and retrieval.
///
/// This data structure is designed to maintain key–value pairs in sorted order
/// while minimizing cache misses across different memory hierarchies. It adapts
/// automatically to cache line sizes without requiring explicit block tuning,
/// making it suitable for systems where memory access patterns are critical.
///
/// Internally, the tree is composed of recursively nested [`Node`] structures,
/// each containing sorted keys, associated values, and (for internal nodes)
/// references to child nodes. The tree automatically splits nodes when they
/// exceed their capacity.
///
/// # Type Parameters
/// - `K`: The type of the keys stored in the tree. Must implement [`Ord`] and [`Clone`].
/// - `V`: The type of the values stored in the tree. Must implement [`Clone`].
///
/// # Fields
/// - `root`: The root node of the tree.
/// - `size`: The total number of key–value pairs stored in the tree.
///
/// # Examples
/// ```
/// use cobt::CacheObliviousBTree;
///
/// let mut tree = CacheObliviousBTree::<i32, &str>::new();
/// tree.insert(42, "answer");
/// assert_eq!(tree.search(&42), Some(&"answer"));
/// assert_eq!(tree.len(), 1);
/// ```
///
/// # References
/// - [Frigo et al., *Cache-Oblivious B-Trees*, SIAM Journal on Computing, 2000]
#[derive(Debug, PartialEq)]
pub struct CacheObliviousBTree<K, V> {
    /// The root node of the tree.
    root: Box<Node<K, V>>,
    /// The total number of key–value pairs in the tree.
    size: usize,
}

impl<K: Ord + Clone, V: Clone> CacheObliviousBTree<K, V> {
    /// Creates an empty [`CacheObliviousBTree`].
    ///
    /// Initializes the tree with a single empty root node marked as a leaf.
    /// The resulting tree contains no keys or values and has a size of zero.
    ///
    /// # Returns
    /// A new, empty [`CacheObliviousBTree`].
    ///
    /// # Examples
    /// ```
    /// use cobt::CacheObliviousBTree;
    ///
    /// let tree: CacheObliviousBTree<i32, &str> = CacheObliviousBTree::new();
    /// assert_eq!(tree.len(), 0);
    /// ```
    pub fn new() -> Self {
        CacheObliviousBTree {
            root: Box::new(Node::new(true)),
            size: 0,
        }
    }

    /// Inserts a key–value pair into the tree.
    ///
    /// If inserting the new pair causes the root node to split, a new root is
    /// created, and the promoted key becomes the separator between the two child nodes.
    ///
    /// # Parameters
    /// - `key`: The key to insert into the tree.
    /// - `value`: The value associated with the key.
    ///
    /// # Behavior
    /// - If the key already exists, this implementation **does not** replace the
    ///   existing value (keys are treated as unique).
    /// - The size of the tree increases by one after each insertion.
    ///
    /// # Examples
    /// ```
    /// use cobt::CacheObliviousBTree;
    ///
    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
    /// tree.insert(42, "answer");
    /// assert_eq!(tree.len(), 1);
    /// ```
    pub fn insert(&mut self, key: K, value: V) {
        if let Some((split_key, split_val, split_child)) = self.root.insert(key, value) {
            let new_root = Node::new(false);
            let old_root = std::mem::replace(&mut self.root, Box::new(new_root));

            self.root.keys.push(split_key);
            self.root.values.push(split_val);
            self.root.children.push(*old_root);
            self.root.children.push(*split_child);
        }
        self.size += 1;
    }

    /// Searches for a key in the tree and returns a reference to its value if found.
    ///
    /// This operation performs a recursive binary search through the tree,
    /// starting at the root and descending into the appropriate child nodes.
    ///
    /// # Parameters
    /// - `key`: A reference to the key to search for.
    ///
    /// # Returns
    /// - `Some(&V)` if the key exists.
    /// - `None` if the key is not found in the tree.
    ///
    /// # Examples
    /// ```
    /// use cobt::CacheObliviousBTree;
    ///
    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
    /// tree.insert(1, "one");
    /// assert_eq!(tree.search(&1), Some(&"one"));
    /// assert_eq!(tree.search(&2), None);
    /// ```
    pub fn search(&self, key: &K) -> Option<&V> {
        self.root.search(key)
    }

    /// Returns the total number of key–value pairs stored in the tree.
    ///
    /// # Examples
    /// ```
    /// use cobt::CacheObliviousBTree;
    ///
    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
    /// tree.insert(10, "ten");
    /// assert_eq!(tree.len(), 1);
    /// ```
    pub fn len(&self) -> usize {
        self.size
    }

    /// Returns `true` if the tree contains no key–value pairs.
    ///
    /// # Examples
    /// ```
    /// use cobt::CacheObliviousBTree;
    ///
    /// let mut tree = CacheObliviousBTree::<i32, &str>::new();
    /// assert!(tree.is_empty());
    ///
    /// tree.insert(1, "one");
    /// assert!(!tree.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.size == 0
    }
}

impl<K: Ord + Clone, V: Clone> Default for CacheObliviousBTree<K, V> {
    fn default() -> Self {
        Self::new()
    }
}