logvec 1.0.1

A cache-friendly Bentley-Saxe logarithmic array data structure.
Documentation
//! `logvec` provides [`LogArray`], a cache-friendly Bentley-Saxe logarithmic array.
//!
//! It stores all elements in one contiguous `Vec<T>` and maintains sorted runs whose
//! sizes follow the binary decomposition of `len()`.
//!
//! # Complexity
//! - `insert`: amortized `O(log n)`
//! - `contains`: `O(log^2 n)` (binary search across sorted runs)
//! - `delete`: `O(n)` worst-case (removal + re-sort)
//!
//! # Example
//! ```rust
//! use logvec::LogArray;
//!
//! let mut arr = LogArray::new();
//! arr.insert(5);
//! arr.insert(3);
//! arr.insert(8);
//!
//! assert!(arr.contains(&5));
//! assert!(!arr.contains(&42));
//! assert_eq!(arr.delete(&3), Some(3));
//! assert!(!arr.contains(&3));
//! ```

/// A Bentley-Saxe logarithmic array backed by a contiguous `Vec<T>`.
#[derive(Debug)]
pub struct LogArray<T> {
    /// Arrays stored sequentially in increasing size 2^i, 2^i-1, ..., 4, 2, 1
    items: Vec<T>,
}

impl<T> LogArray<T> {
    /// Returns the number of elements in the collection.
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns `true` if the collection contains no elements.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Creates an empty [`LogArray`].
    pub fn new() -> Self {
        Self { items: Vec::new() }
    }
}

impl<T> Default for LogArray<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Ord> LogArray<T> {
    /// Inserts a value into the structure.
    ///
    /// The insertion is amortized `O(log n)`.
    pub fn insert(&mut self, value: T) {
        self.items.push(value);

        // TODO: We are just extracting the first bit here, use a bithack instead
        let z = self.items.len().trailing_zeros();
        let merge_size = 1_usize << z;
        let start = self.items.len() - merge_size;

        self.items[start..].sort_unstable();
    }

    /// Returns `true` if the query exists in the structure.
    ///
    /// This performs binary searches over internal sorted runs.
    pub fn contains(&self, query: &T) -> bool {
        if self.is_empty() {
            return false;
        }

        // Start from the highest power of 2 present in the length
        let mut size = 1 << self.items.len().ilog2();

        while size > 0 {
            if self.items.len() & size != 0 {
                let end = self.items.len() & !(size - 1);
                let start = end - size;

                if self.items[start..end].binary_search(query).is_ok() {
                    return true;
                }
            }

            size >>= 1; // Bitshift downwards
        }

        false
    }

    /// Deletes one matching element and returns it.
    ///
    /// Returns `None` if the element was not found.
    pub fn delete(&mut self, query: &T) -> Option<T> {
        let mut size = 1;

        while size <= self.items.len() {
            if self.items.len() & size != 0 {
                let end = self.items.len() & !(size - 1);
                let start = end - size;

                if let Ok(local_idx) = self.items[start..end].binary_search(query) {
                    let absolute_idx = start + local_idx;

                    // 1. Remove the element O(n)
                    let item = self.items.remove(absolute_idx);

                    // 2. The binary representation of the length has changed.
                    // By fully sorting the array, we guarantee that whatever new
                    // block sizes the binary representation demands, they will
                    // inherently be sorted.
                    // Because the array is mostly sorted runs, this is very fast.
                    self.items.sort();

                    return Some(item);
                }
            }

            size <<= 1;
        }

        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_insert_and_contains() {
        let mut arr = LogArray::new();
        assert!(!arr.contains(&5));

        arr.insert(5);
        assert!(arr.contains(&5));
        assert!(!arr.contains(&3));

        arr.insert(3);
        arr.insert(8);
        arr.insert(1);
        arr.insert(10);

        // Binary length is 5 (101): one block of 4, one block of 1.
        for &val in &[1, 3, 5, 8, 10] {
            assert!(arr.contains(&val), "Should contain {}", val);
        }
        assert!(!arr.contains(&2));
    }

    #[test]
    fn test_deletion() {
        let mut arr = LogArray::new();
        for i in 0..10 {
            arr.insert(i); // Insert 0 through 9
        }

        assert!(arr.contains(&5));

        // Delete a middle element
        assert!(arr.delete(&5).is_some());
        assert!(!arr.contains(&5));

        // Ensure everything else survived the block reshuffling
        for i in 0..10 {
            if i != 5 {
                assert!(arr.contains(&i), "Should still contain {}", i);
            }
        }

        // Deleting a non-existent element
        assert!(arr.delete(&99).is_none());
    }

    #[test]
    fn test_large_fuzz() {
        // Stress test to ensure block boundaries never panic
        let mut arr = LogArray::new();
        for i in 0..1000 {
            arr.insert(i);
        }
        for i in 0..1000 {
            assert!(arr.contains(&i));
        }
        for i in (0..1000).step_by(2) {
            arr.delete(&i); // Delete even numbers
        }
        for i in 0..1000 {
            assert_eq!(arr.contains(&i), i % 2 != 0);
        }
    }
}