splinter_rs/
lib.rs

1//! Splinter is a compressed bitmap format similar to [Roaring Bitmaps](https://roaringbitmap.org/), optimized specifically for small, sparse sets of 32-bit unsigned integers (`u32`).
2//!
3//! ## Key Features:
4//!
5//! - **Tree-based Encoding**: Splinter encodes `u32` values into a 256-way tree structure by decomposing integers into big-endian component bytes. Leaf nodes efficiently transition from byte lists to compact bitmaps at up to 32 values.
6//!
7//! - **Zero-copy Access**: Designed for efficient querying without deserialization, the `SplinterRef` type allows direct, zero-copy reads from any type implementing `AsRef<[u8]>`.
8
9use std::ops::RangeBounds;
10
11use thiserror::Error;
12
13mod bitmap;
14mod block;
15pub mod cow;
16mod index;
17pub mod ops;
18mod partition;
19mod relational;
20mod splinter;
21mod util;
22
23#[cfg(test)]
24mod testutil;
25
26pub use splinter::{SPLINTER_MAX_VALUE, Splinter, SplinterRef};
27
28type Segment = u8;
29
30#[derive(Debug, Error)]
31pub enum DecodeErr {
32    #[error("Unable to decode header")]
33    InvalidHeader,
34
35    #[error("Invalid magic number")]
36    InvalidMagic,
37
38    #[error("Unable to decode footer")]
39    InvalidFooter,
40}
41
42pub trait SplinterRead {
43    /// Returns `true` if the Splinter is empty.
44    ///
45    /// # Examples
46    ///
47    /// ```
48    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
49    ///
50    /// let mut splinter = Splinter::default();
51    /// assert!(splinter.is_empty());
52    /// splinter.insert(1);
53    /// assert!(!splinter.is_empty());
54    /// ```
55    fn is_empty(&self) -> bool;
56
57    /// Returns `true` if the Splinter contains the given key.
58    ///
59    /// # Examples
60    ///
61    /// ```
62    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
63    ///
64    /// let mut splinter = Splinter::default();
65    /// splinter.insert(1);
66    /// splinter.insert(3);
67    ///
68    /// assert!(splinter.contains(1));
69    /// assert!(!splinter.contains(2));
70    /// assert!(splinter.contains(3));
71    /// ```
72    fn contains(&self, key: u32) -> bool;
73
74    /// Calculates the total number of values stored in the Splinter.
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
80    ///
81    /// let mut splinter = Splinter::default();
82    /// splinter.insert(6);
83    /// splinter.insert(1);
84    /// splinter.insert(3);
85    ///
86    /// assert_eq!(3, splinter.cardinality());
87    /// ```
88    fn cardinality(&self) -> usize;
89
90    /// Returns an sorted [`Iterator`] over all keys.
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
96    ///
97    /// let mut splinter = Splinter::default();
98    /// splinter.insert(6);
99    /// splinter.insert(1);
100    /// splinter.insert(3);
101    ///
102    /// assert_eq!(&[1, 3, 6], &*splinter.iter().collect::<Vec<_>>());
103    /// ```
104    fn iter(&self) -> impl Iterator<Item = u32> + '_;
105
106    /// Returns an sorted [`Iterator`] over all keys contained by the provided range.
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
112    ///
113    /// let mut splinter = Splinter::default();
114    /// splinter.insert(6);
115    /// splinter.insert(1);
116    /// splinter.insert(3);
117    /// splinter.insert(5);
118    /// splinter.insert(9);
119    ///
120    /// assert_eq!(&[3, 5, 6], &*splinter.range(3..=6).collect::<Vec<_>>());
121    /// ```
122    fn range<'a, R>(&'a self, range: R) -> impl Iterator<Item = u32> + 'a
123    where
124        R: RangeBounds<u32> + 'a;
125
126    /// Returns the last key in the set
127    ///
128    /// # Examples
129    ///
130    /// ```
131    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
132    ///
133    /// let mut splinter = Splinter::default();
134    ///
135    /// assert_eq!(None, splinter.last());
136    /// splinter.insert(6);
137    /// splinter.insert(1);
138    /// splinter.insert(3);
139    /// assert_eq!(Some(6), splinter.last());
140    /// ```
141    fn last(&self) -> Option<u32>;
142}
143
144pub trait SplinterWrite {
145    /// Attempts to insert a key into the Splinter, returning true if a key was inserted
146    ///
147    /// # Examples
148    ///
149    /// ```
150    /// # use splinter_rs::{Splinter, SplinterRead, SplinterWrite};
151    ///
152    /// let mut splinter = Splinter::default();
153    /// assert!(splinter.insert(6));
154    /// assert!(!splinter.insert(6));
155    ///
156    /// assert_eq!(&[6], &*splinter.iter().collect::<Vec<_>>());
157    /// ```
158    fn insert(&mut self, key: u32) -> bool;
159}
160
161#[cfg(test)]
162mod tests {
163    use bytes::Bytes;
164
165    use super::*;
166    use crate::{
167        cow::CowSplinter,
168        testutil::{SetGen, harness_read, harness_write, mksplinter, mksplinter_ref},
169    };
170
171    #[test]
172    fn test_splinter_read_impls() {
173        let mut set_gen = SetGen::new(0xC0DE);
174        let sets = vec![
175            vec![],
176            vec![1],
177            (0..10).collect(),
178            set_gen.random(64),
179            set_gen.distributed(2, 2, 2, 4, 32),
180        ];
181
182        for values in sets {
183            harness_read(&mksplinter(values.clone()), &values);
184            harness_read(&mksplinter_ref(values.clone()), &values);
185
186            let cow_owned: CowSplinter<Bytes> = CowSplinter::from_owned(mksplinter(values.clone()));
187            harness_read(&cow_owned, &values);
188
189            let cow_ref: CowSplinter<Bytes> = CowSplinter::from_ref(mksplinter_ref(values.clone()));
190            harness_read(&cow_ref, &values);
191        }
192    }
193
194    #[test]
195    fn test_splinter_write_impls() {
196        let mut set_gen = SetGen::new(0xBEEF);
197        let sets = vec![
198            vec![],
199            vec![2],
200            (0..20).step_by(3).collect(),
201            set_gen.random(64),
202            set_gen.distributed(2, 1, 1, 16, 32),
203        ];
204
205        for values in sets {
206            harness_write::<Splinter>(&values);
207            harness_write::<CowSplinter<Bytes>>(&values);
208        }
209    }
210}