splinter_rs/
splinter_ref.rs

1use std::{fmt::Debug, ops::Deref};
2
3use bytes::Bytes;
4use zerocopy::FromBytes;
5
6use crate::{
7    Splinter,
8    codec::{
9        DecodeErr, Encodable,
10        encoder::Encoder,
11        footer::{Footer, SPLINTER_V2_MAGIC},
12        partition_ref::PartitionRef,
13    },
14    level::High,
15    traits::PartitionRead,
16};
17
18/// A zero-copy reference to serialized splinter data.
19///
20/// `SplinterRef` allows efficient querying of compressed bitmap data without
21/// deserializing the underlying structure. It wraps any type that can be
22/// dereferenced to `[u8]` and provides all the same read operations as
23/// [`Splinter`], but with minimal memory overhead and no allocation during
24/// queries.
25///
26/// This is the preferred type for read-only operations on serialized splinter
27/// data, especially when the data comes from files, network, or other external
28/// sources.
29///
30/// # Type Parameter
31///
32/// - `B`: Any type that implements `Deref<Target = [u8]>` such as `&[u8]`,
33///   `Vec<u8>`, `Bytes`, `Arc<[u8]>`, etc.
34///
35/// # Examples
36///
37/// Creating from serialized bytes:
38///
39/// ```
40/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
41///
42/// // Create and populate a splinter
43/// let mut splinter = Splinter::EMPTY;
44/// splinter.insert(100);
45/// splinter.insert(200);
46///
47/// // Serialize it to bytes
48/// let bytes = splinter.encode_to_bytes();
49///
50/// // Create a zero-copy reference
51/// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
52/// assert_eq!(splinter_ref.cardinality(), 2);
53/// assert!(splinter_ref.contains(100));
54/// ```
55///
56/// Working with different buffer types:
57///
58/// ```
59/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
60/// use std::sync::Arc;
61///
62/// let mut splinter = Splinter::EMPTY;
63/// splinter.insert(42);
64///
65/// let bytes = splinter.encode_to_bytes();
66/// let shared_bytes: Arc<[u8]> = bytes.to_vec().into();
67///
68/// let splinter_ref = SplinterRef::from_bytes(shared_bytes).unwrap();
69/// assert!(splinter_ref.contains(42));
70/// ```
#[derive(Clone)]
pub struct SplinterRef<B> {
    // The serialized splinter bytes: partition data followed by a `Footer`.
    // Validated once by `from_bytes`; thereafter trusted by `load_unchecked`.
    pub(crate) data: B,
}
75
76impl<B: Deref<Target = [u8]>> Debug for SplinterRef<B> {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        f.debug_tuple("SplinterRef")
79            .field(&self.load_unchecked())
80            .finish()
81    }
82}
83
84impl<B> SplinterRef<B> {
85    /// Returns a reference to the underlying data buffer.
86    ///
87    /// This provides access to the raw bytes that store the serialized splinter
88    /// data.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
94    ///
95    /// let mut splinter = Splinter::EMPTY;
96    /// splinter.insert(42);
97    /// let bytes = splinter.encode_to_bytes();
98    /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
99    ///
100    /// let inner_bytes = splinter_ref.inner();
101    /// assert!(!inner_bytes.is_empty());
102    /// ```
103    #[inline]
104    pub fn inner(&self) -> &B {
105        &self.data
106    }
107
108    /// Consumes the `SplinterRef` and returns the underlying data buffer.
109    ///
110    /// This is useful when you need to take ownership of the underlying data
111    /// after you're done querying the splinter.
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
117    ///
118    /// let mut splinter = Splinter::EMPTY;
119    /// splinter.insert(42);
120    /// let bytes = splinter.encode_to_bytes();
121    /// let splinter_ref = SplinterRef::from_bytes(bytes.clone()).unwrap();
122    ///
123    /// let recovered_bytes = splinter_ref.into_inner();
124    /// assert_eq!(recovered_bytes, bytes);
125    /// ```
126    #[inline]
127    pub fn into_inner(self) -> B {
128        self.data
129    }
130}
131
132impl SplinterRef<Bytes> {
133    /// Returns a clone of the underlying bytes.
134    ///
135    /// This is efficient for `Bytes` since it uses reference counting
136    /// internally and doesn't actually copy the data.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// use splinter_rs::{Splinter, PartitionWrite};
142    ///
143    /// let mut splinter = Splinter::EMPTY;
144    /// splinter.insert(42);
145    /// let splinter_ref = splinter.encode_to_splinter_ref();
146    ///
147    /// let bytes_copy = splinter_ref.encode_to_bytes();
148    /// assert!(!bytes_copy.is_empty());
149    /// ```
150    #[inline]
151    pub fn encode_to_bytes(&self) -> Bytes {
152        self.data.clone()
153    }
154}
155
156impl<B: Deref<Target = [u8]>> Encodable for SplinterRef<B> {
157    #[inline]
158    fn encoded_size(&self) -> usize {
159        self.data.len()
160    }
161
162    #[inline]
163    fn encode<T: bytes::BufMut>(&self, encoder: &mut Encoder<T>) {
164        encoder.write_splinter(&self.data);
165    }
166}
167
168impl<B: Deref<Target = [u8]>> SplinterRef<B> {
169    /// Converts this reference back to an owned [`Splinter`].
170    ///
171    /// This method deserializes the underlying data and creates a new owned
172    /// `Splinter` that supports mutation. This involves iterating through all
173    /// values and rebuilding the data structure, so it has a cost proportional
174    /// to the number of elements.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead};
180    ///
181    /// let mut original = Splinter::EMPTY;
182    /// original.insert(100);
183    /// original.insert(200);
184    ///
185    /// let splinter_ref = original.encode_to_splinter_ref();
186    /// let decoded = splinter_ref.decode_to_splinter();
187    ///
188    /// assert_eq!(decoded.cardinality(), 2);
189    /// assert!(decoded.contains(100));
190    /// assert!(decoded.contains(200));
191    /// ```
192    pub fn decode_to_splinter(&self) -> Splinter {
193        Splinter::from_iter(self.iter())
194    }
195
196    /// Creates a `SplinterRef` from raw bytes, validating the format.
197    ///
198    /// This method parses and validates the serialized splinter format, checking:
199    /// - Sufficient data length
200    /// - Valid magic bytes
201    /// - Correct checksum
202    ///
203    /// IMPORTANT: This method *does not* recursively verify the entire
204    /// splinter, opting instead to rely on the checksum to detect any
205    /// corruption. Do not use Splinter with untrusted data as it's trivial to
206    /// construct a Splinter which will cause your program to panic at runtime.
207    ///
208    /// Returns an error if the data is corrupted or in an invalid format.
209    ///
210    /// # Errors
211    ///
212    /// - [`DecodeErr::Length`]: Not enough bytes in the buffer
213    /// - [`DecodeErr::Magic`]: Invalid magic bytes
214    /// - [`DecodeErr::Checksum`]: Data corruption detected
215    /// - [`DecodeErr::Validity`]: Invalid internal structure
216    /// - [`DecodeErr::SplinterV1`]: Data is from incompatible v1 format
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
222    ///
223    /// let mut splinter = Splinter::EMPTY;
224    /// splinter.insert(42);
225    /// let bytes = splinter.encode_to_bytes();
226    ///
227    /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
228    /// assert!(splinter_ref.contains(42));
229    /// ```
230    ///
231    /// Error handling:
232    ///
233    /// ```
234    /// use splinter_rs::{SplinterRef, codec::DecodeErr};
235    ///
236    /// let invalid_bytes = vec![0u8; 5]; // Too short
237    /// let result = SplinterRef::from_bytes(invalid_bytes);
238    /// assert!(matches!(result, Err(DecodeErr::Length)));
239    /// ```
240    pub fn from_bytes(data: B) -> Result<Self, DecodeErr> {
241        pub(crate) const SPLINTER_V1_MAGIC: [u8; 4] = [0xDA, 0xAE, 0x12, 0xDF];
242        if data.len() >= 4
243            && data.starts_with(&SPLINTER_V1_MAGIC)
244            && !data.ends_with(&SPLINTER_V2_MAGIC)
245        {
246            return Err(DecodeErr::SplinterV1);
247        }
248
249        if data.len() < Footer::SIZE {
250            return Err(DecodeErr::Length);
251        }
252        let (partitions, footer) = data.split_at(data.len() - Footer::SIZE);
253        Footer::ref_from_bytes(footer)?.validate(partitions)?;
254        PartitionRef::<High>::from_suffix(partitions)?;
255        Ok(Self { data })
256    }
257
258    pub(crate) fn load_unchecked(&self) -> PartitionRef<'_, High> {
259        let without_footer = &self.data[..(self.data.len() - Footer::SIZE)];
260        PartitionRef::from_suffix(without_footer).unwrap()
261    }
262}
263
264impl<B: Deref<Target = [u8]>> PartitionRead<High> for SplinterRef<B> {
265    fn cardinality(&self) -> usize {
266        self.load_unchecked().cardinality()
267    }
268
269    fn is_empty(&self) -> bool {
270        self.load_unchecked().is_empty()
271    }
272
273    fn contains(&self, value: u32) -> bool {
274        self.load_unchecked().contains(value)
275    }
276
277    fn rank(&self, value: u32) -> usize {
278        self.load_unchecked().rank(value)
279    }
280
281    fn select(&self, idx: usize) -> Option<u32> {
282        self.load_unchecked().select(idx)
283    }
284
285    fn last(&self) -> Option<u32> {
286        self.load_unchecked().last()
287    }
288
289    fn iter(&self) -> impl Iterator<Item = u32> {
290        self.load_unchecked().into_iter()
291    }
292}
293
294impl<B: Deref<Target = [u8]>> PartialEq<Splinter> for SplinterRef<B> {
295    #[inline]
296    fn eq(&self, other: &Splinter) -> bool {
297        other == self
298    }
299}
300
301impl<B: Deref<Target = [u8]>, B2: Deref<Target = [u8]>> PartialEq<SplinterRef<B2>>
302    for SplinterRef<B>
303{
304    fn eq(&self, other: &SplinterRef<B2>) -> bool {
305        self.load_unchecked() == other.load_unchecked()
306    }
307}
308
#[cfg(test)]
mod test {
    use quickcheck::TestResult;
    use quickcheck_macros::quickcheck;

    use crate::{
        Optimizable, PartitionRead, Splinter,
        testutil::{SetGen, mksplinter},
    };

    #[test]
    fn test_empty() {
        let splinter = mksplinter(&[]).encode_to_splinter_ref();

        assert_eq!(splinter.decode_to_splinter(), Splinter::EMPTY);
        assert!(!splinter.contains(0));
        assert_eq!(splinter.cardinality(), 0);
        assert_eq!(splinter.last(), None);
    }

    /// This is a regression test for a bug in the SplinterRef encoding. The bug
    /// was that we used LittleEndian encoded values to store unaligned values,
    /// which sort in reverse order from what we expect.
    #[test]
    fn test_contains_bug() {
        let mut set_gen = SetGen::new(0xDEAD_BEEF);
        let set = set_gen.random(1024);
        // `set.len() / 3` is already `usize`; no cast needed (matches the
        // indexing style used by the quickcheck tests below).
        let lookup = set[set.len() / 3];
        let splinter = mksplinter(&set).encode_to_splinter_ref();
        assert!(splinter.contains(lookup))
    }

    #[quickcheck]
    fn test_splinter_ref_quickcheck(set: Vec<u32>) -> bool {
        let splinter = mksplinter(&set).encode_to_splinter_ref();
        if set.is_empty() {
            !splinter.contains(123)
        } else {
            let lookup = set[set.len() / 3];
            splinter.contains(lookup)
        }
    }

    #[quickcheck]
    fn test_splinter_opt_ref_quickcheck(set: Vec<u32>) -> bool {
        let mut splinter = mksplinter(&set);
        splinter.optimize();
        let splinter = splinter.encode_to_splinter_ref();
        if set.is_empty() {
            !splinter.contains(123)
        } else {
            let lookup = set[set.len() / 3];
            splinter.contains(lookup)
        }
    }

    #[quickcheck]
    fn test_splinter_ref_eq_quickcheck(set: Vec<u32>) -> bool {
        let ref1 = mksplinter(&set).encode_to_splinter_ref();
        let ref2 = mksplinter(&set).encode_to_splinter_ref();
        ref1 == ref2
    }

    #[quickcheck]
    fn test_splinter_opt_ref_eq_quickcheck(set: Vec<u32>) -> bool {
        let mut ref1 = mksplinter(&set);
        ref1.optimize();
        let ref1 = ref1.encode_to_splinter_ref();
        let ref2 = mksplinter(&set).encode_to_splinter_ref();
        ref1 == ref2
    }

    #[quickcheck]
    fn test_splinter_ref_ne_quickcheck(set1: Vec<u32>, set2: Vec<u32>) -> TestResult {
        if set1 == set2 {
            TestResult::discard()
        } else {
            let ref1 = mksplinter(&set1).encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            TestResult::from_bool(ref1 != ref2)
        }
    }

    #[quickcheck]
    fn test_splinter_opt_ref_ne_quickcheck(set1: Vec<u32>, set2: Vec<u32>) -> TestResult {
        if set1 == set2 {
            TestResult::discard()
        } else {
            let mut ref1 = mksplinter(&set1);
            ref1.optimize();
            let ref1 = ref1.encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            TestResult::from_bool(ref1 != ref2)
        }
    }
}