splinter_rs/splinter_ref.rs
use std::{fmt::Debug, ops::Deref};

use bytes::Bytes;
use zerocopy::FromBytes;

use crate::{
    Splinter,
    codec::{
        DecodeErr, Encodable,
        encoder::Encoder,
        footer::{Footer, SPLINTER_V2_MAGIC},
        partition_ref::PartitionRef,
    },
    level::High,
    traits::PartitionRead,
};

/// A zero-copy reference to serialized splinter data.
///
/// `SplinterRef` allows efficient querying of compressed bitmap data without
/// deserializing the underlying structure. It wraps any type that can be
/// dereferenced to `[u8]` and provides all the same read operations as
/// [`Splinter`], but with minimal memory overhead and no allocation during
/// queries.
///
/// This is the preferred type for read-only operations on serialized splinter
/// data, especially when the data comes from files, the network, or other
/// external sources.
///
/// # Type Parameter
///
/// - `B`: Any type that implements `Deref<Target = [u8]>`, such as `&[u8]`,
///   `Vec<u8>`, `Bytes`, `Arc<[u8]>`, etc.
///
/// # Examples
///
/// Creating from serialized bytes:
///
/// ```
/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
///
/// // Create and populate a splinter
/// let mut splinter = Splinter::EMPTY;
/// splinter.insert(100);
/// splinter.insert(200);
///
/// // Serialize it to bytes
/// let bytes = splinter.encode_to_bytes();
///
/// // Create a zero-copy reference
/// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
/// assert_eq!(splinter_ref.cardinality(), 2);
/// assert!(splinter_ref.contains(100));
/// ```
///
/// Working with different buffer types:
///
/// ```
/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
/// use std::sync::Arc;
///
/// let mut splinter = Splinter::EMPTY;
/// splinter.insert(42);
///
/// let bytes = splinter.encode_to_bytes();
/// let shared_bytes: Arc<[u8]> = bytes.to_vec().into();
///
/// let splinter_ref = SplinterRef::from_bytes(shared_bytes).unwrap();
/// assert!(splinter_ref.contains(42));
/// ```
#[derive(Clone)]
pub struct SplinterRef<B> {
    pub(crate) data: B,
}

impl<B: Deref<Target = [u8]>> Debug for SplinterRef<B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_tuple("SplinterRef")
            .field(&self.load_unchecked())
            .finish()
    }
}

impl<B> SplinterRef<B> {
    /// Returns a reference to the underlying data buffer.
    ///
    /// This provides access to the raw bytes that store the serialized
    /// splinter data.
    ///
    /// # Examples
    ///
    /// ```
    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
    ///
    /// let mut splinter = Splinter::EMPTY;
    /// splinter.insert(42);
    /// let bytes = splinter.encode_to_bytes();
    /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
    ///
    /// let inner_bytes = splinter_ref.inner();
    /// assert!(!inner_bytes.is_empty());
    /// ```
    #[inline]
    pub fn inner(&self) -> &B {
        &self.data
    }

    /// Consumes the `SplinterRef` and returns the underlying data buffer.
    ///
    /// This is useful when you need to take ownership of the underlying data
    /// after you're done querying the splinter.
    ///
    /// # Examples
    ///
    /// ```
    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
    ///
    /// let mut splinter = Splinter::EMPTY;
    /// splinter.insert(42);
    /// let bytes = splinter.encode_to_bytes();
    /// let splinter_ref = SplinterRef::from_bytes(bytes.clone()).unwrap();
    ///
    /// let recovered_bytes = splinter_ref.into_inner();
    /// assert_eq!(recovered_bytes, bytes);
    /// ```
    #[inline]
    pub fn into_inner(self) -> B {
        self.data
    }
}

impl SplinterRef<Bytes> {
    /// Returns a clone of the underlying bytes.
    ///
    /// This is efficient for `Bytes` since it uses reference counting
    /// internally and doesn't actually copy the data.
    ///
    /// # Examples
    ///
    /// ```
    /// use splinter_rs::{Splinter, PartitionWrite};
    ///
    /// let mut splinter = Splinter::EMPTY;
    /// splinter.insert(42);
    /// let splinter_ref = splinter.encode_to_splinter_ref();
    ///
    /// let bytes_copy = splinter_ref.encode_to_bytes();
    /// assert!(!bytes_copy.is_empty());
    /// ```
    #[inline]
    pub fn encode_to_bytes(&self) -> Bytes {
        self.data.clone()
    }
}

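/// Encoding a [`SplinterRef`] copies the already-serialized buffer verbatim
/// (`encoded_size` is just the buffer length), so it never re-walks the
/// partition tree. A minimal round-trip sketch: since the buffer is copied
/// as-is, re-encoding should reproduce the original bytes exactly.
///
/// ```
/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
///
/// let mut splinter = Splinter::EMPTY;
/// splinter.insert(7);
/// let bytes = splinter.encode_to_bytes();
///
/// let splinter_ref = SplinterRef::from_bytes(bytes.clone()).unwrap();
/// // Re-encoding emits the underlying buffer byte-for-byte.
/// assert_eq!(splinter_ref.encode_to_bytes(), bytes);
/// ```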
impl<B: Deref<Target = [u8]>> Encodable for SplinterRef<B> {
    #[inline]
    fn encoded_size(&self) -> usize {
        self.data.len()
    }

    #[inline]
    fn encode<T: bytes::BufMut>(&self, encoder: &mut Encoder<T>) {
        encoder.write_splinter(&self.data);
    }
}

impl<B: Deref<Target = [u8]>> SplinterRef<B> {
    /// Converts this reference back to an owned [`Splinter`].
    ///
    /// This method deserializes the underlying data and creates a new owned
    /// `Splinter` that supports mutation. This involves iterating through all
    /// values and rebuilding the data structure, so it has a cost proportional
    /// to the number of elements.
    ///
    /// # Examples
    ///
    /// ```
    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead};
    ///
    /// let mut original = Splinter::EMPTY;
    /// original.insert(100);
    /// original.insert(200);
    ///
    /// let splinter_ref = original.encode_to_splinter_ref();
    /// let decoded = splinter_ref.decode_to_splinter();
    ///
    /// assert_eq!(decoded.cardinality(), 2);
    /// assert!(decoded.contains(100));
    /// assert!(decoded.contains(200));
    /// ```
    pub fn decode_to_splinter(&self) -> Splinter {
        Splinter::from_iter(self.iter())
    }

    /// Creates a `SplinterRef` from raw bytes, validating the format.
    ///
    /// This method parses and validates the serialized splinter format,
    /// checking:
    /// - Sufficient data length
    /// - Valid magic bytes
    /// - Correct checksum
    ///
    /// IMPORTANT: This method *does not* recursively verify the entire
    /// splinter, relying instead on the checksum to detect corruption. Do not
    /// use `Splinter` with untrusted data, as it is trivial to construct a
    /// splinter that will cause your program to panic at runtime.
    ///
    /// Returns an error if the data is corrupted or in an invalid format.
    ///
    /// # Errors
    ///
    /// - [`DecodeErr::Length`]: Not enough bytes in the buffer
    /// - [`DecodeErr::Magic`]: Invalid magic bytes
    /// - [`DecodeErr::Checksum`]: Data corruption detected
    /// - [`DecodeErr::Validity`]: Invalid internal structure
    /// - [`DecodeErr::SplinterV1`]: Data is in the incompatible v1 format
    ///
    /// # Examples
    ///
    /// ```
    /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
    ///
    /// let mut splinter = Splinter::EMPTY;
    /// splinter.insert(42);
    /// let bytes = splinter.encode_to_bytes();
    ///
    /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
    /// assert!(splinter_ref.contains(42));
    /// ```
    ///
    /// Error handling:
    ///
    /// ```
    /// use splinter_rs::{SplinterRef, codec::DecodeErr};
    ///
    /// let invalid_bytes = vec![0u8; 5]; // Too short
    /// let result = SplinterRef::from_bytes(invalid_bytes);
    /// assert!(matches!(result, Err(DecodeErr::Length)));
    /// ```
    pub fn from_bytes(data: B) -> Result<Self, DecodeErr> {
        const SPLINTER_V1_MAGIC: [u8; 4] = [0xDA, 0xAE, 0x12, 0xDF];
        if data.len() >= 4
            && data.starts_with(&SPLINTER_V1_MAGIC)
            && !data.ends_with(&SPLINTER_V2_MAGIC)
        {
            return Err(DecodeErr::SplinterV1);
        }

        if data.len() < Footer::SIZE {
            return Err(DecodeErr::Length);
        }
        let (partitions, footer) = data.split_at(data.len() - Footer::SIZE);
        Footer::ref_from_bytes(footer)?.validate(partitions)?;
        PartitionRef::<High>::from_suffix(partitions)?;
        Ok(Self { data })
    }

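    /// Parses the high-level `PartitionRef` out of the underlying buffer.
    ///
    /// This assumes the buffer was already validated by
    /// [`SplinterRef::from_bytes`] and panics if the data is malformed.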
    pub(crate) fn load_unchecked(&self) -> PartitionRef<'_, High> {
        let without_footer = &self.data[..(self.data.len() - Footer::SIZE)];
        PartitionRef::from_suffix(without_footer).unwrap()
    }
}

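/// Every read operation parses the partition tree directly out of the
/// serialized buffer via `load_unchecked`, so no decoding or allocation
/// happens up front.
///
/// A small sketch of the read API through the zero-copy reference:
///
/// ```
/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
///
/// let mut splinter = Splinter::EMPTY;
/// splinter.insert(10);
/// splinter.insert(20);
/// splinter.insert(30);
///
/// let splinter_ref = SplinterRef::from_bytes(splinter.encode_to_bytes()).unwrap();
///
/// assert_eq!(splinter_ref.cardinality(), 3);
/// assert!(splinter_ref.contains(20));
/// assert!(!splinter_ref.contains(25));
/// assert_eq!(splinter_ref.last(), Some(30));
/// ```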
impl<B: Deref<Target = [u8]>> PartitionRead<High> for SplinterRef<B> {
    fn cardinality(&self) -> usize {
        self.load_unchecked().cardinality()
    }

    fn is_empty(&self) -> bool {
        self.load_unchecked().is_empty()
    }

    fn contains(&self, value: u32) -> bool {
        self.load_unchecked().contains(value)
    }

    fn rank(&self, value: u32) -> usize {
        self.load_unchecked().rank(value)
    }

    fn select(&self, idx: usize) -> Option<u32> {
        self.load_unchecked().select(idx)
    }

    fn last(&self) -> Option<u32> {
        self.load_unchecked().last()
    }

    fn iter(&self) -> impl Iterator<Item = u32> {
        self.load_unchecked().into_iter()
    }
}

impl<B: Deref<Target = [u8]>> PartialEq<Splinter> for SplinterRef<B> {
    #[inline]
    fn eq(&self, other: &Splinter) -> bool {
        other == self
    }
}

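/// Two `SplinterRef`s compare equal when their decoded partitions are equal,
/// even when they are backed by different buffer types.
///
/// A minimal sketch comparing a `Bytes`-backed reference with a
/// `Vec<u8>`-backed one:
///
/// ```
/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
///
/// let mut splinter = Splinter::EMPTY;
/// splinter.insert(7);
/// let bytes = splinter.encode_to_bytes();
///
/// let a = SplinterRef::from_bytes(bytes.clone()).unwrap(); // SplinterRef<Bytes>
/// let b = SplinterRef::from_bytes(bytes.to_vec()).unwrap(); // SplinterRef<Vec<u8>>
/// assert_eq!(a, b);
/// ```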
impl<B: Deref<Target = [u8]>, B2: Deref<Target = [u8]>> PartialEq<SplinterRef<B2>>
    for SplinterRef<B>
{
    fn eq(&self, other: &SplinterRef<B2>) -> bool {
        self.load_unchecked() == other.load_unchecked()
    }
}

#[cfg(test)]
mod test {
    use quickcheck::TestResult;
    use quickcheck_macros::quickcheck;

    use crate::{
        Optimizable, PartitionRead, Splinter,
        testutil::{SetGen, mksplinter},
    };

    #[test]
    fn test_empty() {
        let splinter = mksplinter(&[]).encode_to_splinter_ref();

        assert_eq!(splinter.decode_to_splinter(), Splinter::EMPTY);
        assert!(!splinter.contains(0));
        assert_eq!(splinter.cardinality(), 0);
        assert_eq!(splinter.last(), None);
    }

    /// This is a regression test for a bug in the `SplinterRef` encoding: we
    /// stored unaligned values little-endian, which sorts them in the reverse
    /// order from what we expect.
    #[test]
    fn test_contains_bug() {
        let mut set_gen = SetGen::new(0xDEAD_BEEF);
        let set = set_gen.random(1024);
        let lookup = set[set.len() / 3];
        let splinter = mksplinter(&set).encode_to_splinter_ref();
        assert!(splinter.contains(lookup))
    }

    #[quickcheck]
    fn test_splinter_ref_quickcheck(set: Vec<u32>) -> bool {
        let splinter = mksplinter(&set).encode_to_splinter_ref();
        if set.is_empty() {
            !splinter.contains(123)
        } else {
            let lookup = set[set.len() / 3];
            splinter.contains(lookup)
        }
    }

    #[quickcheck]
    fn test_splinter_opt_ref_quickcheck(set: Vec<u32>) -> bool {
        let mut splinter = mksplinter(&set);
        splinter.optimize();
        let splinter = splinter.encode_to_splinter_ref();
        if set.is_empty() {
            !splinter.contains(123)
        } else {
            let lookup = set[set.len() / 3];
            splinter.contains(lookup)
        }
    }

    #[quickcheck]
    fn test_splinter_ref_eq_quickcheck(set: Vec<u32>) -> bool {
        let ref1 = mksplinter(&set).encode_to_splinter_ref();
        let ref2 = mksplinter(&set).encode_to_splinter_ref();
        ref1 == ref2
    }

    #[quickcheck]
    fn test_splinter_opt_ref_eq_quickcheck(set: Vec<u32>) -> bool {
        let mut ref1 = mksplinter(&set);
        ref1.optimize();
        let ref1 = ref1.encode_to_splinter_ref();
        let ref2 = mksplinter(&set).encode_to_splinter_ref();
        ref1 == ref2
    }

    #[quickcheck]
    fn test_splinter_ref_ne_quickcheck(set1: Vec<u32>, set2: Vec<u32>) -> TestResult {
        if set1 == set2 {
            TestResult::discard()
        } else {
            let ref1 = mksplinter(&set1).encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            TestResult::from_bool(ref1 != ref2)
        }
    }

    #[quickcheck]
    fn test_splinter_opt_ref_ne_quickcheck(set1: Vec<u32>, set2: Vec<u32>) -> TestResult {
        if set1 == set2 {
            TestResult::discard()
        } else {
            let mut ref1 = mksplinter(&set1);
            ref1.optimize();
            let ref1 = ref1.encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            TestResult::from_bool(ref1 != ref2)
        }
    }
}
404}