// splinter_rs/splinter_ref.rs
1use std::{fmt::Debug, ops::Deref};
2
3use bytes::Bytes;
4use culprit::Culprit;
5use zerocopy::FromBytes;
6
7use crate::{
8 Splinter,
9 codec::{
10 DecodeErr, Encodable,
11 encoder::Encoder,
12 footer::{Footer, SPLINTER_V2_MAGIC},
13 partition_ref::PartitionRef,
14 },
15 level::High,
16 traits::PartitionRead,
17};
18
19/// A zero-copy reference to serialized splinter data.
20///
21/// `SplinterRef` allows efficient querying of compressed bitmap data without
22/// deserializing the underlying structure. It wraps any type that can be
23/// dereferenced to `[u8]` and provides all the same read operations as
24/// [`Splinter`], but with minimal memory overhead and no allocation during
25/// queries.
26///
27/// This is the preferred type for read-only operations on serialized splinter
28/// data, especially when the data comes from files, network, or other external
29/// sources.
30///
31/// # Type Parameter
32///
33/// - `B`: Any type that implements `Deref<Target = [u8]>` such as `&[u8]`,
34/// `Vec<u8>`, `Bytes`, `Arc<[u8]>`, etc.
35///
36/// # Examples
37///
38/// Creating from serialized bytes:
39///
40/// ```
41/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
42///
43/// // Create and populate a splinter
44/// let mut splinter = Splinter::EMPTY;
45/// splinter.insert(100);
46/// splinter.insert(200);
47///
48/// // Serialize it to bytes
49/// let bytes = splinter.encode_to_bytes();
50///
51/// // Create a zero-copy reference
52/// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
53/// assert_eq!(splinter_ref.cardinality(), 2);
54/// assert!(splinter_ref.contains(100));
55/// ```
56///
57/// Working with different buffer types:
58///
59/// ```
60/// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
61/// use std::sync::Arc;
62///
63/// let mut splinter = Splinter::EMPTY;
64/// splinter.insert(42);
65///
66/// let bytes = splinter.encode_to_bytes();
67/// let shared_bytes: Arc<[u8]> = bytes.to_vec().into();
68///
69/// let splinter_ref = SplinterRef::from_bytes(shared_bytes).unwrap();
70/// assert!(splinter_ref.contains(42));
71/// ```
#[derive(Clone)]
pub struct SplinterRef<B> {
    // Raw serialized splinter: partition data followed by a fixed-size
    // `Footer`. Validated once in `from_bytes`; all other methods assume
    // the invariant holds (see `load_unchecked`).
    pub(crate) data: B,
}
76
77impl<B: Deref<Target = [u8]>> Debug for SplinterRef<B> {
78 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
79 f.debug_tuple("SplinterRef")
80 .field(&self.load_unchecked())
81 .finish()
82 }
83}
84
85impl<B> SplinterRef<B> {
86 /// Returns a reference to the underlying data buffer.
87 ///
88 /// This provides access to the raw bytes that store the serialized splinter
89 /// data.
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
95 ///
96 /// let mut splinter = Splinter::EMPTY;
97 /// splinter.insert(42);
98 /// let bytes = splinter.encode_to_bytes();
99 /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
100 ///
101 /// let inner_bytes = splinter_ref.inner();
102 /// assert!(!inner_bytes.is_empty());
103 /// ```
104 #[inline]
105 pub fn inner(&self) -> &B {
106 &self.data
107 }
108
109 /// Consumes the `SplinterRef` and returns the underlying data buffer.
110 ///
111 /// This is useful when you need to take ownership of the underlying data
112 /// after you're done querying the splinter.
113 ///
114 /// # Examples
115 ///
116 /// ```
117 /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, Encodable};
118 ///
119 /// let mut splinter = Splinter::EMPTY;
120 /// splinter.insert(42);
121 /// let bytes = splinter.encode_to_bytes();
122 /// let splinter_ref = SplinterRef::from_bytes(bytes.clone()).unwrap();
123 ///
124 /// let recovered_bytes = splinter_ref.into_inner();
125 /// assert_eq!(recovered_bytes, bytes);
126 /// ```
127 #[inline]
128 pub fn into_inner(self) -> B {
129 self.data
130 }
131}
132
133impl SplinterRef<Bytes> {
134 /// Returns a clone of the underlying bytes.
135 ///
136 /// This is efficient for `Bytes` since it uses reference counting
137 /// internally and doesn't actually copy the data.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use splinter_rs::{Splinter, PartitionWrite};
143 ///
144 /// let mut splinter = Splinter::EMPTY;
145 /// splinter.insert(42);
146 /// let splinter_ref = splinter.encode_to_splinter_ref();
147 ///
148 /// let bytes_copy = splinter_ref.encode_to_bytes();
149 /// assert!(!bytes_copy.is_empty());
150 /// ```
151 #[inline]
152 pub fn encode_to_bytes(&self) -> Bytes {
153 self.data.clone()
154 }
155}
156
impl<B: Deref<Target = [u8]>> Encodable for SplinterRef<B> {
    // The buffer is already in serialized form, so encoding is a verbatim
    // copy of the underlying bytes.

    #[inline]
    fn encoded_size(&self) -> usize {
        self.data.len()
    }

    #[inline]
    fn encode<T: bytes::BufMut>(&self, encoder: &mut Encoder<T>) {
        encoder.write_splinter(&self.data);
    }
}
168
169impl<B: Deref<Target = [u8]>> SplinterRef<B> {
170 /// Converts this reference back to an owned [`Splinter`].
171 ///
172 /// This method deserializes the underlying data and creates a new owned
173 /// `Splinter` that supports mutation. This involves iterating through all
174 /// values and rebuilding the data structure, so it has a cost proportional
175 /// to the number of elements.
176 ///
177 /// # Examples
178 ///
179 /// ```
180 /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead};
181 ///
182 /// let mut original = Splinter::EMPTY;
183 /// original.insert(100);
184 /// original.insert(200);
185 ///
186 /// let splinter_ref = original.encode_to_splinter_ref();
187 /// let decoded = splinter_ref.decode_to_splinter();
188 ///
189 /// assert_eq!(decoded.cardinality(), 2);
190 /// assert!(decoded.contains(100));
191 /// assert!(decoded.contains(200));
192 /// ```
193 pub fn decode_to_splinter(&self) -> Splinter {
194 Splinter::new((&self.load_unchecked()).into())
195 }
196
197 /// Creates a `SplinterRef` from raw bytes, validating the format.
198 ///
199 /// This method parses and validates the serialized splinter format, checking:
200 /// - Sufficient data length
201 /// - Valid magic bytes
202 /// - Correct checksum
203 ///
204 /// IMPORTANT: This method *does not* recursively verify the entire
205 /// splinter, opting instead to rely on the checksum to detect any
206 /// corruption. Do not use Splinter with untrusted data as it's trivial to
207 /// construct a Splinter which will cause your program to panic at runtime.
208 ///
209 /// Returns an error if the data is corrupted or in an invalid format.
210 ///
211 /// # Errors
212 ///
213 /// - [`DecodeErr::Length`]: Not enough bytes in the buffer
214 /// - [`DecodeErr::Magic`]: Invalid magic bytes
215 /// - [`DecodeErr::Checksum`]: Data corruption detected
216 /// - [`DecodeErr::Validity`]: Invalid internal structure
217 /// - [`DecodeErr::SplinterV1`]: Data is from incompatible v1 format
218 ///
219 /// # Examples
220 ///
221 /// ```
222 /// use splinter_rs::{Splinter, SplinterRef, PartitionWrite, PartitionRead, Encodable};
223 ///
224 /// let mut splinter = Splinter::EMPTY;
225 /// splinter.insert(42);
226 /// let bytes = splinter.encode_to_bytes();
227 ///
228 /// let splinter_ref = SplinterRef::from_bytes(bytes).unwrap();
229 /// assert!(splinter_ref.contains(42));
230 /// ```
231 ///
232 /// Error handling:
233 ///
234 /// ```
235 /// use splinter_rs::{SplinterRef, codec::DecodeErr};
236 ///
237 /// let invalid_bytes = vec![0u8; 5]; // Too short
238 /// let result = SplinterRef::from_bytes(invalid_bytes);
239 /// assert!(matches!(result.unwrap_err().ctx(), DecodeErr::Length));
240 /// ```
241 pub fn from_bytes(data: B) -> culprit::Result<Self, DecodeErr> {
242 pub(crate) const SPLINTER_V1_MAGIC: [u8; 4] = [0xDA, 0xAE, 0x12, 0xDF];
243 if data.len() >= 4
244 && data.starts_with(&SPLINTER_V1_MAGIC)
245 && !data.ends_with(&SPLINTER_V2_MAGIC)
246 {
247 return Err(Culprit::new(DecodeErr::SplinterV1));
248 }
249
250 if data.len() < Footer::SIZE {
251 return Err(Culprit::new(DecodeErr::Length));
252 }
253 let (partitions, footer) = data.split_at(data.len() - Footer::SIZE);
254 Footer::ref_from_bytes(footer)?.validate(partitions)?;
255 PartitionRef::<High>::from_suffix(partitions)?;
256 Ok(Self { data })
257 }
258
259 pub(crate) fn load_unchecked(&self) -> PartitionRef<'_, High> {
260 let without_footer = &self.data[..(self.data.len() - Footer::SIZE)];
261 PartitionRef::from_suffix(without_footer).unwrap()
262 }
263}
264
impl<B: Deref<Target = [u8]>> PartitionRead<High> for SplinterRef<B> {
    // Every read operation delegates to a freshly parsed root partition.
    // `load_unchecked` performs no allocation and is safe here because the
    // buffer was validated when this `SplinterRef` was constructed.

    fn cardinality(&self) -> usize {
        self.load_unchecked().cardinality()
    }

    fn is_empty(&self) -> bool {
        self.load_unchecked().is_empty()
    }

    fn contains(&self, value: u32) -> bool {
        self.load_unchecked().contains(value)
    }

    fn rank(&self, value: u32) -> usize {
        self.load_unchecked().rank(value)
    }

    fn select(&self, idx: usize) -> Option<u32> {
        self.load_unchecked().select(idx)
    }

    fn last(&self) -> Option<u32> {
        self.load_unchecked().last()
    }

    fn iter(&self) -> impl Iterator<Item = u32> {
        // `into_iter` on the parsed partition; the iterator borrows `self.data`.
        self.load_unchecked().into_iter()
    }
}
294
#[cfg(test)]
mod test {
    use proptest::{collection::vec, prop_assume, proptest};

    use crate::{
        Optimizable, PartitionRead, Splinter,
        testutil::{SetGen, mksplinter},
    };

    /// An empty splinter round-trips and reports empty semantics.
    #[test]
    fn test_empty() {
        let splinter = mksplinter(&[]).encode_to_splinter_ref();

        assert_eq!(splinter.decode_to_splinter(), Splinter::EMPTY);
        assert!(!splinter.contains(0));
        assert_eq!(splinter.cardinality(), 0);
        assert_eq!(splinter.last(), None);
    }

    /// This is a regression test for a bug in the SplinterRef encoding. The bug
    /// was that we used LittleEndian encoded values to store unaligned values,
    /// which sort in reverse order from what we expect.
    #[test]
    fn test_contains_bug() {
        let mut set_gen = SetGen::new(0xDEAD_BEEF);
        let set = set_gen.random(1024);
        // `set.len() / 3` is already `usize`; no cast needed.
        let lookup = set[set.len() / 3];
        let splinter = mksplinter(&set).encode_to_splinter_ref();
        assert!(splinter.contains(lookup))
    }

    proptest! {
        #[test]
        fn test_splinter_ref_proptest(set in vec(0u32..16384, 0..1024)) {
            let splinter = mksplinter(&set).encode_to_splinter_ref();
            if set.is_empty() {
                assert!(!splinter.contains(123))
            } else {
                let lookup = set[set.len() / 3];
                assert!(splinter.contains(lookup))
            }
        }

        #[test]
        fn test_splinter_opt_ref_proptest(set in vec(0u32..16384, 0..1024)) {
            let mut splinter = mksplinter(&set);
            splinter.optimize();
            let splinter = splinter.encode_to_splinter_ref();
            if set.is_empty() {
                assert!(!splinter.contains(123))
            } else {
                let lookup = set[set.len() / 3];
                assert!(splinter.contains(lookup))
            }
        }

        #[test]
        fn test_splinter_ref_eq_proptest(set in vec(0u32..16384, 0..1024)) {
            let ref1 = mksplinter(&set).encode_to_splinter_ref();
            let ref2 = mksplinter(&set).encode_to_splinter_ref();
            assert_eq!(ref1, ref2)
        }

        #[test]
        fn test_splinter_opt_ref_eq_proptest(set in vec(0u32..16384, 0..1024)) {
            // Optimizing must not change equality with an unoptimized encoding.
            let mut ref1 = mksplinter(&set);
            ref1.optimize();
            let ref1 = ref1.encode_to_splinter_ref();
            let ref2 = mksplinter(&set).encode_to_splinter_ref();
            assert_eq!(ref1, ref2)
        }

        #[test]
        fn test_splinter_ref_ne_proptest(
            set1 in vec(0u32..16384, 0..1024),
            set2 in vec(0u32..16384, 0..1024),
        ) {
            prop_assume!(set1 != set2);

            let ref1 = mksplinter(&set1).encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            assert_ne!(ref1, ref2)
        }

        #[test]
        fn test_splinter_opt_ref_ne_proptest(
            set1 in vec(0u32..16384, 0..1024),
            set2 in vec(0u32..16384, 0..1024),
        ) {
            prop_assume!(set1 != set2);

            let mut ref1 = mksplinter(&set1);
            ref1.optimize();
            let ref1 = ref1.encode_to_splinter_ref();
            let ref2 = mksplinter(&set2).encode_to_splinter_ref();
            assert_ne!(ref1, ref2)
        }
    }

    /// Regression case captured from a failing proptest run: an optimized and
    /// an unoptimized encoding of the same (duplicate-heavy) set must compare
    /// equal.
    #[test]
    fn test_ref_wat() {
        #[rustfmt::skip]
        let set = [ 6400, 11776, 768, 15872, 6912, 0, 11008, 769, 770, 11009, 4608, 771, 0, 768, 6401, 0, 8192, 8192, 4609, 772, 4610, 0, 0, 0, 0, 0, 768, 773, 774, 14336, 0, 0, 0, 15872, 11010, 775, 0, 768, 11777, 776, 0, 0, 0, 6400, 14337, 8193, 0, 0, 0, 0, 0, 0, 0, ];
        let mut ref1 = mksplinter(&set);
        ref1.optimize();
        let ref1 = ref1.encode_to_splinter_ref();
        let ref2 = mksplinter(&set).encode_to_splinter_ref();
        assert_eq!(ref1, ref2)
    }
}
404}