splinter_rs/splinter.rs
1use std::{fmt::Debug, ops::RangeBounds};
2
3use bytes::Bytes;
4
5use crate::{
6 Encodable, Optimizable, SplinterRef,
7 codec::{encoder::Encoder, footer::Footer},
8 level::High,
9 partition::Partition,
10 traits::{PartitionRead, PartitionWrite},
11 util::RangeExt,
12};
13
14/// A compressed bitmap optimized for small, sparse sets of 32-bit unsigned integers.
15///
16/// `Splinter` is the main owned data structure that can be built incrementally by inserting
17/// values and then optimized for size and query performance. It uses a 256-way tree structure
18/// by decomposing integers into big-endian component bytes, with nodes optimized into four
19/// different storage classes: tree, vec, bitmap, and run.
20///
21/// For zero-copy querying of serialized data, see [`SplinterRef`].
22/// For a clone-on-write wrapper, see [`crate::CowSplinter`].
23///
24/// # Examples
25///
26/// Basic usage:
27///
28/// ```
29/// use splinter_rs::{Splinter, PartitionWrite, PartitionRead, Optimizable};
30///
31/// let mut splinter = Splinter::from_iter([1024, 2048, 123]);
32///
33/// // Check membership
34/// assert!(splinter.contains(1024));
35/// assert!(!splinter.contains(999));
36///
37/// // Get cardinality
38/// assert_eq!(splinter.cardinality(), 3);
39///
40/// // Optimize for better compression, recommended before encoding to bytes.
41/// splinter.optimize();
42/// ```
43///
44/// Building from iterator:
45///
46/// ```
47/// use splinter_rs::{Splinter, PartitionRead};
48///
49/// let values = vec![100, 200, 300, 400];
50/// let splinter: Splinter = values.into_iter().collect();
51///
52/// assert_eq!(splinter.cardinality(), 4);
53/// assert!(splinter.contains(200));
54/// ```
55#[derive(Clone, PartialEq, Eq, Default, Debug)]
56pub struct Splinter(Partition<High>);
57
58static_assertions::const_assert_eq!(std::mem::size_of::<Splinter>(), 40);
59
60impl Splinter {
61 /// An empty Splinter, suitable for usage in a const context.
62 pub const EMPTY: Self = Splinter(Partition::EMPTY);
63
64 /// A full Splinter, suitable for usage in a const context.
65 pub const FULL: Self = Splinter(Partition::Full);
66
67 /// Encodes this splinter into a [`SplinterRef`] for zero-copy querying.
68 ///
69 /// This method serializes the splinter data and returns a [`SplinterRef<Bytes>`]
70 /// that can be used for efficient read-only operations without deserializing
71 /// the underlying data structure.
72 ///
73 /// # Examples
74 ///
75 /// ```
76 /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
77 ///
78 /// let mut splinter = Splinter::from_iter([42, 1337]);
79 ///
80 /// let splinter_ref = splinter.encode_to_splinter_ref();
81 /// assert_eq!(splinter_ref.cardinality(), 2);
82 /// assert!(splinter_ref.contains(42));
83 /// ```
84 pub fn encode_to_splinter_ref(&self) -> SplinterRef<Bytes> {
85 SplinterRef { data: self.encode_to_bytes() }
86 }
87
88 #[inline(always)]
89 pub(crate) fn new(inner: Partition<High>) -> Self {
90 Self(inner)
91 }
92
93 #[inline(always)]
94 pub(crate) fn inner(&self) -> &Partition<High> {
95 &self.0
96 }
97
98 #[inline(always)]
99 pub(crate) fn inner_mut(&mut self) -> &mut Partition<High> {
100 &mut self.0
101 }
102}
103
104impl FromIterator<u32> for Splinter {
105 fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
106 Self(Partition::<High>::from_iter(iter))
107 }
108}
109
110impl<R: RangeBounds<u32>> From<R> for Splinter {
111 fn from(range: R) -> Self {
112 if let Some(range) = range.try_into_inclusive() {
113 if range.start() == &u32::MIN && range.end() == &u32::MAX {
114 Self::FULL
115 } else {
116 Self(Partition::<High>::from(range))
117 }
118 } else {
119 // range is empty
120 Self::EMPTY
121 }
122 }
123}
124
125impl PartitionRead<High> for Splinter {
126 /// Returns the total number of elements in this splinter.
127 ///
128 /// # Examples
129 ///
130 /// ```
131 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
132 ///
133 /// let mut splinter = Splinter::EMPTY;
134 /// assert_eq!(splinter.cardinality(), 0);
135 ///
136 /// let splinter = Splinter::from_iter([100, 200, 300]);
137 /// assert_eq!(splinter.cardinality(), 3);
138 /// ```
139 #[inline]
140 fn cardinality(&self) -> usize {
141 self.0.cardinality()
142 }
143
144 /// Returns `true` if this splinter contains no elements.
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
150 ///
151 /// let mut splinter = Splinter::EMPTY;
152 /// assert!(splinter.is_empty());
153 ///
154 /// let splinter = Splinter::from_iter([42]);
155 /// assert!(!splinter.is_empty());
156 /// ```
157 #[inline]
158 fn is_empty(&self) -> bool {
159 self.0.is_empty()
160 }
161
162 /// Returns `true` if this splinter contains the specified value.
163 ///
164 /// # Examples
165 ///
166 /// ```
167 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
168 ///
169 /// let splinter = Splinter::from_iter([42, 1337]);
170 ///
171 /// assert!(splinter.contains(42));
172 /// assert!(splinter.contains(1337));
173 /// assert!(!splinter.contains(999));
174 /// ```
175 #[inline]
176 fn contains(&self, value: u32) -> bool {
177 self.0.contains(value)
178 }
179
180 /// Returns the 0-based position of the value in this splinter if it exists.
181 ///
182 /// This method searches for the given value in the splinter and returns its position
183 /// in the sorted sequence of all elements. If the value doesn't exist, returns `None`.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
189 ///
190 /// let splinter = Splinter::from_iter([10, 20, 30]);
191 ///
192 /// assert_eq!(splinter.position(10), Some(0));
193 /// assert_eq!(splinter.position(20), Some(1));
194 /// assert_eq!(splinter.position(30), Some(2));
195 /// assert_eq!(splinter.position(25), None); // doesn't exist
196 /// ```
197 #[inline]
198 fn position(&self, value: u32) -> Option<usize> {
199 self.0.position(value)
200 }
201
202 /// Returns the number of elements in this splinter that are less than or equal to the given value.
203 ///
204 /// This is also known as the "rank" of the value in the sorted sequence of all elements.
205 ///
206 /// # Examples
207 ///
208 /// ```
209 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
210 ///
211 /// let splinter = Splinter::from_iter([10, 20, 30]);
212 ///
213 /// assert_eq!(splinter.rank(5), 0); // No elements <= 5
214 /// assert_eq!(splinter.rank(10), 1); // One element <= 10
215 /// assert_eq!(splinter.rank(25), 2); // Two elements <= 25
216 /// assert_eq!(splinter.rank(30), 3); // Three elements <= 30
217 /// assert_eq!(splinter.rank(50), 3); // Three elements <= 50
218 /// ```
219 #[inline]
220 fn rank(&self, value: u32) -> usize {
221 self.0.rank(value)
222 }
223
224 /// Returns the element at the given index in the sorted sequence, or `None` if the index is out of bounds.
225 ///
226 /// The index is 0-based, so `select(0)` returns the smallest element.
227 ///
228 /// # Examples
229 ///
230 /// ```
231 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
232 ///
233 /// let splinter = Splinter::from_iter([100, 50, 200]);
234 ///
235 /// assert_eq!(splinter.select(0), Some(50)); // Smallest element
236 /// assert_eq!(splinter.select(1), Some(100)); // Second smallest
237 /// assert_eq!(splinter.select(2), Some(200)); // Largest element
238 /// assert_eq!(splinter.select(3), None); // Out of bounds
239 /// ```
240 #[inline]
241 fn select(&self, idx: usize) -> Option<u32> {
242 self.0.select(idx)
243 }
244
245 /// Returns the largest element in this splinter, or `None` if it's empty.
246 ///
247 /// # Examples
248 ///
249 /// ```
250 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
251 ///
252 /// let mut splinter = Splinter::EMPTY;
253 /// assert_eq!(splinter.last(), None);
254 ///
255 /// let splinter = Splinter::from_iter([100, 50, 200]);
256 ///
257 /// assert_eq!(splinter.last(), Some(200));
258 /// ```
259 #[inline]
260 fn last(&self) -> Option<u32> {
261 self.0.last()
262 }
263
264 /// Returns an iterator over all elements in ascending order.
265 ///
266 /// # Examples
267 ///
268 /// ```
269 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
270 ///
271 /// let splinter = Splinter::from_iter([300, 100, 200]);
272 ///
273 /// let values: Vec<u32> = splinter.iter().collect();
274 /// assert_eq!(values, vec![100, 200, 300]);
275 /// ```
276 #[inline]
277 fn iter(&self) -> impl Iterator<Item = u32> {
278 self.0.iter()
279 }
280
281 /// Returns `true` if this splinter contains all values in the specified range.
282 ///
283 /// This method checks whether every value within the given range bounds is present
284 /// in the splinter. An empty range is trivially contained and returns `true`.
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use splinter_rs::{Splinter, PartitionRead};
290 ///
291 /// let splinter = Splinter::from_iter([10, 11, 12, 13, 14, 15, 100]);
292 ///
293 /// // Check if range is fully contained
294 /// assert!(splinter.contains_all(10..=15));
295 /// assert!(splinter.contains_all(11..=14));
296 ///
297 /// // Missing values mean the range is not fully contained
298 /// assert!(!splinter.contains_all(10..=16)); // 16 is missing
299 /// assert!(!splinter.contains_all(9..=15)); // 9 is missing
300 ///
301 /// // Empty ranges are trivially contained
302 /// assert!(splinter.contains_all(50..50));
303 /// ```
304 #[inline]
305 fn contains_all<R: RangeBounds<u32>>(&self, values: R) -> bool {
306 self.0.contains_all(values)
307 }
308
309 /// Returns `true` if this splinter has a non-empty intersection with the specified range.
310 ///
311 /// This method checks whether any value within the given range is present
312 /// in the splinter. Returns `false` for empty ranges.
313 ///
314 /// # Examples
315 ///
316 /// ```
317 /// use splinter_rs::{Splinter, PartitionRead};
318 ///
319 /// let splinter = Splinter::from_iter([10, 20, 30]);
320 ///
321 /// // Check for any overlap
322 /// assert!(splinter.contains_any(10..=15)); // Contains 10
323 /// assert!(splinter.contains_any(5..=10)); // Contains 10
324 /// assert!(splinter.contains_any(25..=35)); // Contains 30
325 ///
326 /// // No overlap
327 /// assert!(!splinter.contains_any(0..=9)); // No values in range
328 /// assert!(!splinter.contains_any(40..=50)); // No values in range
329 ///
330 /// // Empty ranges have no intersection
331 /// assert!(!splinter.contains_any(50..50));
332 /// ```
333 #[inline]
334 fn contains_any<R: RangeBounds<u32>>(&self, values: R) -> bool {
335 self.0.contains_any(values)
336 }
337}
338
339impl PartitionWrite<High> for Splinter {
340 /// Inserts a value into this splinter.
341 ///
342 /// Returns `true` if the value was newly inserted, or `false` if it was already present.
343 ///
344 /// # Examples
345 ///
346 /// ```
347 /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
348 ///
349 /// let mut splinter = Splinter::EMPTY;
350 ///
351 /// // First insertion returns true
352 /// assert!(splinter.insert(42));
353 /// assert_eq!(splinter.cardinality(), 1);
354 ///
355 /// // Second insertion of same value returns false
356 /// assert!(!splinter.insert(42));
357 /// assert_eq!(splinter.cardinality(), 1);
358 ///
359 /// // Different value returns true
360 /// assert!(splinter.insert(100));
361 /// assert_eq!(splinter.cardinality(), 2);
362 /// ```
363 #[inline]
364 fn insert(&mut self, value: u32) -> bool {
365 self.0.insert(value)
366 }
367
368 /// Removes a value from this splinter.
369 ///
370 /// Returns `true` if the value was present and removed, or `false` if it was not present.
371 ///
372 /// # Examples
373 ///
374 /// ```
375 /// use splinter_rs::{Splinter, PartitionWrite, PartitionRead};
376 ///
377 /// let mut splinter = Splinter::from_iter([42, 100]);
378 /// assert_eq!(splinter.cardinality(), 2);
379 ///
380 /// // Remove existing value
381 /// assert!(splinter.remove(42));
382 /// assert_eq!(splinter.cardinality(), 1);
383 /// assert!(!splinter.contains(42));
384 /// assert!(splinter.contains(100));
385 ///
386 /// // Remove non-existent value
387 /// assert!(!splinter.remove(999));
388 /// assert_eq!(splinter.cardinality(), 1);
389 /// ```
390 #[inline]
391 fn remove(&mut self, value: u32) -> bool {
392 self.0.remove(value)
393 }
394
395 /// Removes a range of values from this splinter.
396 ///
397 /// This method removes all values that fall within the specified range bounds.
398 /// The range can be inclusive, exclusive, or half-bounded using standard Rust range syntax.
399 ///
400 /// # Examples
401 ///
402 /// ```
403 /// use splinter_rs::{Splinter, PartitionRead, PartitionWrite};
404 ///
405 /// let mut splinter = Splinter::from_iter(1..=10);
406 ///
407 /// // Remove values 3 through 7 (inclusive)
408 /// splinter.remove_range(3..=7);
409 /// assert!(!splinter.contains(5));
410 /// assert!(splinter.contains(2));
411 /// assert!(splinter.contains(8));
412 ///
413 /// // Remove from 9 onwards
414 /// splinter.remove_range(9..);
415 /// assert!(!splinter.contains(9));
416 /// assert!(!splinter.contains(10));
417 /// assert!(splinter.contains(8));
418 /// ```
419 #[inline]
420 fn remove_range<R: RangeBounds<u32>>(&mut self, values: R) {
421 self.0.remove_range(values);
422 }
423}
424
425impl Encodable for Splinter {
426 fn encoded_size(&self) -> usize {
427 self.0.encoded_size() + std::mem::size_of::<Footer>()
428 }
429
430 fn encode<B: bytes::BufMut>(&self, encoder: &mut Encoder<B>) {
431 self.0.encode(encoder);
432 encoder.write_footer();
433 }
434}
435
436impl Optimizable for Splinter {
437 #[inline]
438 fn optimize(&mut self) {
439 self.0.optimize();
440 }
441}
442
443impl Extend<u32> for Splinter {
444 #[inline]
445 fn extend<T: IntoIterator<Item = u32>>(&mut self, iter: T) {
446 self.0.extend(iter);
447 }
448}
449
450#[cfg(test)]
451mod tests {
452 use std::ops::Bound;
453
454 use super::*;
455 use crate::{
456 codec::Encodable,
457 level::{Level, Low},
458 testutil::{SetGen, mksplinter, ratio_to_marks, test_partition_read, test_partition_write},
459 traits::Optimizable,
460 };
461 use itertools::{Itertools, assert_equal};
462 use proptest::{
463 collection::{hash_set, vec},
464 proptest,
465 };
466 use rand::{SeedableRng, seq::index};
467 use roaring::RoaringBitmap;
468
469 #[test]
470 fn test_sanity() {
471 let mut splinter = Splinter::EMPTY;
472
473 assert!(splinter.insert(1));
474 assert!(!splinter.insert(1));
475 assert!(splinter.contains(1));
476
477 let values = [1024, 123, 16384];
478 for v in values {
479 assert!(splinter.insert(v));
480 assert!(splinter.contains(v));
481 assert!(!splinter.contains(v + 1));
482 }
483
484 for i in 0..8192 + 10 {
485 splinter.insert(i);
486 }
487
488 splinter.optimize();
489
490 dbg!(&splinter);
491
492 let expected = splinter.iter().collect_vec();
493 test_partition_read(&splinter, &expected);
494 test_partition_write(&mut splinter);
495 }
496
497 #[test]
498 fn test_wat() {
499 let mut set_gen = SetGen::new(0xDEAD_BEEF);
500 let set = set_gen.random_max(64, 4096);
501 let baseline_size = set.len() * 4;
502
503 let mut splinter = Splinter::from_iter(set.iter().copied());
504 splinter.optimize();
505
506 dbg!(&splinter, splinter.encoded_size(), baseline_size, set.len());
507 itertools::assert_equal(splinter.iter(), set.into_iter());
508 }
509
510 #[test]
511 fn test_splinter_write() {
512 let mut splinter = Splinter::from_iter(0u32..16384);
513 test_partition_write(&mut splinter);
514 }
515
516 #[test]
517 fn test_splinter_optimize_growth() {
518 let mut splinter = Splinter::EMPTY;
519 let mut rng = rand::rngs::StdRng::seed_from_u64(0xdeadbeef);
520 let set = index::sample(&mut rng, Low::MAX_LEN, 8);
521 dbg!(&splinter);
522 for i in set {
523 splinter.insert(i as u32);
524 dbg!(&splinter);
525 }
526 }
527
528 #[test]
529 fn test_splinter_from_range() {
530 let splinter = Splinter::from(..);
531 assert_eq!(splinter.cardinality(), (u32::MAX as usize) + 1);
532
533 let mut splinter = Splinter::from(1..);
534 assert_eq!(splinter.cardinality(), u32::MAX as usize);
535
536 splinter.remove(1024);
537 assert_eq!(splinter.cardinality(), (u32::MAX as usize) - 1);
538
539 let mut count = 1;
540 for i in (2048..=256000).step_by(1024) {
541 splinter.remove(i);
542 count += 1
543 }
544 assert_eq!(splinter.cardinality(), (u32::MAX as usize) - count);
545 }
546
547 proptest! {
548 #[test]
549 fn test_splinter_read_proptest(set in hash_set(0u32..16384, 0..1024)) {
550 let expected = set.iter().copied().sorted().collect_vec();
551 test_partition_read(&Splinter::from_iter(set), &expected);
552 }
553
554
555 #[test]
556 fn test_splinter_proptest(set in vec(0u32..16384, 0..1024)) {
557 let splinter = mksplinter(&set);
558 if set.is_empty() {
559 assert!(!splinter.contains(123));
560 } else {
561 let lookup = set[set.len() / 3];
562 assert!(splinter.contains(lookup));
563 }
564 }
565
566 #[test]
567 fn test_splinter_opt_proptest(set in vec(0u32..16384, 0..1024)) {
568 let mut splinter = mksplinter(&set);
569 splinter.optimize();
570 if set.is_empty() {
571 assert!(!splinter.contains(123));
572 } else {
573 let lookup = set[set.len() / 3];
574 assert!(splinter.contains(lookup));
575 }
576 }
577
578 #[test]
579 fn test_splinter_eq_proptest(set in vec(0u32..16384, 0..1024)) {
580 let a = mksplinter(&set);
581 assert_eq!(a, a.clone());
582 }
583
584 #[test]
585 fn test_splinter_opt_eq_proptest(set in vec(0u32..16384, 0..1024)) {
586 let mut a = mksplinter(&set);
587 let b = mksplinter(&set);
588 a.optimize();
589 assert_eq!(a, b);
590 }
591
592 #[test]
593 fn test_splinter_remove_range_proptest(set in hash_set(0u32..16384, 0..1024)) {
594 let expected = set.iter().copied().sorted().collect_vec();
595 let mut splinter = mksplinter(&expected);
596 if let Some(last) = expected.last() {
597 splinter.remove_range((Bound::Excluded(last), Bound::Unbounded));
598 assert_equal(splinter.iter(), expected);
599 }
600 }
601 }
602
603 #[test]
604 fn test_expected_compression() {
605 fn to_roaring(set: impl Iterator<Item = u32>) -> Vec<u8> {
606 let mut buf = std::io::Cursor::new(Vec::new());
607 let mut bmp = RoaringBitmap::from_sorted_iter(set).unwrap();
608 bmp.optimize();
609 bmp.serialize_into(&mut buf).unwrap();
610 buf.into_inner()
611 }
612
613 struct Report {
614 name: String,
615 baseline: usize,
616 // (actual, expected)
617 splinter: (usize, usize),
618 roaring: (usize, usize),
619
620 splinter_lz4: usize,
621 roaring_lz4: usize,
622 }
623
624 let mut reports = vec![];
625
626 let mut run_test = |name: &str,
627 set: Vec<u32>,
628 expected_set_size: usize,
629 expected_splinter: usize,
630 expected_roaring: usize| {
631 assert_eq!(set.len(), expected_set_size, "Set size mismatch");
632
633 let mut splinter = Splinter::from_iter(set.clone());
634 splinter.optimize();
635 itertools::assert_equal(splinter.iter(), set.iter().copied());
636
637 test_partition_read(&splinter, &set);
638
639 let expected_size = splinter.encoded_size();
640 let splinter = splinter.encode_to_bytes();
641
642 assert_eq!(
643 splinter.len(),
644 expected_size,
645 "actual encoded size does not match declared encoded size"
646 );
647
648 let roaring = to_roaring(set.iter().copied());
649
650 let splinter_lz4 = lz4::block::compress(&splinter, None, false).unwrap();
651 let roaring_lz4 = lz4::block::compress(&roaring, None, false).unwrap();
652
653 // verify round trip
654 assert_eq!(
655 splinter,
656 lz4::block::decompress(&splinter_lz4, Some(splinter.len() as i32)).unwrap()
657 );
658 assert_eq!(
659 roaring,
660 lz4::block::decompress(&roaring_lz4, Some(roaring.len() as i32)).unwrap()
661 );
662
663 reports.push(Report {
664 name: name.to_owned(),
665 baseline: set.len() * std::mem::size_of::<u32>(),
666 splinter: (splinter.len(), expected_splinter),
667 roaring: (roaring.len(), expected_roaring),
668
669 splinter_lz4: splinter_lz4.len(),
670 roaring_lz4: roaring_lz4.len(),
671 });
672 };
673
674 let mut set_gen = SetGen::new(0xDEAD_BEEF);
675
676 // empty splinter
677 run_test("empty", vec![], 0, 13, 8);
678
679 // 1 element in set
680 let set = set_gen.distributed(1, 1, 1, 1);
681 run_test("1 element", set, 1, 21, 18);
682
683 // 1 fully dense block
684 let set = set_gen.distributed(1, 1, 1, 256);
685 run_test("1 dense block", set, 256, 25, 15);
686
687 // 1 half full block
688 let set = set_gen.distributed(1, 1, 1, 128);
689 run_test("1 half full block", set, 128, 63, 255);
690
691 // 1 sparse block
692 let set = set_gen.distributed(1, 1, 1, 16);
693 run_test("1 sparse block", set, 16, 48, 48);
694
695 // 8 half full blocks
696 let set = set_gen.distributed(1, 1, 8, 128);
697 run_test("8 half full blocks", set, 1024, 315, 2003);
698
699 // 8 sparse blocks
700 let set = set_gen.distributed(1, 1, 8, 2);
701 run_test("8 sparse blocks", set, 16, 60, 48);
702
703 // 64 half full blocks
704 let set = set_gen.distributed(4, 4, 4, 128);
705 run_test("64 half full blocks", set, 8192, 2442, 16452);
706
707 // 64 sparse blocks
708 let set = set_gen.distributed(4, 4, 4, 2);
709 run_test("64 sparse blocks", set, 128, 410, 392);
710
711 // 256 half full blocks
712 let set = set_gen.distributed(4, 8, 8, 128);
713 run_test("256 half full blocks", set, 32768, 9450, 65580);
714
715 // 256 sparse blocks
716 let set = set_gen.distributed(4, 8, 8, 2);
717 run_test("256 sparse blocks", set, 512, 1290, 1288);
718
719 // 512 half full blocks
720 let set = set_gen.distributed(8, 8, 8, 128);
721 run_test("512 half full blocks", set, 65536, 18886, 130810);
722
723 // 512 sparse blocks
724 let set = set_gen.distributed(8, 8, 8, 2);
725 run_test("512 sparse blocks", set, 1024, 2566, 2568);
726
727 // the rest of the compression tests use 4k elements
728 let elements = 4096;
729
730 // fully dense splinter
731 let set = set_gen.distributed(1, 1, 16, 256);
732 run_test("fully dense", set, elements, 80, 63);
733
734 // 128 elements per block; dense partitions
735 let set = set_gen.distributed(1, 1, 32, 128);
736 run_test("128/block; dense", set, elements, 1179, 8208);
737
738 // 32 elements per block; dense partitions
739 let set = set_gen.distributed(1, 1, 128, 32);
740 run_test("32/block; dense", set, elements, 4539, 8208);
741
742 // 16 element per block; dense low partitions
743 let set = set_gen.distributed(1, 1, 256, 16);
744 run_test("16/block; dense", set, elements, 5147, 8208);
745
746 // 128 elements per block; sparse mid partitions
747 let set = set_gen.distributed(1, 32, 1, 128);
748 run_test("128/block; sparse mid", set, elements, 1365, 8282);
749
750 // 128 elements per block; sparse high partitions
751 let set = set_gen.distributed(32, 1, 1, 128);
752 run_test("128/block; sparse high", set, elements, 1582, 8224);
753
754 // 1 element per block; sparse mid partitions
755 let set = set_gen.distributed(1, 256, 16, 1);
756 run_test("1/block; sparse mid", set, elements, 9749, 10248);
757
758 // 1 element per block; sparse high partitions
759 let set = set_gen.distributed(256, 16, 1, 1);
760 run_test("1/block; sparse high", set, elements, 14350, 40968);
761
762 // 1/block; spread low
763 let set = set_gen.dense(1, 16, 256, 1);
764 run_test("1/block; spread low", set, elements, 8325, 8328);
765
766 // each partition is dense
767 let set = set_gen.dense(8, 8, 8, 8);
768 run_test("dense throughout", set, elements, 4113, 2700);
769
770 // the lowest partitions are dense
771 let set = set_gen.dense(1, 1, 64, 64);
772 run_test("dense low", set, elements, 529, 267);
773
774 // the mid and low partitions are dense
775 let set = set_gen.dense(1, 32, 16, 8);
776 run_test("dense mid/low", set, elements, 4113, 2376);
777
778 let random_cases = [
779 // random sets drawing from the enire u32 range
780 (32, High::MAX_LEN, 145, 328),
781 (256, High::MAX_LEN, 1041, 2544),
782 (1024, High::MAX_LEN, 4113, 10168),
783 (4096, High::MAX_LEN, 14350, 40056),
784 (16384, High::MAX_LEN, 51214, 148656),
785 (65536, High::MAX_LEN, 198670, 461288),
786 // random sets with values < 65536
787 (32, 65536, 92, 80),
788 (256, 65536, 540, 528),
789 (1024, 65536, 2071, 2064),
790 (4096, 65536, 5147, 8208),
791 (65536, 65536, 25, 15),
792 // small sets with values < 1024
793 (8, 1024, 44, 32),
794 (16, 1024, 60, 48),
795 (32, 1024, 79, 80),
796 (64, 1024, 111, 144),
797 (128, 1024, 168, 272),
798 ];
799
800 for (count, max, expected_splinter, expected_roaring) in random_cases {
801 let name = if max == High::MAX_LEN {
802 format!("random/{count}")
803 } else {
804 format!("random/{count}/{max}")
805 };
806 run_test(
807 &name,
808 set_gen.random_max(count, max),
809 count,
810 expected_splinter,
811 expected_roaring,
812 );
813 }
814
815 let mut fail_test = false;
816
817 println!("{}", "-".repeat(83));
818 println!(
819 "{:30} {:12} {:>6} {:>10} {:>10} {:>10}",
820 "test", "bitmap", "size", "expected", "relative", "ok"
821 );
822 for report in &reports {
823 println!(
824 "{:30} {:12} {:6} {:10} {:>10} {:>10}",
825 report.name,
826 "Splinter",
827 report.splinter.0,
828 report.splinter.1,
829 "1.00",
830 if report.splinter.0 == report.splinter.1 {
831 "ok"
832 } else {
833 fail_test = true;
834 "FAIL"
835 }
836 );
837
838 let diff = report.roaring.0 as f64 / report.splinter.0 as f64;
839 let ok_status = if report.roaring.0 != report.roaring.1 {
840 fail_test = true;
841 "FAIL".into()
842 } else {
843 ratio_to_marks(diff)
844 };
845 println!(
846 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
847 "", "Roaring", report.roaring.0, report.roaring.1, diff, ok_status
848 );
849
850 let diff = report.splinter_lz4 as f64 / report.splinter.0 as f64;
851 println!(
852 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
853 "",
854 "Splinter LZ4",
855 report.splinter_lz4,
856 report.splinter_lz4,
857 diff,
858 ratio_to_marks(diff)
859 );
860
861 let diff = report.roaring_lz4 as f64 / report.splinter_lz4 as f64;
862 println!(
863 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
864 "",
865 "Roaring LZ4",
866 report.roaring_lz4,
867 report.roaring_lz4,
868 diff,
869 ratio_to_marks(diff)
870 );
871
872 let diff = report.baseline as f64 / report.splinter.0 as f64;
873 println!(
874 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
875 "",
876 "Baseline",
877 report.baseline,
878 report.baseline,
879 diff,
880 ratio_to_marks(diff)
881 );
882 }
883
884 // calculate average compression ratio (splinter_lz4 / splinter)
885 let avg_ratio = reports
886 .iter()
887 .map(|r| r.splinter_lz4 as f64 / r.splinter.0 as f64)
888 .sum::<f64>()
889 / reports.len() as f64;
890
891 println!("average compression ratio (splinter_lz4 / splinter): {avg_ratio:.2}");
892
893 assert!(!fail_test, "compression test failed");
894 }
895}