1use std::{fmt::Debug, ops::Deref};
2
3use bytes::Bytes;
4
5use crate::{
6 Cut, Encodable, Merge, Optimizable, SplinterRef,
7 codec::{encoder::Encoder, footer::Footer},
8 cow::CowSplinter,
9 level::High,
10 partition::Partition,
11 traits::{PartitionRead, PartitionWrite},
12};
13
14#[derive(Clone, PartialEq, Eq, Default, Debug)]
61pub struct Splinter(Partition<High>);
62
63static_assertions::const_assert_eq!(std::mem::size_of::<Splinter>(), 40);
64
65impl Splinter {
66 pub const EMPTY: Self = Splinter(Partition::EMPTY);
68
69 pub fn encode_to_splinter_ref(&self) -> SplinterRef<Bytes> {
89 SplinterRef { data: self.encode_to_bytes() }
90 }
91}
92
93impl FromIterator<u32> for Splinter {
94 fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
95 Self(Partition::<High>::from_iter(iter))
96 }
97}
98
99impl PartitionRead<High> for Splinter {
100 #[inline]
116 fn cardinality(&self) -> usize {
117 self.0.cardinality()
118 }
119
120 #[inline]
134 fn is_empty(&self) -> bool {
135 self.0.is_empty()
136 }
137
138 #[inline]
154 fn contains(&self, value: u32) -> bool {
155 self.0.contains(value)
156 }
157
158 #[inline]
179 fn rank(&self, value: u32) -> usize {
180 self.0.rank(value)
181 }
182
183 #[inline]
203 fn select(&self, idx: usize) -> Option<u32> {
204 self.0.select(idx)
205 }
206
207 #[inline]
224 fn last(&self) -> Option<u32> {
225 self.0.last()
226 }
227
228 #[inline]
244 fn iter(&self) -> impl Iterator<Item = u32> {
245 self.0.iter()
246 }
247}
248
249impl PartitionWrite<High> for Splinter {
250 #[inline]
274 fn insert(&mut self, value: u32) -> bool {
275 self.0.insert(value)
276 }
277
278 #[inline]
303 fn remove(&mut self, value: u32) -> bool {
304 self.0.remove(value)
305 }
306}
307
308impl Encodable for Splinter {
309 fn encoded_size(&self) -> usize {
310 self.0.encoded_size() + std::mem::size_of::<Footer>()
311 }
312
313 fn encode<B: bytes::BufMut>(&self, encoder: &mut Encoder<B>) {
314 self.0.encode(encoder);
315 encoder.write_footer();
316 }
317}
318
319impl Optimizable for Splinter {
320 #[inline]
321 fn optimize(&mut self) {
322 self.0.optimize();
323 }
324}
325
326impl Merge for Splinter {
327 fn merge(&mut self, rhs: &Self) {
328 self.0.merge(&rhs.0)
329 }
330}
331
332impl<B: Deref<Target = [u8]>> Merge<SplinterRef<B>> for Splinter {
333 fn merge(&mut self, rhs: &SplinterRef<B>) {
334 self.0.merge(&rhs.load_unchecked())
335 }
336}
337
338impl<B: Deref<Target = [u8]>> Merge<CowSplinter<B>> for Splinter {
339 fn merge(&mut self, rhs: &CowSplinter<B>) {
340 match rhs {
341 CowSplinter::Ref(splinter_ref) => self.merge(splinter_ref),
342 CowSplinter::Owned(splinter) => self.merge(splinter),
343 }
344 }
345}
346
347impl Cut for Splinter {
348 type Out = Self;
349
350 fn cut(&mut self, rhs: &Self) -> Self::Out {
351 Self(self.0.cut(&rhs.0))
352 }
353}
354
355impl<B: Deref<Target = [u8]>> Cut<SplinterRef<B>> for Splinter {
356 type Out = Self;
357
358 fn cut(&mut self, rhs: &SplinterRef<B>) -> Self::Out {
359 Self(self.0.cut(&rhs.load_unchecked()))
360 }
361}
362
363impl<B: Deref<Target = [u8]>> Cut<CowSplinter<B>> for Splinter {
364 type Out = Self;
365
366 fn cut(&mut self, rhs: &CowSplinter<B>) -> Self::Out {
367 match rhs {
368 CowSplinter::Ref(splinter_ref) => self.cut(splinter_ref),
369 CowSplinter::Owned(splinter) => self.cut(splinter),
370 }
371 }
372}
373
374impl<B: Deref<Target = [u8]>> PartialEq<SplinterRef<B>> for Splinter {
375 #[inline]
376 fn eq(&self, other: &SplinterRef<B>) -> bool {
377 self.0 == other.load_unchecked()
378 }
379}
380
381impl<B: Deref<Target = [u8]>> PartialEq<CowSplinter<B>> for Splinter {
382 fn eq(&self, other: &CowSplinter<B>) -> bool {
383 match other {
384 CowSplinter::Ref(splinter_ref) => self.eq(splinter_ref),
385 CowSplinter::Owned(splinter) => self.eq(splinter),
386 }
387 }
388}
389
390#[cfg(test)]
391mod tests {
392 use super::*;
393 use crate::{
394 codec::Encodable,
395 testutil::{
396 SetGen, analyze_compression_patterns, mksplinter, ratio_to_marks, test_partition_read,
397 test_partition_write,
398 },
399 traits::Optimizable,
400 };
401 use itertools::Itertools;
402 use quickcheck::TestResult;
403 use quickcheck_macros::quickcheck;
404 use roaring::RoaringBitmap;
405
406 #[test]
407 fn test_sanity() {
408 let mut splinter = Splinter::EMPTY;
409
410 assert!(splinter.insert(1));
411 assert!(!splinter.insert(1));
412 assert!(splinter.contains(1));
413
414 let values = [1024, 123, 16384];
415 for v in values {
416 assert!(splinter.insert(v));
417 assert!(splinter.contains(v));
418 assert!(!splinter.contains(v + 1));
419 }
420
421 for i in 0..8192 + 10 {
422 splinter.insert(i);
423 }
424
425 splinter.optimize();
426
427 dbg!(&splinter);
428
429 let expected = splinter.iter().collect_vec();
430 test_partition_read(&splinter, &expected);
431 test_partition_write(&mut splinter);
432 }
433
434 #[test]
435 fn test_wat() {
436 let mut set_gen = SetGen::new(0xDEAD_BEEF);
437 let set = set_gen.random(1024);
438 let baseline_size = set.len() * 4;
439
440 let mut splinter = Splinter::from_iter(set.iter().copied());
441 splinter.optimize();
442
443 dbg!(&splinter, splinter.encoded_size(), baseline_size);
444 itertools::assert_equal(splinter.iter(), set.into_iter());
445 }
446
447 #[quickcheck]
448 fn test_splinter_read_quickcheck(set: Vec<u32>) -> TestResult {
449 let expected = set.iter().copied().sorted().dedup().collect_vec();
450 test_partition_read(&Splinter::from_iter(set), &expected);
451 TestResult::passed()
452 }
453
454 #[quickcheck]
455 fn test_splinter_write_quickcheck(set: Vec<u32>) -> TestResult {
456 let mut splinter = Splinter::from_iter(set);
457 test_partition_write(&mut splinter);
458 TestResult::passed()
459 }
460
461 #[quickcheck]
462 fn test_splinter_quickcheck(set: Vec<u32>) -> bool {
463 let splinter = mksplinter(&set);
464 if set.is_empty() {
465 !splinter.contains(123)
466 } else {
467 let lookup = set[set.len() / 3];
468 splinter.contains(lookup)
469 }
470 }
471
472 #[quickcheck]
473 fn test_splinter_opt_quickcheck(set: Vec<u32>) -> bool {
474 let mut splinter = mksplinter(&set);
475 splinter.optimize();
476 if set.is_empty() {
477 !splinter.contains(123)
478 } else {
479 let lookup = set[set.len() / 3];
480 splinter.contains(lookup)
481 }
482 }
483
484 #[test]
485 fn test_expected_compression() {
486 fn to_roaring(set: impl Iterator<Item = u32>) -> Vec<u8> {
487 let mut buf = std::io::Cursor::new(Vec::new());
488 let mut bmp = RoaringBitmap::from_sorted_iter(set).unwrap();
489 bmp.optimize();
490 bmp.serialize_into(&mut buf).unwrap();
491 buf.into_inner()
492 }
493
494 struct Report {
495 name: &'static str,
496 baseline: usize,
497 splinter: (usize, usize),
499 roaring: (usize, usize),
500
501 splinter_lz4: usize,
502 roaring_lz4: usize,
503 }
504
505 let mut reports = vec![];
506
507 let mut run_test = |name: &'static str,
508 set: Vec<u32>,
509 expected_set_size: usize,
510 expected_splinter: usize,
511 expected_roaring: usize| {
512 println!("-------------------------------------");
513 println!("running test: {name}");
514
515 assert_eq!(set.len(), expected_set_size, "Set size mismatch");
516
517 let mut splinter = Splinter::from_iter(set.clone());
518 splinter.optimize();
519 itertools::assert_equal(splinter.iter(), set.iter().copied());
520
521 test_partition_read(&splinter, &set);
522 test_partition_write(&mut splinter.clone());
523
524 let expected_size = splinter.encoded_size();
525 let splinter = splinter.encode_to_bytes();
526
527 assert_eq!(
528 splinter.len(),
529 expected_size,
530 "actual encoded size does not match declared encoded size"
531 );
532
533 let roaring = to_roaring(set.iter().copied());
534
535 analyze_compression_patterns(&splinter);
536
537 let splinter_lz4 = lz4::block::compress(&splinter, None, false).unwrap();
538 let roaring_lz4 = lz4::block::compress(&roaring, None, false).unwrap();
539
540 assert_eq!(
542 splinter,
543 lz4::block::decompress(&splinter_lz4, Some(splinter.len() as i32)).unwrap()
544 );
545 assert_eq!(
546 roaring,
547 lz4::block::decompress(&roaring_lz4, Some(roaring.len() as i32)).unwrap()
548 );
549
550 reports.push(Report {
551 name,
552 baseline: set.len() * std::mem::size_of::<u32>(),
553 splinter: (splinter.len(), expected_splinter),
554 roaring: (roaring.len(), expected_roaring),
555
556 splinter_lz4: splinter_lz4.len(),
557 roaring_lz4: roaring_lz4.len(),
558 });
559 };
560
561 let mut set_gen = SetGen::new(0xDEAD_BEEF);
562
563 run_test("empty", vec![], 0, 13, 8);
565
566 let set = set_gen.distributed(1, 1, 1, 1);
568 run_test("1 element", set, 1, 21, 18);
569
570 let set = set_gen.distributed(1, 1, 1, 256);
572 run_test("1 dense block", set, 256, 25, 15);
573
574 let set = set_gen.distributed(1, 1, 1, 128);
576 run_test("1 half full block", set, 128, 63, 255);
577
578 let set = set_gen.distributed(1, 1, 1, 16);
580 run_test("1 sparse block", set, 16, 81, 48);
581
582 let set = set_gen.distributed(1, 1, 8, 128);
584 run_test("8 half full blocks", set, 1024, 315, 2003);
585
586 let set = set_gen.distributed(1, 1, 8, 2);
588 run_test("8 sparse blocks", set, 16, 81, 48);
589
590 let set = set_gen.distributed(4, 4, 4, 128);
592 run_test("64 half full blocks", set, 8192, 2442, 16452);
593
594 let set = set_gen.distributed(4, 4, 4, 2);
596 run_test("64 sparse blocks", set, 128, 434, 392);
597
598 let set = set_gen.distributed(4, 8, 8, 128);
600 run_test("256 half full blocks", set, 32768, 9450, 65580);
601
602 let set = set_gen.distributed(4, 8, 8, 2);
604 run_test("256 sparse blocks", set, 512, 1290, 1288);
605
606 let set = set_gen.distributed(8, 8, 8, 128);
608 run_test("512 half full blocks", set, 65536, 18886, 130810);
609
610 let set = set_gen.distributed(8, 8, 8, 2);
612 run_test("512 sparse blocks", set, 1024, 2566, 2568);
613
614 let elements = 4096;
616
617 let set = set_gen.distributed(1, 1, 16, 256);
619 run_test("fully dense", set, elements, 80, 63);
620
621 let set = set_gen.distributed(1, 1, 32, 128);
623 run_test("128/block; dense", set, elements, 1179, 8208);
624
625 let set = set_gen.distributed(1, 1, 128, 32);
627 run_test("32/block; dense", set, elements, 4539, 8208);
628
629 let set = set_gen.distributed(1, 1, 256, 16);
631 run_test("16/block; dense", set, elements, 5147, 8208);
632
633 let set = set_gen.distributed(1, 32, 1, 128);
635 run_test("128/block; sparse mid", set, elements, 1365, 8282);
636
637 let set = set_gen.distributed(32, 1, 1, 128);
639 run_test("128/block; sparse high", set, elements, 1582, 8224);
640
641 let set = set_gen.distributed(1, 256, 16, 1);
643 run_test("1/block; sparse mid", set, elements, 9749, 10248);
644
645 let set = set_gen.distributed(256, 16, 1, 1);
647 run_test("1/block; sparse high", set, elements, 14350, 40968);
648
649 let set = set_gen.dense(1, 16, 256, 1);
651 run_test("1/block; spread low", set, elements, 8325, 8328);
652
653 let set = set_gen.dense(8, 8, 8, 8);
655 run_test("dense throughout", set, elements, 4113, 2700);
656
657 let set = set_gen.dense(1, 1, 64, 64);
659 run_test("dense low", set, elements, 529, 267);
660
661 let set = set_gen.dense(1, 32, 16, 8);
663 run_test("dense mid/low", set, elements, 4113, 2376);
664
665 run_test("random/32", set_gen.random(32), 32, 145, 328);
667 run_test("random/256", set_gen.random(256), 256, 1041, 2544);
668 run_test("random/1024", set_gen.random(1024), 1024, 4113, 10168);
669 run_test("random/4096", set_gen.random(4096), 4096, 14350, 40056);
670 run_test("random/16384", set_gen.random(16384), 16384, 51214, 148656);
671 run_test("random/65535", set_gen.random(65535), 65535, 198667, 461278);
672
673 let mut fail_test = false;
674
675 println!("{}", "-".repeat(83));
676 println!(
677 "{:30} {:12} {:>6} {:>10} {:>10} {:>10}",
678 "test", "bitmap", "size", "expected", "relative", "ok"
679 );
680 for report in &reports {
681 println!(
682 "{:30} {:12} {:6} {:10} {:>10} {:>10}",
683 report.name,
684 "Splinter",
685 report.splinter.0,
686 report.splinter.1,
687 "1.00",
688 if report.splinter.0 == report.splinter.1 {
689 "ok"
690 } else {
691 fail_test = true;
692 "FAIL"
693 }
694 );
695
696 let diff = report.roaring.0 as f64 / report.splinter.0 as f64;
697 let ok_status = if report.roaring.0 != report.roaring.1 {
698 fail_test = true;
699 "FAIL".into()
700 } else {
701 ratio_to_marks(diff)
702 };
703 println!(
704 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
705 "", "Roaring", report.roaring.0, report.roaring.1, diff, ok_status
706 );
707
708 let diff = report.splinter_lz4 as f64 / report.splinter.0 as f64;
709 println!(
710 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
711 "",
712 "Splinter LZ4",
713 report.splinter_lz4,
714 report.splinter_lz4,
715 diff,
716 ratio_to_marks(diff)
717 );
718
719 let diff = report.roaring_lz4 as f64 / report.splinter_lz4 as f64;
720 println!(
721 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
722 "",
723 "Roaring LZ4",
724 report.roaring_lz4,
725 report.roaring_lz4,
726 diff,
727 ratio_to_marks(diff)
728 );
729
730 let diff = report.baseline as f64 / report.splinter.0 as f64;
731 println!(
732 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
733 "",
734 "Baseline",
735 report.baseline,
736 report.baseline,
737 diff,
738 ratio_to_marks(diff)
739 );
740 }
741
742 let avg_ratio = reports
744 .iter()
745 .map(|r| r.splinter_lz4 as f64 / r.splinter.0 as f64)
746 .sum::<f64>()
747 / reports.len() as f64;
748
749 println!("average compression ratio (splinter_lz4 / splinter): {avg_ratio:.2}");
750
751 assert!(!fail_test, "compression test failed");
752 }
753}