1use std::{fmt::Debug, ops::Deref};
2
3use bytes::Bytes;
4
5use crate::{
6 Cut, Encodable, Merge, Optimizable, SplinterRef,
7 codec::{encoder::Encoder, footer::Footer},
8 cow::CowSplinter,
9 level::High,
10 partition::Partition,
11 traits::{PartitionRead, PartitionWrite},
12};
13
14#[derive(Clone, PartialEq, Eq, Default, Debug)]
61pub struct Splinter(Partition<High>);
62
63static_assertions::const_assert_eq!(std::mem::size_of::<Splinter>(), 40);
64
65impl Splinter {
66 pub const EMPTY: Self = Splinter(Partition::EMPTY);
68
69 pub fn encode_to_splinter_ref(&self) -> SplinterRef<Bytes> {
89 SplinterRef { data: self.encode_to_bytes() }
90 }
91}
92
93impl FromIterator<u32> for Splinter {
94 fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
95 Self(Partition::<High>::from_iter(iter))
96 }
97}
98
99impl PartitionRead<High> for Splinter {
100 #[inline]
116 fn cardinality(&self) -> usize {
117 self.0.cardinality()
118 }
119
120 #[inline]
134 fn is_empty(&self) -> bool {
135 self.0.is_empty()
136 }
137
138 #[inline]
154 fn contains(&self, value: u32) -> bool {
155 self.0.contains(value)
156 }
157
158 #[inline]
179 fn rank(&self, value: u32) -> usize {
180 self.0.rank(value)
181 }
182
183 #[inline]
203 fn select(&self, idx: usize) -> Option<u32> {
204 self.0.select(idx)
205 }
206
207 #[inline]
224 fn last(&self) -> Option<u32> {
225 self.0.last()
226 }
227
228 #[inline]
244 fn iter(&self) -> impl Iterator<Item = u32> {
245 self.0.iter()
246 }
247}
248
249impl PartitionWrite<High> for Splinter {
250 #[inline]
274 fn insert(&mut self, value: u32) -> bool {
275 self.0.insert(value)
276 }
277
278 #[inline]
303 fn remove(&mut self, value: u32) -> bool {
304 self.0.remove(value)
305 }
306}
307
308impl Encodable for Splinter {
309 fn encoded_size(&self) -> usize {
310 self.0.encoded_size() + std::mem::size_of::<Footer>()
311 }
312
313 fn encode<B: bytes::BufMut>(&self, encoder: &mut Encoder<B>) {
314 self.0.encode(encoder);
315 encoder.write_footer();
316 }
317}
318
319impl Optimizable for Splinter {
320 #[inline]
321 fn optimize(&mut self) {
322 self.0.optimize();
323 }
324}
325
326impl Merge for Splinter {
327 fn merge(&mut self, rhs: &Self) {
328 self.0.merge(&rhs.0)
329 }
330}
331
332impl<B: Deref<Target = [u8]>> Merge<SplinterRef<B>> for Splinter {
333 fn merge(&mut self, rhs: &SplinterRef<B>) {
334 self.0.merge(&rhs.load_unchecked())
335 }
336}
337
338impl<B: Deref<Target = [u8]>> Merge<CowSplinter<B>> for Splinter {
339 fn merge(&mut self, rhs: &CowSplinter<B>) {
340 match rhs {
341 CowSplinter::Ref(splinter_ref) => self.merge(splinter_ref),
342 CowSplinter::Owned(splinter) => self.merge(splinter),
343 }
344 }
345}
346
347impl Cut for Splinter {
348 type Out = Self;
349
350 fn cut(&mut self, rhs: &Self) -> Self::Out {
351 Self(self.0.cut(&rhs.0))
352 }
353}
354
355impl<B: Deref<Target = [u8]>> Cut<SplinterRef<B>> for Splinter {
356 type Out = Self;
357
358 fn cut(&mut self, rhs: &SplinterRef<B>) -> Self::Out {
359 Self(self.0.cut(&rhs.load_unchecked()))
360 }
361}
362
363impl<B: Deref<Target = [u8]>> Cut<CowSplinter<B>> for Splinter {
364 type Out = Self;
365
366 fn cut(&mut self, rhs: &CowSplinter<B>) -> Self::Out {
367 match rhs {
368 CowSplinter::Ref(splinter_ref) => self.cut(splinter_ref),
369 CowSplinter::Owned(splinter) => self.cut(splinter),
370 }
371 }
372}
373
374impl<B: Deref<Target = [u8]>> PartialEq<SplinterRef<B>> for Splinter {
375 #[inline]
376 fn eq(&self, other: &SplinterRef<B>) -> bool {
377 self.0 == other.load_unchecked()
378 }
379}
380
381impl<B: Deref<Target = [u8]>> PartialEq<CowSplinter<B>> for Splinter {
382 fn eq(&self, other: &CowSplinter<B>) -> bool {
383 match other {
384 CowSplinter::Ref(splinter_ref) => self.eq(splinter_ref),
385 CowSplinter::Owned(splinter) => self.eq(splinter),
386 }
387 }
388}
389
390#[cfg(test)]
391mod tests {
392 use super::*;
393 use crate::{
394 codec::Encodable,
395 level::Level,
396 testutil::{SetGen, mksplinter, ratio_to_marks, test_partition_read, test_partition_write},
397 traits::Optimizable,
398 };
399 use itertools::Itertools;
400 use quickcheck::TestResult;
401 use quickcheck_macros::quickcheck;
402 use roaring::RoaringBitmap;
403
404 #[test]
405 fn test_sanity() {
406 let mut splinter = Splinter::EMPTY;
407
408 assert!(splinter.insert(1));
409 assert!(!splinter.insert(1));
410 assert!(splinter.contains(1));
411
412 let values = [1024, 123, 16384];
413 for v in values {
414 assert!(splinter.insert(v));
415 assert!(splinter.contains(v));
416 assert!(!splinter.contains(v + 1));
417 }
418
419 for i in 0..8192 + 10 {
420 splinter.insert(i);
421 }
422
423 splinter.optimize();
424
425 dbg!(&splinter);
426
427 let expected = splinter.iter().collect_vec();
428 test_partition_read(&splinter, &expected);
429 test_partition_write(&mut splinter);
430 }
431
432 #[test]
433 fn test_wat() {
434 let mut set_gen = SetGen::new(0xDEAD_BEEF);
435 let set = set_gen.random_max(64, 4096);
436 let baseline_size = set.len() * 4;
437
438 let mut splinter = Splinter::from_iter(set.iter().copied());
439 splinter.optimize();
440
441 dbg!(&splinter, splinter.encoded_size(), baseline_size, set.len());
442 itertools::assert_equal(splinter.iter(), set.into_iter());
443 }
444
445 #[quickcheck]
446 fn test_splinter_read_quickcheck(set: Vec<u32>) -> TestResult {
447 let expected = set.iter().copied().sorted().dedup().collect_vec();
448 test_partition_read(&Splinter::from_iter(set), &expected);
449 TestResult::passed()
450 }
451
452 #[quickcheck]
453 fn test_splinter_write_quickcheck(set: Vec<u32>) -> TestResult {
454 let mut splinter = Splinter::from_iter(set);
455 test_partition_write(&mut splinter);
456 TestResult::passed()
457 }
458
459 #[quickcheck]
460 fn test_splinter_quickcheck(set: Vec<u32>) -> bool {
461 let splinter = mksplinter(&set);
462 if set.is_empty() {
463 !splinter.contains(123)
464 } else {
465 let lookup = set[set.len() / 3];
466 splinter.contains(lookup)
467 }
468 }
469
470 #[quickcheck]
471 fn test_splinter_opt_quickcheck(set: Vec<u32>) -> bool {
472 let mut splinter = mksplinter(&set);
473 splinter.optimize();
474 if set.is_empty() {
475 !splinter.contains(123)
476 } else {
477 let lookup = set[set.len() / 3];
478 splinter.contains(lookup)
479 }
480 }
481
482 #[test]
483 fn test_expected_compression() {
484 fn to_roaring(set: impl Iterator<Item = u32>) -> Vec<u8> {
485 let mut buf = std::io::Cursor::new(Vec::new());
486 let mut bmp = RoaringBitmap::from_sorted_iter(set).unwrap();
487 bmp.optimize();
488 bmp.serialize_into(&mut buf).unwrap();
489 buf.into_inner()
490 }
491
492 struct Report {
493 name: String,
494 baseline: usize,
495 splinter: (usize, usize),
497 roaring: (usize, usize),
498
499 splinter_lz4: usize,
500 roaring_lz4: usize,
501 }
502
503 let mut reports = vec![];
504
505 let mut run_test = |name: &str,
506 set: Vec<u32>,
507 expected_set_size: usize,
508 expected_splinter: usize,
509 expected_roaring: usize| {
510 assert_eq!(set.len(), expected_set_size, "Set size mismatch");
511
512 let mut splinter = Splinter::from_iter(set.clone());
513 splinter.optimize();
514 itertools::assert_equal(splinter.iter(), set.iter().copied());
515
516 test_partition_read(&splinter, &set);
517 test_partition_write(&mut splinter.clone());
518
519 let expected_size = splinter.encoded_size();
520 let splinter = splinter.encode_to_bytes();
521
522 assert_eq!(
523 splinter.len(),
524 expected_size,
525 "actual encoded size does not match declared encoded size"
526 );
527
528 let roaring = to_roaring(set.iter().copied());
529
530 let splinter_lz4 = lz4::block::compress(&splinter, None, false).unwrap();
531 let roaring_lz4 = lz4::block::compress(&roaring, None, false).unwrap();
532
533 assert_eq!(
535 splinter,
536 lz4::block::decompress(&splinter_lz4, Some(splinter.len() as i32)).unwrap()
537 );
538 assert_eq!(
539 roaring,
540 lz4::block::decompress(&roaring_lz4, Some(roaring.len() as i32)).unwrap()
541 );
542
543 reports.push(Report {
544 name: name.to_owned(),
545 baseline: set.len() * std::mem::size_of::<u32>(),
546 splinter: (splinter.len(), expected_splinter),
547 roaring: (roaring.len(), expected_roaring),
548
549 splinter_lz4: splinter_lz4.len(),
550 roaring_lz4: roaring_lz4.len(),
551 });
552 };
553
554 let mut set_gen = SetGen::new(0xDEAD_BEEF);
555
556 run_test("empty", vec![], 0, 13, 8);
558
559 let set = set_gen.distributed(1, 1, 1, 1);
561 run_test("1 element", set, 1, 21, 18);
562
563 let set = set_gen.distributed(1, 1, 1, 256);
565 run_test("1 dense block", set, 256, 25, 15);
566
567 let set = set_gen.distributed(1, 1, 1, 128);
569 run_test("1 half full block", set, 128, 63, 255);
570
571 let set = set_gen.distributed(1, 1, 1, 16);
573 run_test("1 sparse block", set, 16, 48, 48);
574
575 let set = set_gen.distributed(1, 1, 8, 128);
577 run_test("8 half full blocks", set, 1024, 315, 2003);
578
579 let set = set_gen.distributed(1, 1, 8, 2);
581 run_test("8 sparse blocks", set, 16, 60, 48);
582
583 let set = set_gen.distributed(4, 4, 4, 128);
585 run_test("64 half full blocks", set, 8192, 2442, 16452);
586
587 let set = set_gen.distributed(4, 4, 4, 2);
589 run_test("64 sparse blocks", set, 128, 410, 392);
590
591 let set = set_gen.distributed(4, 8, 8, 128);
593 run_test("256 half full blocks", set, 32768, 9450, 65580);
594
595 let set = set_gen.distributed(4, 8, 8, 2);
597 run_test("256 sparse blocks", set, 512, 1290, 1288);
598
599 let set = set_gen.distributed(8, 8, 8, 128);
601 run_test("512 half full blocks", set, 65536, 18886, 130810);
602
603 let set = set_gen.distributed(8, 8, 8, 2);
605 run_test("512 sparse blocks", set, 1024, 2566, 2568);
606
607 let elements = 4096;
609
610 let set = set_gen.distributed(1, 1, 16, 256);
612 run_test("fully dense", set, elements, 80, 63);
613
614 let set = set_gen.distributed(1, 1, 32, 128);
616 run_test("128/block; dense", set, elements, 1179, 8208);
617
618 let set = set_gen.distributed(1, 1, 128, 32);
620 run_test("32/block; dense", set, elements, 4539, 8208);
621
622 let set = set_gen.distributed(1, 1, 256, 16);
624 run_test("16/block; dense", set, elements, 5147, 8208);
625
626 let set = set_gen.distributed(1, 32, 1, 128);
628 run_test("128/block; sparse mid", set, elements, 1365, 8282);
629
630 let set = set_gen.distributed(32, 1, 1, 128);
632 run_test("128/block; sparse high", set, elements, 1582, 8224);
633
634 let set = set_gen.distributed(1, 256, 16, 1);
636 run_test("1/block; sparse mid", set, elements, 9749, 10248);
637
638 let set = set_gen.distributed(256, 16, 1, 1);
640 run_test("1/block; sparse high", set, elements, 14350, 40968);
641
642 let set = set_gen.dense(1, 16, 256, 1);
644 run_test("1/block; spread low", set, elements, 8325, 8328);
645
646 let set = set_gen.dense(8, 8, 8, 8);
648 run_test("dense throughout", set, elements, 4113, 2700);
649
650 let set = set_gen.dense(1, 1, 64, 64);
652 run_test("dense low", set, elements, 529, 267);
653
654 let set = set_gen.dense(1, 32, 16, 8);
656 run_test("dense mid/low", set, elements, 4113, 2376);
657
658 let random_cases = [
659 (32, High::MAX_LEN, 145, 328),
661 (256, High::MAX_LEN, 1041, 2544),
662 (1024, High::MAX_LEN, 4113, 10168),
663 (4096, High::MAX_LEN, 14350, 40056),
664 (16384, High::MAX_LEN, 51214, 148656),
665 (65536, High::MAX_LEN, 198670, 461288),
666 (32, 65536, 92, 80),
668 (256, 65536, 540, 528),
669 (1024, 65536, 2071, 2064),
670 (4096, 65536, 5147, 8208),
671 (65536, 65536, 25, 15),
672 (8, 1024, 49, 32),
674 (16, 1024, 60, 48),
675 (32, 1024, 79, 80),
676 (64, 1024, 111, 144),
677 (128, 1024, 168, 272),
678 ];
679
680 for (count, max, s, r) in random_cases {
681 let name = if max == High::MAX_LEN {
682 format!("random/{count}")
683 } else {
684 format!("random/{count}/{max}")
685 };
686 run_test(&name, set_gen.random_max(count, max), count, s, r);
687 }
688
689 let mut fail_test = false;
690
691 println!("{}", "-".repeat(83));
692 println!(
693 "{:30} {:12} {:>6} {:>10} {:>10} {:>10}",
694 "test", "bitmap", "size", "expected", "relative", "ok"
695 );
696 for report in &reports {
697 println!(
698 "{:30} {:12} {:6} {:10} {:>10} {:>10}",
699 report.name,
700 "Splinter",
701 report.splinter.0,
702 report.splinter.1,
703 "1.00",
704 if report.splinter.0 == report.splinter.1 {
705 "ok"
706 } else {
707 fail_test = true;
708 "FAIL"
709 }
710 );
711
712 let diff = report.roaring.0 as f64 / report.splinter.0 as f64;
713 let ok_status = if report.roaring.0 != report.roaring.1 {
714 fail_test = true;
715 "FAIL".into()
716 } else {
717 ratio_to_marks(diff)
718 };
719 println!(
720 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
721 "", "Roaring", report.roaring.0, report.roaring.1, diff, ok_status
722 );
723
724 let diff = report.splinter_lz4 as f64 / report.splinter.0 as f64;
725 println!(
726 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
727 "",
728 "Splinter LZ4",
729 report.splinter_lz4,
730 report.splinter_lz4,
731 diff,
732 ratio_to_marks(diff)
733 );
734
735 let diff = report.roaring_lz4 as f64 / report.splinter_lz4 as f64;
736 println!(
737 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
738 "",
739 "Roaring LZ4",
740 report.roaring_lz4,
741 report.roaring_lz4,
742 diff,
743 ratio_to_marks(diff)
744 );
745
746 let diff = report.baseline as f64 / report.splinter.0 as f64;
747 println!(
748 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
749 "",
750 "Baseline",
751 report.baseline,
752 report.baseline,
753 diff,
754 ratio_to_marks(diff)
755 );
756 }
757
758 let avg_ratio = reports
760 .iter()
761 .map(|r| r.splinter_lz4 as f64 / r.splinter.0 as f64)
762 .sum::<f64>()
763 / reports.len() as f64;
764
765 println!("average compression ratio (splinter_lz4 / splinter): {avg_ratio:.2}");
766
767 assert!(!fail_test, "compression test failed");
768 }
769}