1use std::{fmt::Debug, ops::Deref};
2
3use bytes::Bytes;
4
5use crate::{
6 Cut, Encodable, Merge, Optimizable, SplinterRef,
7 codec::{encoder::Encoder, footer::Footer},
8 cow::CowSplinter,
9 level::High,
10 partition::Partition,
11 traits::{PartitionRead, PartitionWrite},
12};
13
14#[derive(Clone, PartialEq, Eq, Default, Debug)]
61pub struct Splinter(Partition<High>);
62
63static_assertions::const_assert_eq!(std::mem::size_of::<Splinter>(), 40);
64
65impl Splinter {
66 pub const EMPTY: Self = Splinter(Partition::EMPTY);
68
69 pub fn encode_to_splinter_ref(&self) -> SplinterRef<Bytes> {
89 SplinterRef { data: self.encode_to_bytes() }
90 }
91}
92
93impl FromIterator<u32> for Splinter {
94 fn from_iter<I: IntoIterator<Item = u32>>(iter: I) -> Self {
95 Self(Partition::<High>::from_iter(iter))
96 }
97}
98
99impl PartitionRead<High> for Splinter {
100 #[inline]
116 fn cardinality(&self) -> usize {
117 self.0.cardinality()
118 }
119
120 #[inline]
134 fn is_empty(&self) -> bool {
135 self.0.is_empty()
136 }
137
138 #[inline]
154 fn contains(&self, value: u32) -> bool {
155 self.0.contains(value)
156 }
157
158 #[inline]
179 fn rank(&self, value: u32) -> usize {
180 self.0.rank(value)
181 }
182
183 #[inline]
203 fn select(&self, idx: usize) -> Option<u32> {
204 self.0.select(idx)
205 }
206
207 #[inline]
224 fn last(&self) -> Option<u32> {
225 self.0.last()
226 }
227
228 #[inline]
244 fn iter(&self) -> impl Iterator<Item = u32> {
245 self.0.iter()
246 }
247}
248
249impl PartitionWrite<High> for Splinter {
250 #[inline]
274 fn insert(&mut self, value: u32) -> bool {
275 self.0.insert(value)
276 }
277
278 #[inline]
303 fn remove(&mut self, value: u32) -> bool {
304 self.0.remove(value)
305 }
306}
307
308impl Encodable for Splinter {
309 fn encoded_size(&self) -> usize {
310 self.0.encoded_size() + std::mem::size_of::<Footer>()
311 }
312
313 fn encode<B: bytes::BufMut>(&self, encoder: &mut Encoder<B>) {
314 self.0.encode(encoder);
315 encoder.write_footer();
316 }
317}
318
319impl Optimizable for Splinter {
320 #[inline]
321 fn optimize(&mut self) {
322 self.0.optimize();
323 }
324}
325
326impl Merge for Splinter {
327 fn merge(&mut self, rhs: &Self) {
328 self.0.merge(&rhs.0)
329 }
330}
331
332impl<B: Deref<Target = [u8]>> Merge<SplinterRef<B>> for Splinter {
333 fn merge(&mut self, rhs: &SplinterRef<B>) {
334 self.0.merge(&rhs.load_unchecked())
335 }
336}
337
338impl<B: Deref<Target = [u8]>> Merge<CowSplinter<B>> for Splinter {
339 fn merge(&mut self, rhs: &CowSplinter<B>) {
340 match rhs {
341 CowSplinter::Ref(splinter_ref) => self.merge(splinter_ref),
342 CowSplinter::Owned(splinter) => self.merge(splinter),
343 }
344 }
345}
346
347impl Cut for Splinter {
348 type Out = Self;
349
350 fn cut(&mut self, rhs: &Self) -> Self::Out {
351 Self(self.0.cut(&rhs.0))
352 }
353}
354
355impl<B: Deref<Target = [u8]>> Cut<SplinterRef<B>> for Splinter {
356 type Out = Self;
357
358 fn cut(&mut self, rhs: &SplinterRef<B>) -> Self::Out {
359 Self(self.0.cut(&rhs.load_unchecked()))
360 }
361}
362
363impl<B: Deref<Target = [u8]>> Cut<CowSplinter<B>> for Splinter {
364 type Out = Self;
365
366 fn cut(&mut self, rhs: &CowSplinter<B>) -> Self::Out {
367 match rhs {
368 CowSplinter::Ref(splinter_ref) => self.cut(splinter_ref),
369 CowSplinter::Owned(splinter) => self.cut(splinter),
370 }
371 }
372}
373
374impl<B: Deref<Target = [u8]>> PartialEq<SplinterRef<B>> for Splinter {
375 #[inline]
376 fn eq(&self, other: &SplinterRef<B>) -> bool {
377 self.0 == other.load_unchecked()
378 }
379}
380
381impl<B: Deref<Target = [u8]>> PartialEq<CowSplinter<B>> for Splinter {
382 fn eq(&self, other: &CowSplinter<B>) -> bool {
383 match other {
384 CowSplinter::Ref(splinter_ref) => self.eq(splinter_ref),
385 CowSplinter::Owned(splinter) => self.eq(splinter),
386 }
387 }
388}
389
390#[cfg(test)]
391mod tests {
392 use super::*;
393 use crate::{
394 codec::Encodable,
395 testutil::{SetGen, mksplinter, ratio_to_marks, test_partition_read, test_partition_write},
396 traits::Optimizable,
397 };
398 use itertools::Itertools;
399 use quickcheck::TestResult;
400 use quickcheck_macros::quickcheck;
401 use roaring::RoaringBitmap;
402
403 #[test]
404 fn test_sanity() {
405 let mut splinter = Splinter::EMPTY;
406
407 assert!(splinter.insert(1));
408 assert!(!splinter.insert(1));
409 assert!(splinter.contains(1));
410
411 let values = [1024, 123, 16384];
412 for v in values {
413 assert!(splinter.insert(v));
414 assert!(splinter.contains(v));
415 assert!(!splinter.contains(v + 1));
416 }
417
418 for i in 0..8192 + 10 {
419 splinter.insert(i);
420 }
421
422 splinter.optimize();
423
424 dbg!(&splinter);
425
426 let expected = splinter.iter().collect_vec();
427 test_partition_read(&splinter, &expected);
428 test_partition_write(&mut splinter);
429 }
430
431 #[test]
432 fn test_wat() {
433 let mut set_gen = SetGen::new(0xDEAD_BEEF);
434 let set = set_gen.distributed(4, 8, 16, 32);
435 let baseline_size = set.len() * 4;
436
437 let mut splinter = Splinter::from_iter(set.iter().copied());
438 splinter.optimize();
439
440 dbg!(&splinter, splinter.encoded_size(), baseline_size, set.len());
441 itertools::assert_equal(splinter.iter(), set.into_iter());
442 }
443
444 #[quickcheck]
445 fn test_splinter_read_quickcheck(set: Vec<u32>) -> TestResult {
446 let expected = set.iter().copied().sorted().dedup().collect_vec();
447 test_partition_read(&Splinter::from_iter(set), &expected);
448 TestResult::passed()
449 }
450
451 #[quickcheck]
452 fn test_splinter_write_quickcheck(set: Vec<u32>) -> TestResult {
453 let mut splinter = Splinter::from_iter(set);
454 test_partition_write(&mut splinter);
455 TestResult::passed()
456 }
457
458 #[quickcheck]
459 fn test_splinter_quickcheck(set: Vec<u32>) -> bool {
460 let splinter = mksplinter(&set);
461 if set.is_empty() {
462 !splinter.contains(123)
463 } else {
464 let lookup = set[set.len() / 3];
465 splinter.contains(lookup)
466 }
467 }
468
469 #[quickcheck]
470 fn test_splinter_opt_quickcheck(set: Vec<u32>) -> bool {
471 let mut splinter = mksplinter(&set);
472 splinter.optimize();
473 if set.is_empty() {
474 !splinter.contains(123)
475 } else {
476 let lookup = set[set.len() / 3];
477 splinter.contains(lookup)
478 }
479 }
480
481 #[test]
482 fn test_expected_compression() {
483 fn to_roaring(set: impl Iterator<Item = u32>) -> Vec<u8> {
484 let mut buf = std::io::Cursor::new(Vec::new());
485 let mut bmp = RoaringBitmap::from_sorted_iter(set).unwrap();
486 bmp.optimize();
487 bmp.serialize_into(&mut buf).unwrap();
488 buf.into_inner()
489 }
490
491 struct Report {
492 name: &'static str,
493 baseline: usize,
494 splinter: (usize, usize),
496 roaring: (usize, usize),
497
498 splinter_lz4: usize,
499 roaring_lz4: usize,
500 }
501
502 let mut reports = vec![];
503
504 let mut run_test = |name: &'static str,
505 set: Vec<u32>,
506 expected_set_size: usize,
507 expected_splinter: usize,
508 expected_roaring: usize| {
509 assert_eq!(set.len(), expected_set_size, "Set size mismatch");
510
511 let mut splinter = Splinter::from_iter(set.clone());
512 splinter.optimize();
513 itertools::assert_equal(splinter.iter(), set.iter().copied());
514
515 test_partition_read(&splinter, &set);
516 test_partition_write(&mut splinter.clone());
517
518 let expected_size = splinter.encoded_size();
519 let splinter = splinter.encode_to_bytes();
520
521 assert_eq!(
522 splinter.len(),
523 expected_size,
524 "actual encoded size does not match declared encoded size"
525 );
526
527 let roaring = to_roaring(set.iter().copied());
528
529 let splinter_lz4 = lz4::block::compress(&splinter, None, false).unwrap();
530 let roaring_lz4 = lz4::block::compress(&roaring, None, false).unwrap();
531
532 assert_eq!(
534 splinter,
535 lz4::block::decompress(&splinter_lz4, Some(splinter.len() as i32)).unwrap()
536 );
537 assert_eq!(
538 roaring,
539 lz4::block::decompress(&roaring_lz4, Some(roaring.len() as i32)).unwrap()
540 );
541
542 reports.push(Report {
543 name,
544 baseline: set.len() * std::mem::size_of::<u32>(),
545 splinter: (splinter.len(), expected_splinter),
546 roaring: (roaring.len(), expected_roaring),
547
548 splinter_lz4: splinter_lz4.len(),
549 roaring_lz4: roaring_lz4.len(),
550 });
551 };
552
553 let mut set_gen = SetGen::new(0xDEAD_BEEF);
554
555 run_test("empty", vec![], 0, 13, 8);
557
558 let set = set_gen.distributed(1, 1, 1, 1);
560 run_test("1 element", set, 1, 21, 18);
561
562 let set = set_gen.distributed(1, 1, 1, 256);
564 run_test("1 dense block", set, 256, 25, 15);
565
566 let set = set_gen.distributed(1, 1, 1, 128);
568 run_test("1 half full block", set, 128, 63, 255);
569
570 let set = set_gen.distributed(1, 1, 1, 16);
572 run_test("1 sparse block", set, 16, 81, 48);
573
574 let set = set_gen.distributed(1, 1, 8, 128);
576 run_test("8 half full blocks", set, 1024, 315, 2003);
577
578 let set = set_gen.distributed(1, 1, 8, 2);
580 run_test("8 sparse blocks", set, 16, 81, 48);
581
582 let set = set_gen.distributed(4, 4, 4, 128);
584 run_test("64 half full blocks", set, 8192, 2442, 16452);
585
586 let set = set_gen.distributed(4, 4, 4, 2);
588 run_test("64 sparse blocks", set, 128, 434, 392);
589
590 let set = set_gen.distributed(4, 8, 8, 128);
592 run_test("256 half full blocks", set, 32768, 9450, 65580);
593
594 let set = set_gen.distributed(4, 8, 8, 2);
596 run_test("256 sparse blocks", set, 512, 1290, 1288);
597
598 let set = set_gen.distributed(8, 8, 8, 128);
600 run_test("512 half full blocks", set, 65536, 18886, 130810);
601
602 let set = set_gen.distributed(8, 8, 8, 2);
604 run_test("512 sparse blocks", set, 1024, 2566, 2568);
605
606 let elements = 4096;
608
609 let set = set_gen.distributed(1, 1, 16, 256);
611 run_test("fully dense", set, elements, 80, 63);
612
613 let set = set_gen.distributed(1, 1, 32, 128);
615 run_test("128/block; dense", set, elements, 1179, 8208);
616
617 let set = set_gen.distributed(1, 1, 128, 32);
619 run_test("32/block; dense", set, elements, 4539, 8208);
620
621 let set = set_gen.distributed(1, 1, 256, 16);
623 run_test("16/block; dense", set, elements, 5147, 8208);
624
625 let set = set_gen.distributed(1, 32, 1, 128);
627 run_test("128/block; sparse mid", set, elements, 1365, 8282);
628
629 let set = set_gen.distributed(32, 1, 1, 128);
631 run_test("128/block; sparse high", set, elements, 1582, 8224);
632
633 let set = set_gen.distributed(1, 256, 16, 1);
635 run_test("1/block; sparse mid", set, elements, 9749, 10248);
636
637 let set = set_gen.distributed(256, 16, 1, 1);
639 run_test("1/block; sparse high", set, elements, 14350, 40968);
640
641 let set = set_gen.dense(1, 16, 256, 1);
643 run_test("1/block; spread low", set, elements, 8325, 8328);
644
645 let set = set_gen.dense(8, 8, 8, 8);
647 run_test("dense throughout", set, elements, 4113, 2700);
648
649 let set = set_gen.dense(1, 1, 64, 64);
651 run_test("dense low", set, elements, 529, 267);
652
653 let set = set_gen.dense(1, 32, 16, 8);
655 run_test("dense mid/low", set, elements, 4113, 2376);
656
657 run_test("random/32", set_gen.random(32), 32, 145, 328);
659 run_test("random/256", set_gen.random(256), 256, 1041, 2544);
660 run_test("random/1024", set_gen.random(1024), 1024, 4113, 10168);
661 run_test("random/4096", set_gen.random(4096), 4096, 14350, 40056);
662 run_test("random/16384", set_gen.random(16384), 16384, 51214, 148656);
663 run_test("random/65535", set_gen.random(65535), 65535, 198667, 461278);
664
665 let mut fail_test = false;
666
667 println!("{}", "-".repeat(83));
668 println!(
669 "{:30} {:12} {:>6} {:>10} {:>10} {:>10}",
670 "test", "bitmap", "size", "expected", "relative", "ok"
671 );
672 for report in &reports {
673 println!(
674 "{:30} {:12} {:6} {:10} {:>10} {:>10}",
675 report.name,
676 "Splinter",
677 report.splinter.0,
678 report.splinter.1,
679 "1.00",
680 if report.splinter.0 == report.splinter.1 {
681 "ok"
682 } else {
683 fail_test = true;
684 "FAIL"
685 }
686 );
687
688 let diff = report.roaring.0 as f64 / report.splinter.0 as f64;
689 let ok_status = if report.roaring.0 != report.roaring.1 {
690 fail_test = true;
691 "FAIL".into()
692 } else {
693 ratio_to_marks(diff)
694 };
695 println!(
696 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
697 "", "Roaring", report.roaring.0, report.roaring.1, diff, ok_status
698 );
699
700 let diff = report.splinter_lz4 as f64 / report.splinter.0 as f64;
701 println!(
702 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
703 "",
704 "Splinter LZ4",
705 report.splinter_lz4,
706 report.splinter_lz4,
707 diff,
708 ratio_to_marks(diff)
709 );
710
711 let diff = report.roaring_lz4 as f64 / report.splinter_lz4 as f64;
712 println!(
713 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
714 "",
715 "Roaring LZ4",
716 report.roaring_lz4,
717 report.roaring_lz4,
718 diff,
719 ratio_to_marks(diff)
720 );
721
722 let diff = report.baseline as f64 / report.splinter.0 as f64;
723 println!(
724 "{:30} {:12} {:6} {:10} {:>10.2} {:>10}",
725 "",
726 "Baseline",
727 report.baseline,
728 report.baseline,
729 diff,
730 ratio_to_marks(diff)
731 );
732 }
733
734 let avg_ratio = reports
736 .iter()
737 .map(|r| r.splinter_lz4 as f64 / r.splinter.0 as f64)
738 .sum::<f64>()
739 / reports.len() as f64;
740
741 println!("average compression ratio (splinter_lz4 / splinter): {avg_ratio:.2}");
742
743 assert!(!fail_test, "compression test failed");
744 }
745}