sbits 0.2.2

Succinct data structures: near-optimal space with efficient queries.
Documentation
use proptest::prelude::*;
use sbits::bitvec::BitVector;

proptest! {
    #[test]
    fn test_bitvector_rank_property(
        bits in prop::collection::vec(any::<u64>(), 1..100),
        len_mult in 0..64usize,
    ) {
        let len = (bits.len() * 64).saturating_sub(len_mult);
        let bv = BitVector::new(&bits, len);

        // Check total rank
        let mut expected_total = 0;
        for i in 0..len {
            let word_idx = i / 64;
            let bit_idx = i % 64;
            if (bits[word_idx] & (1 << bit_idx)) != 0 {
                expected_total += 1;
            }
        }

        prop_assert_eq!(bv.rank1(len), expected_total);

        // Check individual ranks at random points
        for i in (0..len).step_by(13) {
            let mut expected_rank = 0;
            for j in 0..i {
                let word_idx = j / 64;
                let bit_idx = j % 64;
                if (bits[word_idx] & (1 << bit_idx)) != 0 {
                    expected_rank += 1;
                }
            }
            prop_assert_eq!(bv.rank1(i), expected_rank);
            prop_assert_eq!(bv.rank0(i), i - expected_rank);
        }

        // Check select1 for every set bit
        let mut count = 0;
        for i in 0..len {
            let word_idx = i / 64;
            let bit_idx = i % 64;
            if (bits[word_idx] & (1 << bit_idx)) != 0 {
                prop_assert_eq!(bv.select1(count), Some(i));
                count += 1;
            }
        }
        prop_assert_eq!(bv.select1(count), None);

        // Check select0 for every unset bit
        let mut count0 = 0;
        for i in 0..len {
            let word_idx = i / 64;
            let bit_idx = i % 64;
            if (bits[word_idx] & (1 << bit_idx)) == 0 {
                prop_assert_eq!(bv.select0(count0), Some(i));
                count0 += 1;
            }
        }
        prop_assert_eq!(bv.select0(count0), None);
    }
}

use sbits::elias_fano::EliasFano;
use sbits::partitioned_elias_fano::PartitionedEliasFano;
use sbits::wavelet::WaveletTree;

proptest! {
    #[test]
    fn test_elias_fano_property(
        mut values in prop::collection::vec(0..10000u64, 1..100),
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let ef = EliasFano::new(&values, universe_size);

        prop_assert_eq!(ef.len(), values.len());

        for (i, &expected) in values.iter().enumerate() {
            prop_assert_eq!(ef.get(i).unwrap(), expected);
        }
    }

    #[test]
    fn test_elias_fano_serialization_roundtrip(
        mut values in prop::collection::vec(0..10000u64, 1..50),
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let ef = EliasFano::new(&values, universe_size);
        let bytes = ef.to_bytes();
        let ef2 = EliasFano::from_bytes(&bytes).unwrap();

        prop_assert_eq!(ef2.len(), ef.len());
        for i in 0..ef.len() {
            prop_assert_eq!(ef2.get(i).unwrap(), ef.get(i).unwrap());
        }
    }

    #[test]
    fn test_partitioned_elias_fano_property(
        mut values in prop::collection::vec(0..10000u64, 1..100),
        block_size in 1usize..32,
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let pef = PartitionedEliasFano::new(&values, universe_size, block_size);

        prop_assert_eq!(pef.len(), values.len());

        for (i, &expected) in values.iter().enumerate() {
            prop_assert_eq!(pef.get(i).unwrap(), expected);
        }
    }

    #[test]
    fn test_partitioned_elias_fano_serialization_roundtrip(
        mut values in prop::collection::vec(0..10000u64, 1..50),
        block_size in 1usize..32,
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let pef = PartitionedEliasFano::new(&values, universe_size, block_size);
        let bytes = pef.to_bytes();
        let pef2 = PartitionedEliasFano::from_bytes(&bytes).unwrap();

        prop_assert_eq!(pef2.len(), pef.len());
        for i in 0..pef.len() {
            prop_assert_eq!(pef2.get(i).unwrap(), pef.get(i).unwrap());
        }
    }

    #[test]
    fn test_bitvector_serialization_roundtrip(
        bits in prop::collection::vec(any::<u64>(), 1..20),
        len_mult in 0..64usize,
    ) {
        let len = (bits.len() * 64).saturating_sub(len_mult);
        let bv = BitVector::new(&bits, len);
        let bytes = bv.to_bytes();
        let bv2 = BitVector::from_bytes(&bytes).unwrap();

        prop_assert_eq!(bv2.len(), bv.len());
        prop_assert_eq!(bv2.rank1(len), bv.rank1(len));
        // Spot-check a few positions.
        for i in (0..len).step_by(17) {
            prop_assert_eq!(bv2.rank1(i), bv.rank1(i));
            prop_assert_eq!(bv2.get(i), bv.get(i));
        }
    }

    #[test]
    fn test_wavelet_tree_property(
        input in prop::collection::vec(0..100u32, 1..100),
    ) {
        let sigma = input.iter().max().copied().unwrap_or(0) + 1;
        let wt = WaveletTree::new(&input, sigma);

        prop_assert_eq!(wt.len(), input.len());

        for (i, &expected) in input.iter().enumerate() {
            prop_assert_eq!(wt.access(i), expected);
        }

        // Check rank for each symbol
        for symbol in 0..sigma {
            let mut expected_rank = 0;
            for (i, &v) in input.iter().enumerate() {
                prop_assert_eq!(wt.rank(symbol, i), expected_rank);
                if v == symbol {
                    expected_rank += 1;
                }
            }
            prop_assert_eq!(wt.rank(symbol, input.len()), expected_rank);
        }

        // Check select for each symbol
        for symbol in 0..sigma.min(20) {
            let total = wt.rank(symbol, input.len());
            for k in 0..total {
                let pos = wt.select(symbol, k);
                prop_assert!(pos.is_some(), "select({symbol}, {k}) returned None but rank is {total}");
                let pos = pos.unwrap();
                prop_assert!(pos < input.len());
                prop_assert_eq!(input[pos], symbol);
                prop_assert_eq!(wt.rank(symbol, pos), k);
            }
            prop_assert_eq!(wt.select(symbol, total), None);
        }
    }

    #[test]
    fn test_bitvector_ones_iter(
        bits in prop::collection::vec(any::<u64>(), 1..50),
        len_mult in 0..64usize,
    ) {
        let len = (bits.len() * 64).saturating_sub(len_mult);
        let bv = BitVector::new(&bits, len);

        let from_iter: Vec<usize> = bv.ones().collect();
        let mut expected = Vec::new();
        for i in 0..len {
            if (bits[i / 64] & (1u64 << (i % 64))) != 0 {
                expected.push(i);
            }
        }
        prop_assert_eq!(&from_iter, &expected);
        prop_assert_eq!(from_iter.len(), bv.rank1(len));
    }

    #[test]
    fn test_bitvector_serialization_verifies_select(
        bits in prop::collection::vec(any::<u64>(), 1..20),
        len_mult in 0..64usize,
    ) {
        let len = (bits.len() * 64).saturating_sub(len_mult);
        let bv = BitVector::new(&bits, len);
        let bytes = bv.to_bytes();
        let bv2 = BitVector::from_bytes(&bytes).unwrap();

        // Verify select1 after roundtrip
        let total_ones = bv.rank1(len);
        for k in 0..total_ones.min(50) {
            prop_assert_eq!(bv2.select1(k), bv.select1(k));
        }
        // Verify select0 after roundtrip
        let total_zeros = bv.rank0(len);
        for k in 0..total_zeros.min(50) {
            prop_assert_eq!(bv2.select0(k), bv.select0(k));
        }
    }

    #[test]
    fn test_elias_fano_predecessor_successor(
        mut values in prop::collection::vec(0..10000u64, 1..100),
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let ef = EliasFano::new(&values, universe_size);

        // Successor of each value is itself
        for &v in &values {
            prop_assert_eq!(ef.successor(v), Some(v));
            prop_assert_eq!(ef.predecessor(v), Some(v));
        }

        // Successor of 0 is first value (or None if first > 0)
        if values[0] == 0 {
            prop_assert_eq!(ef.successor(0), Some(0));
        } else {
            prop_assert_eq!(ef.successor(0), Some(values[0]));
            prop_assert_eq!(ef.predecessor(0), None);
        }

        // Successor past the end is None
        let last = *values.last().unwrap();
        prop_assert_eq!(ef.successor(last + 1), None);
        prop_assert_eq!(ef.predecessor(last + 1), Some(last));

        // Iterator produces all values in order
        let from_iter: Vec<u64> = ef.iter().collect();
        prop_assert_eq!(&from_iter, &values);

        // Contains
        for &v in &values {
            prop_assert!(ef.contains(v));
        }
        prop_assert!(!ef.contains(universe_size - 1));
    }

    #[test]
    fn test_partitioned_elias_fano_iter(
        mut values in prop::collection::vec(0..10000u64, 1..100),
        block_size in 1usize..32,
    ) {
        values.sort();
        values.dedup();
        if values.is_empty() { return Ok(()); }

        let universe_size = values.last().copied().unwrap() + 100;
        let pef = PartitionedEliasFano::new(&values, universe_size, block_size);

        let from_iter: Vec<u64> = pef.iter().collect();
        prop_assert_eq!(&from_iter, &values);
        prop_assert_eq!(pef.iter().len(), values.len());
    }

    #[test]
    fn test_wavelet_tree_serialization_roundtrip(
        input in prop::collection::vec(0..50u32, 1..100),
    ) {
        let sigma = input.iter().max().copied().unwrap_or(0) + 1;
        let wt = WaveletTree::new(&input, sigma);
        let bytes = wt.to_bytes();
        let wt2 = WaveletTree::from_bytes(&bytes).unwrap();

        prop_assert_eq!(wt2.len(), wt.len());
        prop_assert_eq!(wt2.sigma(), wt.sigma());
        for i in 0..wt.len() {
            prop_assert_eq!(wt2.access(i), wt.access(i));
        }
        for symbol in 0..sigma.min(10) {
            prop_assert_eq!(wt2.rank(symbol, input.len()), wt.rank(symbol, input.len()));
        }
    }

    #[test]
    fn test_bitvector_from_ones(
        positions in prop::collection::vec(0..1000usize, 0..100),
    ) {
        let len = 1024;
        let bv = BitVector::from_ones(positions.iter().copied(), len);
        for &p in &positions {
            if p < len {
                prop_assert!(bv.get(p), "from_ones: bit {} should be set", p);
            }
        }
        // All ones from iterator should be in the set
        let ones: std::collections::HashSet<usize> = bv.ones().collect();
        for &p in &positions {
            if p < len {
                prop_assert!(ones.contains(&p));
            }
        }
    }

    #[test]
    fn test_bitvector_exact_size_iter(
        bits in prop::collection::vec(any::<u64>(), 1..20),
        len_mult in 0..64usize,
    ) {
        let len = (bits.len() * 64).saturating_sub(len_mult);
        let bv = BitVector::new(&bits, len);

        let ones_iter = bv.ones();
        let expected_ones = bv.rank1(len);
        prop_assert_eq!(ones_iter.len(), expected_ones);

        let zeros_iter = bv.zeros();
        let expected_zeros = bv.rank0(len);
        prop_assert_eq!(zeros_iter.len(), expected_zeros);
    }
}