indexed_bitvec_core/
bits_ref.rs

1/*
2   Copyright 2018 DarkOtter
3
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15*/
16//! Type to represent a reference to some bits, and basic count/rank/select functions for it.
17use super::ceil_div_u64;
18use crate::bytes;
19use crate::ones_or_zeros::{OneBits, OnesOrZeros, ZeroBits};
20
21/// Bits stored as a sequence of bytes (most significant bit first).
22#[derive(Copy, Clone, Debug)]
23pub struct BitsRef<'a>((&'a [u8], u64));
24
25impl<'a> From<BitsRef<'a>> for (&'a [u8], u64) {
26    fn from(bits: BitsRef<'a>) -> Self {
27        bits.0
28    }
29}
30
31fn big_enough(bytes: &[u8], len: u64) -> bool {
32    ceil_div_u64(len, 8) <= bytes.len() as u64
33}
34
35impl<'a> BitsRef<'a> {
36    pub fn from_bytes(bytes: &'a [u8], len: u64) -> Option<Self> {
37        if big_enough(bytes, len) {
38            Some(BitsRef((bytes, len)))
39        } else {
40            None
41        }
42    }
43
44    /// All of the bytes stored in the byte sequence: not just the ones actually used.
45    #[inline]
46    pub fn all_bytes(self) -> &'a [u8] {
47        (self.0).0
48    }
49
50    /// The number of bits used in the storage.
51    #[inline]
52    pub fn len(self) -> u64 {
53        (self.0).1
54    }
55
56    /// The used bytes of the byte sequence: bear in mind some of the bits in the
57    /// last byte may be unused.
58    #[inline]
59    pub fn bytes(self) -> &'a [u8] {
60        let all_bytes = self.all_bytes();
61        debug_assert!(big_enough(all_bytes, self.len()));
62        // This will not overflow because we checked the size
63        // when we made self...
64        &all_bytes[..ceil_div_u64(self.len(), 8) as usize]
65    }
66
67    /// Get the byte at a specific index.
68    ///
69    /// Returns `None` for out-of-bounds.
70    #[inline]
71    pub fn get(self, idx_bits: u64) -> Option<bool> {
72        if idx_bits >= self.len() {
73            None
74        } else {
75            debug_assert!(big_enough(self.all_bytes(), self.len()));
76            Some(bytes::get_unchecked(self.all_bytes(), idx_bits))
77        }
78    }
79
80    /// Count the set bits (*O(n)*).
81    pub fn count_ones(self) -> u64 {
82        if (self.len() % 8) != 0 {
83            bytes::rank_ones(self.all_bytes(), self.len())
84                .expect("Internally called rank out-of-range")
85        } else {
86            bytes::count_ones(self.bytes())
87        }
88    }
89
90    /// Count the unset bits (*O(n)*).
91    #[inline]
92    pub fn count_zeros(self) -> u64 {
93        ZeroBits::convert_count(self.count_ones(), self.len())
94    }
95
96    /// Count the set bits before a position in the bits (*O(n)*).
97    ///
98    /// Returns `None` it the index is out of bounds.
99    pub fn rank_ones(self, idx: u64) -> Option<u64> {
100        if idx >= self.len() {
101            None
102        } else {
103            Some(
104                bytes::rank_ones(self.all_bytes(), idx)
105                    .expect("Internally called rank out-of-range"),
106            )
107        }
108    }
109
110    /// Count the unset bits before a position in the bits (*O(n)*).
111    ///
112    /// Returns `None` it the index is out of bounds.
113    #[inline]
114    pub fn rank_zeros(self, idx: u64) -> Option<u64> {
115        self.rank_ones(idx)
116            .map(|rank_ones| ZeroBits::convert_count(rank_ones, idx))
117    }
118
119    pub(crate) fn select<W: OnesOrZeros>(self, target_rank: u64) -> Option<u64> {
120        let res = bytes::select::<W>(self.bytes(), target_rank);
121        match res {
122            None => None,
123            Some(res) => {
124                if res >= self.len() {
125                    None
126                } else {
127                    Some(res)
128                }
129            }
130        }
131    }
132
133    /// Find the position of a set bit by its rank (*O(n)*).
134    ///
135    /// Returns `None` if no suitable bit is found. It is
136    /// always the case otherwise that `rank_ones(result) == Some(target_rank)`
137    /// and `get(result) == Some(true)`.
138    pub fn select_ones(self, target_rank: u64) -> Option<u64> {
139        self.select::<OneBits>(target_rank)
140    }
141
142    /// Find the position of an unset bit by its rank (*O(n)*).
143    ///
144    /// Returns `None` if no suitable bit is found. It is
145    /// always the case otherwise that `rank_zeros(result) == Some(target_rank)`
146    /// and `get(result) == Some(false)`.
147    pub fn select_zeros(self, target_rank: u64) -> Option<u64> {
148        self.select::<ZeroBits>(target_rank)
149    }
150}
151
152use core::cmp::{min, Ord, Ordering};
153
154fn must_have_or_bug<T>(opt: Option<T>) -> T {
155    opt.expect("If this is None there is a bug in Bits implementation")
156}
157
158impl<'a> core::cmp::Ord for BitsRef<'a> {
159    fn cmp(&self, other: &Self) -> Ordering {
160        let common_len = min(self.len(), other.len());
161        let common_full_byte_len = (common_len / 8) as usize;
162
163        let full_bytes_self = &(self.all_bytes())[..common_full_byte_len];
164        let full_bytes_other = &(other.all_bytes())[..common_full_byte_len];
165        match full_bytes_self.cmp(full_bytes_other) {
166            Ordering::Equal => (),
167            r => return r,
168        };
169
170        for idx in ((common_full_byte_len * 8) as u64)..common_len {
171            let self_bit = must_have_or_bug(self.get(idx));
172            let other_bit = must_have_or_bug(other.get(idx));
173            match self_bit.cmp(&other_bit) {
174                Ordering::Equal => (),
175                r => return r,
176            }
177        }
178
179        self.len().cmp(&other.len())
180    }
181}
182
183impl<'a> core::cmp::PartialOrd for BitsRef<'a> {
184    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
185        Some(self.cmp(other))
186    }
187}
188
189impl<'a> core::cmp::Eq for BitsRef<'a> {}
190
191impl<'a> core::cmp::PartialEq for BitsRef<'a> {
192    fn eq(&self, other: &Self) -> bool {
193        self.len() == other.len() && self.cmp(other) == Ordering::Equal
194    }
195}
196
197#[cfg(test)]
198mod tests {
199    use super::*;
200    use quickcheck::Arbitrary;
201    use std::boxed::Box;
202    use std::vec::Vec;
203
204    fn from_or_panic<'a, T: ?Sized + std::ops::Deref<Target = [u8]>>(
205        bytes: &'a T,
206        len: u64,
207    ) -> BitsRef<'a> {
208        BitsRef::from_bytes(bytes.deref(), len).expect("Tried to make an invalid BitsRef in tests")
209    }
210
211    mod gen_bits {
212        use super::*;
213
214        #[derive(Clone, Debug)]
215        pub struct GenBits(Box<[u8]>, u64);
216
217        impl GenBits {
218            pub fn as_ref<'a>(&'a self) -> BitsRef<'a> {
219                from_or_panic(&self.0, self.1)
220            }
221        }
222
223        impl Arbitrary for GenBits {
224            fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self {
225                use rand::Rng;
226                let data = <Vec<u8>>::arbitrary(g);
227                let all_bits = data.len() as u64 * 8;
228                let overflow = g.gen_range(0, 64);
229                GenBits(data.into_boxed_slice(), all_bits.saturating_sub(overflow))
230            }
231        }
232    }
233    pub use self::gen_bits::GenBits;
234
235    #[test]
236    fn test_get() {
237        let pattern_a = vec![0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01];
238        let bits_a = from_or_panic(&pattern_a, 8 * 8);
239        for i in 0..bits_a.len() {
240            assert_eq!(
241                bits_a.get(i).unwrap(),
242                i / 8 == i % 8,
243                "Differed at position {}",
244                i
245            )
246        }
247
248        let pattern_b = vec![0xff, 0xc0];
249        let bits_b = from_or_panic(&pattern_b, 10);
250        for i in 0..10 {
251            assert_eq!(bits_b.get(i), Some(true), "Differed at position {}", i)
252        }
253        for i in 10..16 {
254            assert_eq!(bits_b.get(i), None, "Differed at position {}", i)
255        }
256    }
257
258    #[test]
259    fn test_count() {
260        let pattern_a = [0xff, 0xaau8];
261        let bytes_a = &pattern_a[..];
262        let make = |len: u64| from_or_panic(&bytes_a, len);
263        assert_eq!(12, make(16).count_ones());
264        assert_eq!(4, make(16).count_zeros());
265        assert_eq!(12, make(15).count_ones());
266        assert_eq!(3, make(15).count_zeros());
267        assert_eq!(11, make(14).count_ones());
268        assert_eq!(3, make(14).count_zeros());
269        assert_eq!(11, make(13).count_ones());
270        assert_eq!(2, make(13).count_zeros());
271        assert_eq!(10, make(12).count_ones());
272        assert_eq!(2, make(12).count_zeros());
273        assert_eq!(10, make(11).count_ones());
274        assert_eq!(1, make(11).count_zeros());
275        assert_eq!(9, make(10).count_ones());
276        assert_eq!(1, make(10).count_zeros());
277        assert_eq!(9, make(9).count_ones());
278        assert_eq!(0, make(9).count_zeros());
279        assert_eq!(8, make(8).count_ones());
280        assert_eq!(0, make(8).count_zeros());
281        assert_eq!(7, make(7).count_ones());
282        assert_eq!(0, make(7).count_zeros());
283        assert_eq!(0, make(0).count_ones());
284        assert_eq!(0, make(0).count_zeros());
285    }
286
287    #[test]
288    fn test_rank() {
289        let pattern_a = vec![0xff, 0xaau8];
290        let make = |len: u64| from_or_panic(&pattern_a, len);
291        let bits_a = make(16);
292        for i in 0..15 {
293            assert_eq!(Some(make(i).count_ones()), bits_a.rank_ones(i));
294            assert_eq!(Some(make(i).count_zeros()), bits_a.rank_zeros(i));
295        }
296        assert_eq!(None, bits_a.rank_ones(16));
297        assert_eq!(None, bits_a.rank_zeros(16));
298        assert_eq!(None, make(13).rank_ones(13));
299        assert_eq!(None, make(13).rank_zeros(13));
300        assert_eq!(bits_a.rank_ones(12), make(13).rank_ones(12));
301        assert_eq!(bits_a.rank_zeros(12), make(13).rank_zeros(12));
302    }
303
304    #[test]
305    fn test_select() {
306        let pattern_a = [0xff, 0xaau8];
307        let bytes_a = &pattern_a[..];
308        let make = |len: u64| from_or_panic(&bytes_a, len);
309        assert_eq!(Some(14), make(16).select_ones(11));
310        assert_eq!(None, make(14).select_ones(11));
311    }
312
313    quickcheck! {
314        fn fuzz_test(bits: GenBits) -> () {
315            let bits = bits.as_ref();
316            let mut running_rank_ones = 0;
317            let mut running_rank_zeros = 0;
318            for idx in 0..bits.len() {
319                assert_eq!(Some(running_rank_ones), bits.rank_ones(idx));
320                assert_eq!(Some(running_rank_zeros), bits.rank_zeros(idx));
321                if bits.get(idx).unwrap() {
322                    assert_eq!(Some(idx), bits.select_ones(running_rank_ones));
323                    running_rank_ones += 1;
324                } else {
325                    assert_eq!(Some(idx), bits.select_zeros(running_rank_zeros));
326                    running_rank_zeros += 1;
327                }
328            }
329        }
330    }
331
332    impl<'a> BitsRef<'a> {
333        fn to_bool_vec_slow(self) -> Vec<bool> {
334            (0..self.len()).map(|idx| self.get(idx).unwrap()).collect()
335        }
336    }
337
338    quickcheck! {
339        fn test_cmp_eq_pair(l: GenBits, r: GenBits) -> () {
340            let l = l.as_ref();
341            let r = r.as_ref();
342            let l_vec = l.to_bool_vec_slow();
343            let r_vec = r.to_bool_vec_slow();
344            assert_eq!(l_vec.cmp(&r_vec), l.cmp(&r));
345            assert_eq!(l_vec.eq(&r_vec), l.eq(&r));
346        }
347
348        fn test_cmp_eq_single(l: GenBits) -> () {
349            let l = l.as_ref();
350            let r = l;
351            let l_vec = l.to_bool_vec_slow();
352            let r_vec = r.to_bool_vec_slow();
353            assert_eq!(l_vec.cmp(&r_vec), l.cmp(&r));
354            assert_eq!(l_vec.eq(&r_vec), l.eq(&r));
355        }
356    }
357
358    #[test]
359    fn test_eq_cmp() {
360        fn check<'a>(expected: Ordering, l: BitsRef<'a>, r: BitsRef<'a>) {
361            let expected_eq = match expected {
362                Ordering::Equal => true,
363                _ => false,
364            };
365            assert_eq!(expected_eq, l.eq(&r));
366            assert_eq!(expected, l.cmp(&r));
367        }
368
369        // Should ignore extra bits
370        check(
371            Ordering::Equal,
372            from_or_panic(&vec![0xff, 0xf0], 12),
373            from_or_panic(&vec![0xff, 0xff], 12),
374        );
375
376        check(
377            Ordering::Equal,
378            from_or_panic(&vec![], 0),
379            from_or_panic(&vec![], 0),
380        );
381        check(
382            Ordering::Less,
383            from_or_panic(&vec![0xff], 0),
384            from_or_panic(&vec![0xff], 1),
385        );
386        check(
387            Ordering::Greater,
388            from_or_panic(&vec![0xff], 1),
389            from_or_panic(&vec![0xff], 0),
390        );
391        check(
392            Ordering::Equal,
393            from_or_panic(&vec![0xff], 1),
394            from_or_panic(&vec![0xff], 1),
395        );
396        check(
397            Ordering::Less,
398            from_or_panic(&vec![0x00], 1),
399            from_or_panic(&vec![0xff], 1),
400        );
401        check(
402            Ordering::Greater,
403            from_or_panic(&vec![0xff], 1),
404            from_or_panic(&vec![0x00], 1),
405        );
406    }
407}
408
409#[cfg(test)]
410pub use self::tests::GenBits;