git_bitmap/
ewah.rs

1use std::convert::TryInto;
2
3pub mod decode {
4    use quick_error::quick_error;
5
6    quick_error! {
7        #[derive(Debug)]
8        pub enum Error {
9            Corrupt(message: &'static str) {
10                display("{}", message)
11            }
12        }
13    }
14}
15
16pub fn decode(data: &[u8]) -> Result<(Vec, &[u8]), decode::Error> {
17    use self::decode::Error;
18    use crate::decode;
19
20    let (num_bits, data) = decode::u32(data).ok_or(Error::Corrupt("eof reading amount of bits"))?;
21    let (len, data) = decode::u32(data).ok_or(Error::Corrupt("eof reading chunk length"))?;
22    let len = len as usize;
23
24    // NOTE: git does this by copying all bytes first, and then it will change the endianness in a separate loop.
25    //       Maybe it's faster, but we can't do it without unsafe. Let's leave it to the optimizer and maybe
26    //       one day somebody will find out that it's worth it to use unsafe here.
27    let (mut bits, data) = decode::split_at_pos(data, len * std::mem::size_of::<u64>())
28        .ok_or(Error::Corrupt("eof while reading bit data"))?;
29    let mut buf = std::vec::Vec::<u64>::with_capacity(len);
30    for _ in 0..len {
31        let (bit_num, rest) = bits.split_at(std::mem::size_of::<u64>());
32        bits = rest;
33        buf.push(u64::from_be_bytes(bit_num.try_into().unwrap()))
34    }
35
36    let (rlw, data) = decode::u32(data).ok_or(Error::Corrupt("eof while reading run length width"))?;
37
38    Ok((
39        Vec {
40            num_bits,
41            bits: buf,
42            rlw: rlw.into(),
43        },
44        data,
45    ))
46}
47
48mod access {
49    use std::convert::{TryFrom, TryInto};
50
51    use super::Vec;
52
53    impl Vec {
54        /// Call `f(index)` for each bit that is true, given the index of the bit that identifies it uniquely within the bit array.
55        /// If `f` returns `None` the iteration will be stopped and `None` is returned.
56        ///
57        /// The index is sequential like in any other vector.
58        pub fn for_each_set_bit(&self, mut f: impl FnMut(usize) -> Option<()>) -> Option<()> {
59            let mut index = 0usize;
60            let mut iter = self.bits.iter();
61            while let Some(word) = iter.next() {
62                if rlw_runbit_is_set(word) {
63                    let len = rlw_running_len_bits(word);
64                    for _ in 0..len {
65                        f(index)?;
66                        index += 1;
67                    }
68                } else {
69                    index += usize::try_from(rlw_running_len_bits(word)).ok()?;
70                }
71
72                for _ in 0..rlw_literal_words(word) {
73                    let word = iter
74                        .next()
75                        .expect("BUG: ran out of words while going through uncompressed portion");
76                    for bit_index in 0..64 {
77                        if word & (1 << bit_index) != 0 {
78                            f(index)?;
79                        }
80                        index += 1;
81                    }
82                }
83            }
84            Some(())
85        }
86
87        /// The amount of bits we are currently holding.
88        pub fn num_bits(&self) -> usize {
89            self.num_bits.try_into().expect("we are not on 16 bit systems")
90        }
91    }
92
93    #[inline]
94    fn rlw_running_len_bits(w: &u64) -> u64 {
95        rlw_running_len(w) * 64
96    }
97
98    #[inline]
99    fn rlw_running_len(w: &u64) -> u64 {
100        (w >> 1) & RLW_LARGEST_RUNNING_COUNT
101    }
102
103    #[inline]
104    fn rlw_literal_words(w: &u64) -> u64 {
105        w >> (1 + RLW_RUNNING_BITS)
106    }
107
108    #[inline]
109    fn rlw_runbit_is_set(w: &u64) -> bool {
110        w & 1 == 1
111    }
112
113    const RLW_RUNNING_BITS: u64 = 4 * 8;
114    const RLW_LARGEST_RUNNING_COUNT: u64 = (1 << RLW_RUNNING_BITS) - 1;
115}
116
117/// A growable collection of u64 that are seen as stream of individual bits.
118#[allow(dead_code)]
119#[derive(Clone)]
120pub struct Vec {
121    num_bits: u32,
122    bits: std::vec::Vec<u64>,
123    /// RLW is an offset into the `bits` buffer, so `1` translates into &bits\[1] essentially.
124    rlw: u64,
125}