1use std::convert::TryInto;
23pub mod decode {
4use quick_error::quick_error;
56quick_error! {
7#[derive(Debug)]
8pub enum Error {
9 Corrupt(message: &'static str) {
10 display("{}", message)
11 }
12 }
13 }
14}
1516pub fn decode(data: &[u8]) -> Result<(Vec, &[u8]), decode::Error> {
17use self::decode::Error;
18use crate::decode;
1920let (num_bits, data) = decode::u32(data).ok_or(Error::Corrupt("eof reading amount of bits"))?;
21let (len, data) = decode::u32(data).ok_or(Error::Corrupt("eof reading chunk length"))?;
22let len = len as usize;
2324// NOTE: git does this by copying all bytes first, and then it will change the endianness in a separate loop.
25 // Maybe it's faster, but we can't do it without unsafe. Let's leave it to the optimizer and maybe
26 // one day somebody will find out that it's worth it to use unsafe here.
27let (mut bits, data) = decode::split_at_pos(data, len * std::mem::size_of::<u64>())
28 .ok_or(Error::Corrupt("eof while reading bit data"))?;
29let mut buf = std::vec::Vec::<u64>::with_capacity(len);
30for _ in 0..len {
31let (bit_num, rest) = bits.split_at(std::mem::size_of::<u64>());
32 bits = rest;
33 buf.push(u64::from_be_bytes(bit_num.try_into().unwrap()))
34 }
3536let (rlw, data) = decode::u32(data).ok_or(Error::Corrupt("eof while reading run length width"))?;
3738Ok((
39 Vec {
40 num_bits,
41 bits: buf,
42 rlw: rlw.into(),
43 },
44 data,
45 ))
46}
4748mod access {
49use std::convert::{TryFrom, TryInto};
5051use super::Vec;
5253impl Vec {
54/// Call `f(index)` for each bit that is true, given the index of the bit that identifies it uniquely within the bit array.
55 /// If `f` returns `None` the iteration will be stopped and `None` is returned.
56 ///
57 /// The index is sequential like in any other vector.
58pub fn for_each_set_bit(&self, mut f: impl FnMut(usize) -> Option<()>) -> Option<()> {
59let mut index = 0usize;
60let mut iter = self.bits.iter();
61while let Some(word) = iter.next() {
62if rlw_runbit_is_set(word) {
63let len = rlw_running_len_bits(word);
64for _ in 0..len {
65 f(index)?;
66 index += 1;
67 }
68 } else {
69 index += usize::try_from(rlw_running_len_bits(word)).ok()?;
70 }
7172for _ in 0..rlw_literal_words(word) {
73let word = iter
74 .next()
75 .expect("BUG: ran out of words while going through uncompressed portion");
76for bit_index in 0..64 {
77if word & (1 << bit_index) != 0 {
78 f(index)?;
79 }
80 index += 1;
81 }
82 }
83 }
84Some(())
85 }
8687/// The amount of bits we are currently holding.
88pub fn num_bits(&self) -> usize {
89self.num_bits.try_into().expect("we are not on 16 bit systems")
90 }
91 }
9293#[inline]
94fn rlw_running_len_bits(w: &u64) -> u64 {
95 rlw_running_len(w) * 64
96}
9798#[inline]
99fn rlw_running_len(w: &u64) -> u64 {
100 (w >> 1) & RLW_LARGEST_RUNNING_COUNT
101 }
102103#[inline]
104fn rlw_literal_words(w: &u64) -> u64 {
105 w >> (1 + RLW_RUNNING_BITS)
106 }
107108#[inline]
109fn rlw_runbit_is_set(w: &u64) -> bool {
110 w & 1 == 1
111}
112113const RLW_RUNNING_BITS: u64 = 4 * 8;
114const RLW_LARGEST_RUNNING_COUNT: u64 = (1 << RLW_RUNNING_BITS) - 1;
115}
116117/// A growable collection of u64 that are seen as stream of individual bits.
118#[allow(dead_code)]
119#[derive(Clone)]
120pub struct Vec {
121 num_bits: u32,
122 bits: std::vec::Vec<u64>,
123/// RLW is an offset into the `bits` buffer, so `1` translates into &bits\[1] essentially.
124rlw: u64,
125}