1use crate::containers::vbyte::{encode_vbyte, read_vbyte};
3use bytesize::ByteSize;
4use mem_dbg::{MemSize, SizeFlags};
5use qwt::{AccessBin, BitVector, BitVectorMut, RankBin, SelectBin, bitvector::rs_narrow::RSNarrow};
6#[cfg(feature = "cache")]
7use serde::{Deserialize, Serialize};
8use std::fmt;
9use std::io::BufRead;
10use std::mem::size_of;
11
12#[derive(Clone)]
14#[cfg_attr(feature = "cache", derive(Serialize, Deserialize))]
15pub struct Bitmap {
16 pub dict: RSNarrow,
18}
19
20pub type Result<T> = core::result::Result<T, Error>;
21
22#[derive(thiserror::Error, Debug)]
24pub enum Error {
25 #[error("IO error")]
26 Io(#[from] std::io::Error),
27 #[error("Invalid CRC8-CCIT checksum {0}, expected {1}")]
28 InvalidCrc8Checksum(u8, u8),
29 #[error("Invalid CRC32C checksum {0}, expected {1}")]
30 InvalidCrc32Checksum(u32, u32),
31 #[error("Failed to turn raw bytes into u64")]
32 TryFromSliceError(#[from] std::array::TryFromSliceError),
33 #[error("Read unsupported bitmap type {0} != 1")]
34 UnsupportedBitmapType(u8),
35}
36
37impl fmt::Debug for Bitmap {
38 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39 write!(f, "{}, {} bits", ByteSize(self.size_in_bytes() as u64), self.len())
40 }
41}
42
43impl From<BitVector> for Bitmap {
44 fn from(bv: BitVector) -> Self {
45 Bitmap { dict: bv.into() }
46 }
47}
48
49impl From<BitVectorMut> for Bitmap {
50 fn from(bv: BitVectorMut) -> Self {
51 Bitmap { dict: <BitVectorMut as Into<BitVector>>::into(bv).into() }
52 }
53}
54
55impl Bitmap {
56 pub fn new(data: Vec<u64>) -> Self {
58 let mut v = BitVectorMut::new();
59 for d in data {
60 v.append_bits(d, 64);
61 }
62 let dict: BitVector = v.into();
63 Bitmap { dict: dict.into() }
64 }
65
66 pub fn size_in_bytes(&self) -> usize {
68 self.dict.mem_size(SizeFlags::default())
69 }
70
71 pub fn len(&self) -> usize {
73 self.dict.n_zeros() + self.dict.n_ones() }
76
77 pub fn num_ones(&self) -> usize {
79 self.dict.n_ones()
80 }
81
82 pub fn select1(&self, k: usize) -> Option<usize> {
84 self.dict.select1(k)
85 }
86
87 pub fn rank(&self, k: usize) -> usize {
89 self.dict.rank1(k).unwrap_or_else(|| panic!("Out of bounds position: {} >= {}", k, self.len()))
90 }
91
92 pub fn at_last_sibling(&self, word_index: usize) -> bool {
94 self.dict.get(word_index).expect("word index out of bounds")
95 }
96
97 pub fn read<R: BufRead>(reader: &mut R) -> Result<Self> {
99 use Error::*;
100 let mut history: Vec<u8> = Vec::with_capacity(5);
101
102 let mut bitmap_type = [0u8];
104 reader.read_exact(&mut bitmap_type)?;
105 history.extend_from_slice(&bitmap_type);
106 if bitmap_type[0] != 1 {
107 return Err(UnsupportedBitmapType(bitmap_type[0]));
108 }
109
110 let (num_bits, bytes_read) = read_vbyte(reader)?;
112 history.extend_from_slice(&bytes_read);
113
114 let mut crc_code = [0_u8];
116 reader.read_exact(&mut crc_code)?;
117 let crc_code = crc_code[0];
118
119 let crc8 = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
121 let mut digest = crc8.digest();
122 digest.update(&history);
123 let crc_calculated = digest.finalize();
124 if crc_calculated != crc_code {
125 return Err(InvalidCrc8Checksum(crc_calculated, crc_code));
126 }
127
128 let full_byte_amount = ((num_bits - 1) >> 6) * 8;
130 let mut full_words = vec![0_u8; full_byte_amount];
131 let mut data: Vec<u64> = Vec::with_capacity(full_byte_amount / 8 + usize::from(full_byte_amount % 8 != 0));
133 reader.read_exact(&mut full_words)?;
134
135 for word in full_words.chunks_exact(size_of::<u64>()) {
136 data.push(u64::from_le_bytes(<[u8; 8]>::try_from(word)?));
137 }
138
139 let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
141 let mut digest = crc32.digest();
142 digest.update(&full_words);
143
144 let mut bits_read = 0;
145 let mut last_value: u64 = 0;
146 let last_word_bits = if num_bits == 0 { 0 } else { ((num_bits - 1) % 64) + 1 };
147
148 while bits_read < last_word_bits {
149 let mut buffer = [0u8];
150 reader.read_exact(&mut buffer)?;
151 digest.update(&buffer);
152 last_value |= (buffer[0] as u64) << bits_read;
153 bits_read += 8;
154 }
155 data.push(last_value);
156
157 let mut crc_code = [0_u8; 4];
159 reader.read_exact(&mut crc_code)?;
160 let crc_code = u32::from_le_bytes(crc_code);
161
162 let crc_calculated = digest.finalize();
165 if crc_calculated != crc_code {
166 return Err(InvalidCrc32Checksum(crc_calculated, crc_code));
167 }
168 Ok(Self::new(data))
169 }
170
171 pub fn write(&self, w: &mut impl std::io::Write) -> Result<()> {
172 let crc = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
173 let mut hasher = crc.digest();
174 let bitmap_type: [u8; 1] = [1];
176 w.write_all(&bitmap_type)?;
177 hasher.update(&bitmap_type);
178 let t = encode_vbyte(self.len());
180 w.write_all(&t)?;
181 hasher.update(&t);
182 let checksum = hasher.finalize();
184 w.write_all(&checksum.to_le_bytes())?;
185
186 let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
188 let mut hasher = crc32.digest();
189
190 let words: &[u64] = self.dict.bit_vector().words();
191 let num_bytes = self.dict.bit_vector().len().div_ceil(8); let bytes = unsafe {
193 std::slice::from_raw_parts(words.as_ptr().cast::<u8>(), num_bytes) };
195
196 w.write_all(bytes)?;
197 hasher.update(bytes);
198 let crc_code = hasher.finalize();
199 let crc_code = crc_code.to_le_bytes();
200 w.write_all(&crc_code)?;
201 Ok(())
202 }
203}
204
205#[cfg(test)]
206mod tests {
207 use super::*;
208 use crate::tests::init;
209 use pretty_assertions::assert_eq;
210
211 #[test]
212 fn bugtest() {
213 let mut v = BitVectorMut::new();
214 v.push(true);
215 v.push(false);
216 let bv: BitVector = v.into();
217 let rs: RSNarrow = bv.into();
218 rs.n_zeros();
219 }
220
221 #[test]
222 fn write() -> color_eyre::Result<()> {
223 init();
224 let bits: Vec<u64> = vec![0b10111];
225 let bitmap = Bitmap::new(bits);
226 assert_eq!(bitmap.len(), 64);
227 assert_eq!(bitmap.select1(0).unwrap(), 0);
230 assert_eq!(bitmap.select1(1).unwrap(), 1);
231 assert_eq!(bitmap.select1(2).unwrap(), 2);
232 assert_eq!(bitmap.select1(3).unwrap(), 4);
233 assert_eq!(bitmap.select1(4), None);
234 assert_eq!(bitmap.rank(1), 1);
236 assert_eq!(bitmap.rank(5), 4);
237 let mut buf = Vec::<u8>::new();
238 bitmap.write(&mut buf)?;
239 let bitmap2 = Bitmap::read(&mut std::io::Cursor::new(buf))?;
240 assert_eq!(bitmap.dict, bitmap2.dict);
242 Ok(())
243 }
244}