use crate::containers::vbyte::{encode_vbyte, read_vbyte};
use bytesize::ByteSize;
use mem_dbg::{MemSize, SizeFlags};
use qwt::{AccessBin, BitVector, BitVectorMut, RankBin, SelectBin, bitvector::rs_narrow::RSNarrow};
#[cfg(feature = "cache")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::io::BufRead;
use std::mem::size_of;
#[derive(Clone)]
#[cfg_attr(feature = "cache", derive(Serialize, Deserialize))]
pub struct Bitmap {
pub dict: RSNarrow,
}
pub type Result<T> = core::result::Result<T, Error>;
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("IO error")]
Io(#[from] std::io::Error),
#[error("Invalid CRC8-CCIT checksum {0}, expected {1}")]
InvalidCrc8Checksum(u8, u8),
#[error("Invalid CRC32C checksum {0}, expected {1}")]
InvalidCrc32Checksum(u32, u32),
#[error("Failed to turn raw bytes into u64")]
TryFromSliceError(#[from] std::array::TryFromSliceError),
#[error("Read unsupported bitmap type {0} != 1")]
UnsupportedBitmapType(u8),
}
impl fmt::Debug for Bitmap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}, {} bits", ByteSize(self.size_in_bytes() as u64), self.len())
}
}
impl From<BitVector> for Bitmap {
fn from(bv: BitVector) -> Self {
Bitmap { dict: bv.into() }
}
}
impl From<BitVectorMut> for Bitmap {
fn from(bv: BitVectorMut) -> Self {
Bitmap { dict: <BitVectorMut as Into<BitVector>>::into(bv).into() }
}
}
impl Bitmap {
pub fn new(data: Vec<u64>) -> Self {
let mut v = BitVectorMut::new();
for d in data {
v.append_bits(d, 64);
}
let dict: BitVector = v.into();
Bitmap { dict: dict.into() }
}
pub fn size_in_bytes(&self) -> usize {
self.dict.mem_size(SizeFlags::default())
}
pub fn len(&self) -> usize {
self.dict.n_zeros() + self.dict.n_ones() }
pub fn num_ones(&self) -> usize {
self.dict.n_ones()
}
pub fn select1(&self, k: usize) -> Option<usize> {
self.dict.select1(k)
}
pub fn rank(&self, k: usize) -> usize {
self.dict.rank1(k).unwrap_or_else(|| panic!("Out of bounds position: {} >= {}", k, self.len()))
}
pub fn at_last_sibling(&self, word_index: usize) -> bool {
self.dict.get(word_index).expect("word index out of bounds")
}
pub fn read<R: BufRead>(reader: &mut R) -> Result<Self> {
use Error::*;
let mut history: Vec<u8> = Vec::with_capacity(5);
let mut bitmap_type = [0u8];
reader.read_exact(&mut bitmap_type)?;
history.extend_from_slice(&bitmap_type);
if bitmap_type[0] != 1 {
return Err(UnsupportedBitmapType(bitmap_type[0]));
}
let (num_bits, bytes_read) = read_vbyte(reader)?;
history.extend_from_slice(&bytes_read);
let mut crc_code = [0_u8];
reader.read_exact(&mut crc_code)?;
let crc_code = crc_code[0];
let crc8 = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
let mut digest = crc8.digest();
digest.update(&history);
let crc_calculated = digest.finalize();
if crc_calculated != crc_code {
return Err(InvalidCrc8Checksum(crc_calculated, crc_code));
}
let full_byte_amount = ((num_bits - 1) >> 6) * 8;
let mut full_words = vec![0_u8; full_byte_amount];
let mut data: Vec<u64> = Vec::with_capacity(full_byte_amount / 8 + usize::from(full_byte_amount % 8 != 0));
reader.read_exact(&mut full_words)?;
for word in full_words.chunks_exact(size_of::<u64>()) {
data.push(u64::from_le_bytes(<[u8; 8]>::try_from(word)?));
}
let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
let mut digest = crc32.digest();
digest.update(&full_words);
let mut bits_read = 0;
let mut last_value: u64 = 0;
let last_word_bits = if num_bits == 0 { 0 } else { ((num_bits - 1) % 64) + 1 };
while bits_read < last_word_bits {
let mut buffer = [0u8];
reader.read_exact(&mut buffer)?;
digest.update(&buffer);
last_value |= (buffer[0] as u64) << bits_read;
bits_read += 8;
}
data.push(last_value);
let mut crc_code = [0_u8; 4];
reader.read_exact(&mut crc_code)?;
let crc_code = u32::from_le_bytes(crc_code);
let crc_calculated = digest.finalize();
if crc_calculated != crc_code {
return Err(InvalidCrc32Checksum(crc_calculated, crc_code));
}
Ok(Self::new(data))
}
pub fn write(&self, w: &mut impl std::io::Write) -> Result<()> {
let crc = crc::Crc::<u8>::new(&crc::CRC_8_SMBUS);
let mut hasher = crc.digest();
let bitmap_type: [u8; 1] = [1];
w.write_all(&bitmap_type)?;
hasher.update(&bitmap_type);
let t = encode_vbyte(self.len());
w.write_all(&t)?;
hasher.update(&t);
let checksum = hasher.finalize();
w.write_all(&checksum.to_le_bytes())?;
let crc32 = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
let mut hasher = crc32.digest();
let words: &[u64] = self.dict.bit_vector().words();
let num_bytes = self.dict.bit_vector().len().div_ceil(8); let bytes = unsafe {
std::slice::from_raw_parts(words.as_ptr().cast::<u8>(), num_bytes) };
w.write_all(bytes)?;
hasher.update(bytes);
let crc_code = hasher.finalize();
let crc_code = crc_code.to_le_bytes();
w.write_all(&crc_code)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tests::init;
use pretty_assertions::assert_eq;
#[test]
fn bugtest() {
let mut v = BitVectorMut::new();
v.push(true);
v.push(false);
let bv: BitVector = v.into();
let rs: RSNarrow = bv.into();
rs.n_zeros();
}
#[test]
fn write() -> color_eyre::Result<()> {
init();
let bits: Vec<u64> = vec![0b10111];
let bitmap = Bitmap::new(bits);
assert_eq!(bitmap.len(), 64);
assert_eq!(bitmap.select1(0).unwrap(), 0);
assert_eq!(bitmap.select1(1).unwrap(), 1);
assert_eq!(bitmap.select1(2).unwrap(), 2);
assert_eq!(bitmap.select1(3).unwrap(), 4);
assert_eq!(bitmap.select1(4), None);
assert_eq!(bitmap.rank(1), 1);
assert_eq!(bitmap.rank(5), 4);
let mut buf = Vec::<u8>::new();
bitmap.write(&mut buf)?;
let bitmap2 = Bitmap::read(&mut std::io::Cursor::new(buf))?;
assert_eq!(bitmap.dict, bitmap2.dict);
Ok(())
}
}