use crate::{analyzer::SizeEstimationParameters, schema::BitOrder};
use bitstream_io::{BigEndian, BitRead, BitReader, BitWrite, BitWriter, LittleEndian};
use lossless_transform_utils::{
entropy::code_length_of_histogram32,
histogram::{histogram32_from_bytes, Histogram32},
};
use std::io::{self, Cursor, SeekFrom};
/// Estimates the compressed size, in bytes, of a block of data.
///
/// The estimate is computed in two steps:
///
/// 1. The number of bytes assumed to be removed by LZ matching
///    (`num_lz_matches * lz_match_multiplier`, truncated to an integer) is
///    subtracted from `data_len`.
/// 2. The remaining byte count is scaled by `entropy * entropy_multiplier`,
///    rounded up, and divided by 8 with integer (truncating) division.
///    NOTE(review): the division by 8 implies `entropy` is expressed in bits
///    per byte — confirm against the code that fills in the parameters.
///
/// If the estimated LZ savings exceed `data_len`, the remaining byte count is
/// clamped to zero instead of underflowing, so the estimate is `0`.
pub fn size_estimate(params: SizeEstimationParameters) -> usize {
    // Float-to-integer `as` casts saturate, so a negative or NaN product
    // becomes 0 here rather than wrapping.
    let lz_savings = (params.num_lz_matches as f64 * params.lz_match_multiplier) as usize;

    // A plain `-` would panic in debug builds (and wrap to a huge value in
    // release builds) whenever the savings exceed the input length.
    let bytes_after_lz = params.data_len.saturating_sub(lz_savings);

    // Round the scaled count up first, then convert to whole bytes.
    let estimated_bits =
        (bytes_after_lz as f64 * params.entropy * params.entropy_multiplier).ceil() as usize;
    estimated_bits / 8
}
/// Compresses `data` with zstd at the given `level` and returns the length of
/// the compressed output in bytes.
///
/// The compressed buffer itself is discarded; only its size is reported.
///
/// # Panics
///
/// Panics if `zstd::bulk::compress` returns an error. The panic message
/// includes the underlying error so the cause is visible.
pub fn get_zstd_compressed_size(data: &[u8], level: i32) -> u64 {
    // `expect` keeps the error in the panic message; converting the result to
    // an `Option` first would throw it away.
    let compressed = zstd::bulk::compress(data, level)
        .expect("zstd bulk compression of an in-memory buffer failed");
    compressed.len() as u64
}
/// Builds a byte histogram of `bytes` and returns the result of
/// `code_length_of_histogram32` for that histogram and the input length.
///
/// NOTE(review): the returned value is presumably the ideal code length /
/// entropy in bits per byte — confirm against the `lossless_transform_utils`
/// documentation.
pub fn calculate_file_entropy(bytes: &[u8]) -> f64 {
    let total_bytes = bytes.len() as u64;

    // Count how often each byte value occurs.
    let mut byte_counts = Histogram32::default();
    histogram32_from_bytes(bytes, &mut byte_counts);

    code_length_of_histogram32(&byte_counts, total_bytes)
}
/// Reverses the order of the lowest `max_bits` bits of `bits`.
///
/// Bit `x` of the input (for `x < max_bits`) ends up at position
/// `max_bits - 1 - x` of the result. Input bits at or above `max_bits` are
/// ignored, and the result never has bits set at or above `max_bits`.
/// A width of `0` yields `0`.
///
/// # Panics
///
/// Panics if `max_bits` is greater than 64, because such a field cannot be
/// represented in a `u64`.
pub fn reverse_bits(max_bits: u32, bits: u64) -> u64 {
    assert!(
        max_bits <= u64::BITS,
        "reverse_bits: max_bits ({}) exceeds the 64-bit width of the value",
        max_bits
    );

    // A shift by the full 64 bits would overflow, so handle the empty field
    // explicitly.
    if max_bits == 0 {
        return 0;
    }

    // Reversing all 64 bits moves bit `x` to `63 - x`; shifting right by
    // `64 - max_bits` then lands it on `max_bits - 1 - x` and drops every
    // input bit that was outside the field.
    bits.reverse_bits() >> (u64::BITS - max_bits)
}
/// A bit reader over a borrowed, in-memory byte slice whose bit order is
/// chosen at runtime.
///
/// Each variant wraps a `BitReader` reading from a `Cursor` over `&'a [u8]`;
/// the variants differ only in the endianness type parameter.
pub enum BitReaderContainer<'a> {
/// Reader using `BigEndian` bit order.
Msb(BitReader<Cursor<&'a [u8]>, BigEndian>),
/// Reader using `LittleEndian` bit order.
Lsb(BitReader<Cursor<&'a [u8]>, LittleEndian>),
}
impl BitReaderContainer<'_> {
pub fn read(&mut self, bits: u32) -> io::Result<u64> {
match self {
BitReaderContainer::Msb(reader) => reader.read(bits),
BitReaderContainer::Lsb(reader) => reader.read(bits),
}
}
pub fn seek_bits(&mut self, seekfrom: SeekFrom) -> io::Result<u64> {
match self {
BitReaderContainer::Msb(reader) => reader.seek_bits(seekfrom),
BitReaderContainer::Lsb(reader) => reader.seek_bits(seekfrom),
}
}
}
/// Creates a [`BitReaderContainer`] that reads from `data` in the requested
/// bit order.
///
/// `BitOrder::Default` is treated the same as `BitOrder::Msb`. The returned
/// reader borrows `data`; nothing is copied.
pub fn create_bit_reader(data: &[u8], bit_order: BitOrder) -> BitReaderContainer<'_> {
    // Both variants read from the same kind of cursor; only the endianness
    // marker differs.
    let source = Cursor::new(data);
    match bit_order {
        BitOrder::Lsb => BitReaderContainer::Lsb(BitReader::endian(source, LittleEndian)),
        BitOrder::Default | BitOrder::Msb => {
            BitReaderContainer::Msb(BitReader::endian(source, BigEndian))
        }
    }
}
/// A bit writer over an owned, in-memory buffer whose bit order is chosen at
/// runtime.
///
/// Each variant wraps a `BitWriter` writing into a `Cursor<Vec<u8>>`; the
/// variants differ only in the endianness type parameter.
pub enum BitWriterContainer {
/// Writer using `BigEndian` bit order.
Msb(BitWriter<Cursor<Vec<u8>>, BigEndian>),
/// Writer using `LittleEndian` bit order.
Lsb(BitWriter<Cursor<Vec<u8>>, LittleEndian>),
}
/// Creates a [`BitWriterContainer`] backed by a new, empty buffer.
///
/// `BitOrder::Default` is treated the same as `BitOrder::Msb`.
pub fn create_bit_writer(bit_order: BitOrder) -> BitWriterContainer {
    let sink = Cursor::new(Vec::new());
    match bit_order {
        BitOrder::Lsb => BitWriterContainer::Lsb(BitWriter::endian(sink, LittleEndian)),
        BitOrder::Default | BitOrder::Msb => {
            BitWriterContainer::Msb(BitWriter::endian(sink, BigEndian))
        }
    }
}
/// Creates a [`BitWriterContainer`] whose buffer starts out as a copy of
/// `data`.
///
/// The bytes of `data` are copied into a new `Vec`, and the cursor is placed
/// at the end of that copy, so the writer starts writing after the existing
/// bytes. `BitOrder::Default` is treated the same as `BitOrder::Msb`.
pub fn create_bit_writer_with_owned_data(data: &[u8], bit_order: BitOrder) -> BitWriterContainer {
    // The buffer setup is the same for every bit order.
    let mut sink = Cursor::new(data.to_vec());
    sink.set_position(data.len() as u64);

    match bit_order {
        BitOrder::Lsb => BitWriterContainer::Lsb(BitWriter::endian(sink, LittleEndian)),
        BitOrder::Default | BitOrder::Msb => {
            BitWriterContainer::Msb(BitWriter::endian(sink, BigEndian))
        }
    }
}
/// Byte-aligns `writer` and returns a view of its underlying buffer.
///
/// Note that this mutates the writer: `byte_align` is called first, so later
/// writes continue from the aligned position.
///
/// # Panics
///
/// Panics if byte-aligning fails or if the wrapped writer does not hand out
/// its inner cursor afterwards.
pub fn get_writer_buffer(writer: &mut BitWriterContainer) -> &[u8] {
    match writer {
        BitWriterContainer::Msb(msb) => {
            msb.byte_align().unwrap();
            let cursor = msb.writer().unwrap();
            cursor.get_ref()
        }
        BitWriterContainer::Lsb(lsb) => {
            lsb.byte_align().unwrap();
            let cursor = lsb.writer().unwrap();
            cursor.get_ref()
        }
    }
}
/// Byte-aligns `writer` and returns a reader over everything in its buffer,
/// using the same bit order as the writer.
///
/// The reader borrows the writer's buffer (hence the `'_` in the return
/// type), so the writer cannot be used again until the reader is dropped.
/// The reader starts at the beginning of the buffer.
///
/// Note that this mutates the writer: `byte_align` is called first, so later
/// writes continue from the aligned position.
///
/// # Panics
///
/// Panics if byte-aligning fails or if the wrapped writer does not hand out
/// its inner cursor afterwards.
pub fn bit_writer_to_reader(writer: &mut BitWriterContainer) -> BitReaderContainer<'_> {
    match writer {
        BitWriterContainer::Msb(msb) => {
            msb.byte_align().unwrap();
            let written = msb.writer().unwrap().get_ref();
            BitReaderContainer::Msb(BitReader::endian(Cursor::new(written), BigEndian))
        }
        BitWriterContainer::Lsb(lsb) => {
            lsb.byte_align().unwrap();
            let written = lsb.writer().unwrap().get_ref();
            BitReaderContainer::Lsb(BitReader::endian(Cursor::new(written), LittleEndian))
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Repetitive input must come out smaller than it went in.
    #[test]
    fn zstd_compression_estimate() {
        let input = b"This is a test string that should compress well with zstandard zstandard zstandard zstandard zstandard zstandard";
        let original_len = input.len() as u64;

        let compressed_len = get_zstd_compressed_size(input, 16);

        assert!(compressed_len < original_len);
    }
}