use std::cmp::{min, Ordering};
use std::io::{self, Error, Write};
#[derive(Debug)]
struct PaddingMap {
data_bits: usize,
element_bits: usize,
}
const FR32_PADDING_MAP: PaddingMap = PaddingMap {
data_bits: 254,
element_bits: 256,
};
pub fn to_unpadded_bytes(padded_bytes: u64) -> u64 {
FR32_PADDING_MAP.transform_byte_offset(padded_bytes as usize, false) as u64
}
pub fn to_padded_bytes(unpadded_bytes: usize) -> usize {
FR32_PADDING_MAP.transform_byte_offset(unpadded_bytes, true)
}
#[derive(Debug)]
struct BitByte {
bytes: usize,
bits: usize,
}
impl BitByte {
fn from_bits(bits: usize) -> BitByte {
BitByte {
bytes: bits / 8,
bits: bits % 8,
}
}
fn total_bits(&self) -> usize {
self.bytes * 8 + self.bits
}
fn bytes_needed(&self) -> usize {
self.bytes + if self.bits == 0 { 0 } else { 1 }
}
}
impl PaddingMap {
fn pad_bits(&self) -> usize {
self.element_bits - self.data_bits
}
fn transform_bit_offset(&self, pos: usize, padding: bool) -> usize {
let (from_size, to_size) = if padding {
(self.data_bits, self.element_bits)
} else {
(self.element_bits, self.data_bits)
};
let (full_elements, incomplete_data) = div_rem(pos, from_size);
(full_elements * to_size) + incomplete_data
}
fn transform_byte_offset(&self, pos: usize, padding: bool) -> usize {
let transformed_bit_pos = self.transform_bit_offset(pos * 8, padding);
let transformed_byte_pos = transformed_bit_pos as f64 / 8.;
(if padding {
transformed_byte_pos.ceil()
} else {
transformed_byte_pos.floor()
}) as usize
}
fn next_boundary(&self, position: &BitByte) -> (usize, usize) {
let position_bits = position.total_bits();
let (_, bits_after_last_boundary) = div_rem(position_bits, self.element_bits);
let remaining_data_unit_bits = self.data_bits - bits_after_last_boundary;
let next_element_position_bits = position_bits + remaining_data_unit_bits + self.pad_bits();
(next_element_position_bits / 8, remaining_data_unit_bits)
}
}
#[inline]
fn div_rem(a: usize, b: usize) -> (usize, usize) {
let div = a / b;
let rem = a % b;
(div, rem)
}
fn shift_bits(input: &[u8], amount: usize, is_left: bool) -> Vec<u8> {
debug_assert!(amount >= 1);
debug_assert!(amount <= 7);
let mut output = Vec::with_capacity(input.len() + if is_left { 1 } else { 0 });
output.extend_from_slice(input);
if is_left {
output.push(0);
}
for output_byte in output.iter_mut().take(input.len()) {
if is_left {
*output_byte <<= amount;
} else {
*output_byte >>= amount;
}
}
if is_left {
for i in 0..input.len() {
let overflow = input[i] >> (8 - amount);
output[i + 1] |= overflow;
}
} else {
for i in 1..input.len() {
let overflow = input[i] << (8 - amount);
output[i - 1] |= overflow;
}
}
output
}
#[inline]
fn extract_bits_and_shift(input: &[u8], pos: usize, num_bits: usize, new_offset: usize) -> Vec<u8> {
debug_assert!(input.len() * 8 >= pos + num_bits);
debug_assert!(new_offset <= 7);
let (skip_bytes, extraction_offset) = div_rem(pos, 8);
let input = &input[skip_bytes..];
let input = &input[..BitByte::from_bits(extraction_offset + num_bits).bytes_needed()];
let mut output = match new_offset.cmp(&extraction_offset) {
Ordering::Less => {
shift_bits(input, extraction_offset - new_offset, false)
}
Ordering::Greater => {
shift_bits(input, new_offset - extraction_offset, true)
}
Ordering::Equal => {
input.to_vec()
}
};
if output.len() > BitByte::from_bits(new_offset + num_bits).bytes_needed() {
output.pop();
}
if new_offset != 0 {
clear_right_bits(output.first_mut().expect("output is empty"), new_offset);
}
let end_offset = (new_offset + num_bits) % 8;
if end_offset != 0 {
clear_left_bits(output.last_mut().expect("output is empty"), end_offset);
}
output
}
#[inline]
fn clear_left_bits(byte: &mut u8, offset: usize) {
*(byte) &= (1 << offset) - 1
}
#[inline]
fn clear_right_bits(byte: &mut u8, offset: usize) {
*(byte) &= !((1 << offset) - 1)
}
pub fn write_unpadded<W>(
source: &[u8],
target: &mut W,
offset: usize,
len: usize,
) -> io::Result<usize>
where
W: Write + ?Sized,
{
let read_pos = BitByte::from_bits(FR32_PADDING_MAP.transform_bit_offset(offset * 8, true));
let raw_data_size = BitByte::from_bits(
FR32_PADDING_MAP.transform_bit_offset(source.len() * 8 - read_pos.total_bits(), false),
)
.bytes_needed();
if raw_data_size < len {
return Err(Error::other(format!(
"requested extraction of {} raw data bytes when there's at most {} in the source",
len, raw_data_size
)));
}
let n = 1000;
let chunk_size = 128 * n;
let mut written = 0;
let mut offset = offset;
let mut len = len;
for chunk in source.chunks(chunk_size) {
let write_len = min(len, chunk.len());
written += write_unpadded_aux(&FR32_PADDING_MAP, source, target, offset, write_len)?;
offset += write_len;
len -= write_len;
}
Ok(written)
}
fn write_unpadded_aux<W>(
padding_map: &PaddingMap,
source: &[u8],
target: &mut W,
write_pos: usize,
max_write_size: usize,
) -> io::Result<usize>
where
W: Write + ?Sized,
{
let mut read_pos = BitByte::from_bits(padding_map.transform_bit_offset(write_pos * 8, true));
let max_write_size_bits = max_write_size * 8;
let mut raw_data_size = BitByte::from_bits(
padding_map.transform_bit_offset(source.len() * 8 - read_pos.total_bits(), false),
)
.bytes_needed();
raw_data_size = min(raw_data_size, max_write_size);
let mut raw_data: Vec<u8> = Vec::with_capacity(raw_data_size);
let mut written_bits = 0;
let mut write_bit_offset = 0;
while read_pos.bytes < source.len() && written_bits < max_write_size_bits {
let (next_element_position, mut bits_to_extract) = padding_map.next_boundary(&read_pos);
bits_to_extract = min(bits_to_extract, source.len() * 8 - read_pos.total_bits());
let bits_left_to_write = max_write_size_bits - written_bits;
bits_to_extract = min(bits_to_extract, bits_left_to_write);
let mut recovered = extract_bits_and_shift(
source,
read_pos.total_bits(),
bits_to_extract,
write_bit_offset,
);
if write_bit_offset != 0 {
*(raw_data.last_mut().expect("raw_data is empty")) |=
*(recovered.first().expect("recovered is empty"));
raw_data.append(&mut recovered[1..].to_vec());
} else {
raw_data.append(&mut recovered);
}
written_bits += bits_to_extract;
write_bit_offset = written_bits % 8;
read_pos = BitByte {
bytes: next_element_position,
bits: 0,
};
}
debug_assert!(raw_data_size - raw_data.len() <= 1);
debug_assert!(raw_data_size >= raw_data.len());
target.write_all(&raw_data)?;
Ok(raw_data.len())
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::{Cursor, Read};
use bitvec::{order::Lsb0 as LittleEndian, vec::BitVec};
use itertools::Itertools;
use rand::{Rng, SeedableRng};
use rand_xorshift::XorShiftRng;
use crate::Fr32Reader;
const TEST_SEED: [u8; 16] = [
0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc,
0xe5,
];
#[test]
fn test_position() {
let mut bits = 0;
for i in 0..10 {
for j in 0..8 {
let position = BitByte { bytes: i, bits: j };
assert_eq!(position.total_bits(), bits);
bits += 1;
}
}
}
#[test]
fn test_random_bit_extraction() {
let len = 20;
let rng = &mut XorShiftRng::from_seed(TEST_SEED);
let data: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
for _ in 0..100 {
let pos = rng.gen_range(0..data.len() / 2);
let num_bits = rng.gen_range(1..data.len() * 8 - pos);
let new_offset = rng.gen_range(0..8);
let mut bv = BitVec::<LittleEndian, u8>::new();
bv.extend(
BitVec::<LittleEndian, u8>::from(&data[..])
.into_iter()
.skip(pos)
.take(num_bits),
);
let shifted_bv: BitVec<LittleEndian, u8> = bv >> new_offset;
assert_eq!(
shifted_bv.as_slice(),
&extract_bits_and_shift(&data, pos, num_bits, new_offset)[..],
);
}
}
#[test]
fn test_bit_shifts() {
let len = 5;
let rng = &mut XorShiftRng::from_seed(TEST_SEED);
for amount in 1..8 {
for left in [true, false].iter() {
let data: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
let shifted_bits = shift_bits(&data, amount, *left);
let mut bv: BitVec<LittleEndian, u8> = data.into();
if *left {
bv >>= amount;
} else {
bv <<= amount;
}
assert_eq!(bv.as_slice(), shifted_bits.as_slice());
}
}
}
fn bit_vec_padding(raw_data: Vec<u8>) -> Box<[u8]> {
let mut padded_data: BitVec<LittleEndian, u8> = BitVec::new();
let raw_data: BitVec<LittleEndian, u8> = BitVec::from(raw_data);
for data_unit in raw_data
.into_iter()
.chunks(FR32_PADDING_MAP.data_bits)
.into_iter()
{
padded_data.extend(data_unit);
if !padded_data.len().is_multiple_of(8) {
for _ in 0..FR32_PADDING_MAP.pad_bits() {
padded_data.push(false);
}
}
}
padded_data.into_boxed_slice()
}
#[test]
fn test_read_write_padded() {
let len = 1016; let data = vec![255u8; len];
let mut padded = Vec::new();
let mut reader = Fr32Reader::new(Cursor::new(&data));
reader
.read_to_end(&mut padded)
.expect("in-memory read failed");
assert_eq!(
padded.len(),
FR32_PADDING_MAP.transform_byte_offset(len, true)
);
let mut unpadded = Vec::new();
let unpadded_written =
write_unpadded(&padded, &mut unpadded, 0, len).expect("un-padded write failed");
assert_eq!(unpadded_written, len);
assert_eq!(data, unpadded);
assert_eq!(padded.into_boxed_slice(), bit_vec_padding(data));
}
#[test]
fn test_read_write_padded_offset() {
let rng = &mut XorShiftRng::from_seed(TEST_SEED);
let len = 1016;
let data: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
let mut padded = Vec::new();
let mut reader = Fr32Reader::new(Cursor::new(&data));
reader
.read_to_end(&mut padded)
.expect("in-memory read failed");
{
let mut unpadded = Vec::new();
write_unpadded(&padded, &mut unpadded, 0, 1016).expect("un-padded write failed: 1016");
let expected = &data[0..1016];
assert_eq!(expected.len(), unpadded.len());
assert_eq!(expected, &unpadded[..]);
}
{
let mut unpadded = Vec::new();
write_unpadded(&padded, &mut unpadded, 0, 44).expect("un-padded write failed: 44");
let expected = &data[0..44];
assert_eq!(expected.len(), unpadded.len());
assert_eq!(expected, &unpadded[..]);
}
let excessive_len = 35;
for start in (1016 - excessive_len + 2)..1016 {
assert!(write_unpadded(&padded, &mut Vec::new(), start, excessive_len).is_err());
}
}
}