1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
//! This module contains the main functions that are used for translating
//! between the ben32 and BEN formats. The ben32 format is a simple run-length
//! encoding of an assignment vector done at the byte level and for which every
//! 32 bits of data encodes a one (assignment, count) pair. The BEN format is
//! a bit-packed version of the ben32 format along with some extra headers.
use byteorder::{BigEndian, ReadBytesExt};
use std::io::{self, Error, Read, Write};
use super::{log, logln};
use crate::decode::decode_ben_line;
use crate::encode::encode_ben_vec_from_rle;
/// This function takes a ben32 encoded assignment vector and
/// transforms into a ben encoded assignment vector.
///
/// # Arguments
///
/// * `ben32_vec` - A vector of bytes containing the ben32 encoded assignment vector
///
/// # Returns
///
/// A vector of bytes containing the ben encoded assignment vector
///
/// # Errors
///
/// This function will return an error if the input ben32 vector is not a multiple of 4
/// bytes long or if the end of line separator (4 bytes of 0) is missing. All
/// assignment vectors are expected to be a multiple of 4 bytes long since each
/// assignment vector is an run-length encoded as a 32 bit integer (2 bytes for
/// the value and 2 bytes for the count). The end of line separator is also the
/// only way that the ben32 format has to separate assignment vectors.
fn ben32_to_ben_line(ben32_vec: Vec<u8>) -> io::Result<Vec<u8>> {
let mut buffer = [0u8; 4];
let mut ben32_rle: Vec<(u16, u16)> = Vec::new();
let mut reader = ben32_vec.as_slice();
if ben32_vec.len() % 4 != 0 {
return Err(Error::new(
io::ErrorKind::InvalidData,
"Invalid ben32 data length",
));
}
for _ in 0..((ben32_vec.len() / 4) - 1) {
reader.read_exact(&mut buffer)?;
let encoded = u32::from_be_bytes(buffer);
let value = (encoded >> 16) as u16; // High 16 bits
let count = (encoded & 0xFFFF) as u16; // Low 16 bits
ben32_rle.push((value, count));
}
// read the last 4 bytes which should be 0 since they are a separator
reader.read_exact(&mut buffer)?;
if buffer != [0u8; 4] {
return Err(Error::new(
io::ErrorKind::InvalidData,
"Invalid ben32 data format. Missing end of line separator.",
));
}
Ok(encode_ben_vec_from_rle(ben32_rle))
}
/// This function takes a reader that contains a several ben32 encoded assignment
/// vectors and encodes them into ben encoded assignment vectors and writes them
/// to the designated writer.
///
/// # Arguments
///
/// * `reader` - A reader that contains ben32 encoded assignment vectors
/// * `writer` - A writer that will contain the ben encoded assignment vectors
///
/// # Returns
///
/// An io::Result containing the result of the operation
///
/// # Errors
///
/// This function will return an error if the input reader contains invalid ben32
/// data or if the writer encounters an error while writing the ben data.
pub fn ben32_to_ben_lines<R: Read, W: Write>(mut reader: R, mut writer: W) -> io::Result<()> {
'outer: loop {
let mut ben32_vec: Vec<u8> = Vec::new();
let mut ben32_read_buff: [u8; 4] = [0u8; 4];
// extract the ben32 data
'inner: loop {
match reader.read_exact(&mut ben32_read_buff) {
Ok(()) => {
ben32_vec.extend(ben32_read_buff);
if ben32_read_buff == [0u8; 4] {
break 'inner;
}
}
Err(e) => {
if e.kind() == io::ErrorKind::UnexpectedEof {
break 'outer;
}
return Err(e);
}
}
}
let ben_vec = ben32_to_ben_line(ben32_vec)?;
writer.write_all(&ben_vec)?;
}
Ok(())
}
/// This function takes a ben encoded assignment vector and transforms it into
/// a ben32 encoded assignment vector.
///
/// # Arguments
///
/// * `reader` - A reader that contains ben encoded assignment vectors
/// * `max_val_bits` - The maximum number of bits that the value of an assignment can have
/// * `max_len_bits` - The maximum number of bits that the length of an assignment can have
///
/// # Returns
///
/// A vector of bytes containing the ben32 encoded assignment vector
fn ben_to_ben32_line<R: Read>(
reader: R,
max_val_bits: u8,
max_len_bits: u8,
n_bytes: u32,
) -> io::Result<Vec<u8>> {
let ben_rle: Vec<(u16, u16)> = decode_ben_line(reader, max_val_bits, max_len_bits, n_bytes)?;
let mut ben32_vec: Vec<u8> = Vec::new();
for (value, count) in ben_rle.into_iter() {
let encoded = ((value as u32) << 16) | (count as u32);
ben32_vec.extend(&encoded.to_be_bytes());
}
ben32_vec.extend(&[0u8; 4]);
Ok(ben32_vec)
}
/// This function takes a reader that contains a several ben encoded assignment
/// vectors and encodes them into ben32 encoded assignment vectors and writes them
/// to the designated writer.
///
/// # Arguments
///
/// * `reader` - A reader that contains ben encoded assignment vectors
/// * `writer` - A writer that will contain the ben32 encoded assignment vectors
///
/// # Returns
///
/// An io::Result containing the result of the operation
///
/// # Errors
///
/// This function will return an error if the input reader contains invalid ben
/// data or if the writer encounters an error while writing the ben32 data.
pub fn ben_to_ben32_lines<R: Read, W: Write>(mut reader: R, mut writer: W) -> io::Result<()> {
let mut sample_number = 1;
'outer: loop {
let mut tmp_buffer = [0u8];
let max_val_bits = match reader.read_exact(&mut tmp_buffer) {
Ok(()) => tmp_buffer[0],
Err(e) => {
if e.kind() == io::ErrorKind::UnexpectedEof {
break 'outer;
}
return Err(e);
}
};
let max_len_bits = reader.read_u8()?;
let n_bytes = reader.read_u32::<BigEndian>()?;
log!("Encoding line: {}\r", sample_number);
sample_number += 1;
let ben32_vec = ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
writer.write_all(&ben32_vec)?;
}
logln!();
logln!("Done!");
Ok(())
}
#[cfg(test)]
mod tests {
include!("tests/translate_tests.rs");
}