ben/encode/
translate.rs

//! This module contains the main functions that are used for translating
//! between the ben32 and BEN formats. The ben32 format is a simple run-length
//! encoding of an assignment vector done at the byte level, in which every
//! 32 bits of data encode one (assignment, count) pair. The BEN format is
//! a bit-packed version of the ben32 format along with some extra headers.
6use byteorder::{BigEndian, ReadBytesExt};
7use std::io::{self, Error, Read, Write};
8
9use super::{log, logln, BenVariant};
10use crate::decode::decode_ben_line;
11use crate::encode::encode_ben_vec_from_rle;
12
13/// This function takes a ben32 encoded assignment vector and
14/// transforms into a ben encoded assignment vector.
15///
16/// # Arguments
17///
18/// * `ben32_vec` - A vector of bytes containing the ben32 encoded assignment vector
19///
20/// # Returns
21///
22/// A vector of bytes containing the ben encoded assignment vector
23///
24/// # Errors
25///
26/// This function will return an error if the input ben32 vector is not a multiple of 4
27/// bytes long or if the end of line separator (4 bytes of 0) is missing. All
28/// assignment vectors are expected to be a multiple of 4 bytes long since each
29/// assignment vector is an run-length encoded as a 32 bit integer (2 bytes for
30/// the value and 2 bytes for the count). The end of line separator is also the
31/// only way that the ben32 format has to separate assignment vectors.
32fn ben32_to_ben_line(ben32_vec: Vec<u8>) -> io::Result<Vec<u8>> {
33    let mut buffer = [0u8; 4];
34    let mut ben32_rle: Vec<(u16, u16)> = Vec::new();
35
36    let mut reader = ben32_vec.as_slice();
37
38    if ben32_vec.len() % 4 != 0 {
39        return Err(Error::new(
40            io::ErrorKind::InvalidData,
41            "Invalid ben32 data length",
42        ));
43    }
44
45    for _ in 0..((ben32_vec.len() / 4) - 1) {
46        reader.read_exact(&mut buffer)?;
47        let encoded = u32::from_be_bytes(buffer);
48
49        let value = (encoded >> 16) as u16; // High 16 bits
50        let count = (encoded & 0xFFFF) as u16; // Low 16 bits
51
52        ben32_rle.push((value, count));
53    }
54
55    // read the last 4 bytes which should be 0 since they are a separator
56    reader.read_exact(&mut buffer)?;
57    if buffer != [0u8; 4] {
58        return Err(Error::new(
59            io::ErrorKind::InvalidData,
60            "Invalid ben32 data format. Missing end of line separator.",
61        ));
62    }
63
64    Ok(encode_ben_vec_from_rle(ben32_rle))
65}
66
67/// This function takes a reader that contains a several ben32 encoded assignment
68/// vectors and encodes them into ben encoded assignment vectors and writes them
69/// to the designated writer.
70///
71/// # Arguments
72///
73/// * `reader` - A reader that contains ben32 encoded assignment vectors
74/// * `writer` - A writer that will contain the ben encoded assignment vectors
75///
76/// # Returns
77///
78/// An io::Result containing the result of the operation
79///
80/// # Errors
81///
82/// This function will return an error if the input reader contains invalid ben32
83/// data or if the writer encounters an error while writing the ben data.
84pub fn ben32_to_ben_lines<R: Read, W: Write>(
85    mut reader: R,
86    mut writer: W,
87    variant: BenVariant,
88) -> io::Result<()> {
89    'outer: loop {
90        let mut ben32_vec: Vec<u8> = Vec::new();
91        let mut ben32_read_buff: [u8; 4] = [0u8; 4];
92
93        let mut n_reps = 0;
94
95        // extract the ben32 data
96        'inner: loop {
97            match reader.read_exact(&mut ben32_read_buff) {
98                Ok(()) => {
99                    ben32_vec.extend(ben32_read_buff);
100                    if ben32_read_buff == [0u8; 4] {
101                        if variant == BenVariant::MkvChain {
102                            n_reps = reader.read_u16::<BigEndian>()?;
103                        }
104                        break 'inner;
105                    }
106                }
107                Err(e) => {
108                    if e.kind() == io::ErrorKind::UnexpectedEof {
109                        break 'outer;
110                    }
111                    return Err(e);
112                }
113            }
114        }
115
116        let ben_vec = ben32_to_ben_line(ben32_vec)?;
117        writer.write_all(&ben_vec)?;
118        if variant == BenVariant::MkvChain {
119            writer.write_all(&n_reps.to_be_bytes())?;
120        }
121    }
122
123    Ok(())
124}
125
126/// This function takes a ben encoded assignment vector and transforms it into
127/// a ben32 encoded assignment vector.
128///
129/// # Arguments
130///
131/// * `reader` - A reader that contains ben encoded assignment vectors
132/// * `max_val_bits` - The maximum number of bits that the value of an assignment can have
133/// * `max_len_bits` - The maximum number of bits that the length of an assignment can have
134///
135/// # Returns
136///
137/// A vector of bytes containing the ben32 encoded assignment vector
138fn ben_to_ben32_line<R: Read>(
139    reader: R,
140    max_val_bits: u8,
141    max_len_bits: u8,
142    n_bytes: u32,
143) -> io::Result<Vec<u8>> {
144    let ben_rle: Vec<(u16, u16)> = decode_ben_line(reader, max_val_bits, max_len_bits, n_bytes)?;
145
146    let mut ben32_vec: Vec<u8> = Vec::new();
147
148    for (value, count) in ben_rle.into_iter() {
149        let encoded = ((value as u32) << 16) | (count as u32);
150        ben32_vec.extend(&encoded.to_be_bytes());
151    }
152
153    ben32_vec.extend(&[0u8; 4]);
154
155    Ok(ben32_vec)
156}
157
158/// This function takes a reader that contains a several ben encoded assignment
159/// vectors and encodes them into ben32 encoded assignment vectors and writes them
160/// to the designated writer.
161///
162/// # Arguments
163///
164/// * `reader` - A reader that contains ben encoded assignment vectors
165/// * `writer` - A writer that will contain the ben32 encoded assignment vectors
166///
167/// # Returns
168///
169/// An io::Result containing the result of the operation
170///
171/// # Errors
172///
173/// This function will return an error if the input reader contains invalid ben
174/// data or if the writer encounters an error while writing the ben32 data.
175pub fn ben_to_ben32_lines<R: Read, W: Write>(
176    mut reader: R,
177    mut writer: W,
178    variant: BenVariant,
179) -> io::Result<()> {
180    let mut sample_number = 1;
181    'outer: loop {
182        let mut tmp_buffer = [0u8];
183        let max_val_bits = match reader.read_exact(&mut tmp_buffer) {
184            Ok(()) => tmp_buffer[0],
185            Err(e) => {
186                if e.kind() == io::ErrorKind::UnexpectedEof {
187                    break 'outer;
188                }
189                return Err(e);
190            }
191        };
192
193        let max_len_bits = reader.read_u8()?;
194        let n_bytes = reader.read_u32::<BigEndian>()?;
195
196        log!("Encoding line: {}\r", sample_number);
197
198        match variant {
199            BenVariant::Standard => {
200                sample_number += 1;
201                let ben32_vec =
202                    ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
203                writer.write_all(&ben32_vec)?;
204            }
205            BenVariant::MkvChain => {
206                let ben32_vec =
207                    ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
208
209                // Read the number of repetitions AFTER the ben32 data
210                let n_reps = reader.read_u16::<BigEndian>()?;
211                sample_number += n_reps as usize;
212                writer.write_all(&ben32_vec)?;
213                writer.write_all(&n_reps.to_be_bytes())?;
214            }
215        }
216    }
217
218    logln!();
219    logln!("Done!");
220    Ok(())
221}
222
223#[cfg(test)]
224#[path = "tests/translate_tests.rs"]
225mod tests;