ben/encode/translate.rs
//! This module contains the main functions that are used for translating
//! between the ben32 and BEN formats. The ben32 format is a simple run-length
//! encoding of an assignment vector done at the byte level, in which every
//! 32 bits of data encode one (assignment, count) pair. The BEN format is
//! a bit-packed version of the ben32 format along with some extra headers.
6use byteorder::{BigEndian, ReadBytesExt};
7use std::io::{self, Error, Read, Write};
8
9use super::{log, logln, BenVariant};
10use crate::decode::decode_ben_line;
11use crate::encode::encode_ben_vec_from_rle;
12
13/// This function takes a ben32 encoded assignment vector and
14/// transforms into a ben encoded assignment vector.
15///
16/// # Arguments
17///
18/// * `ben32_vec` - A vector of bytes containing the ben32 encoded assignment vector
19///
20/// # Returns
21///
22/// A vector of bytes containing the ben encoded assignment vector
23///
24/// # Errors
25///
26/// This function will return an error if the input ben32 vector is not a multiple of 4
27/// bytes long or if the end of line separator (4 bytes of 0) is missing. All
28/// assignment vectors are expected to be a multiple of 4 bytes long since each
29/// assignment vector is an run-length encoded as a 32 bit integer (2 bytes for
30/// the value and 2 bytes for the count). The end of line separator is also the
31/// only way that the ben32 format has to separate assignment vectors.
32fn ben32_to_ben_line(ben32_vec: Vec<u8>) -> io::Result<Vec<u8>> {
33 let mut buffer = [0u8; 4];
34 let mut ben32_rle: Vec<(u16, u16)> = Vec::new();
35
36 let mut reader = ben32_vec.as_slice();
37
38 if ben32_vec.len() % 4 != 0 {
39 return Err(Error::new(
40 io::ErrorKind::InvalidData,
41 "Invalid ben32 data length",
42 ));
43 }
44
45 for _ in 0..((ben32_vec.len() / 4) - 1) {
46 reader.read_exact(&mut buffer)?;
47 let encoded = u32::from_be_bytes(buffer);
48
49 let value = (encoded >> 16) as u16; // High 16 bits
50 let count = (encoded & 0xFFFF) as u16; // Low 16 bits
51
52 ben32_rle.push((value, count));
53 }
54
55 // read the last 4 bytes which should be 0 since they are a separator
56 reader.read_exact(&mut buffer)?;
57 if buffer != [0u8; 4] {
58 return Err(Error::new(
59 io::ErrorKind::InvalidData,
60 "Invalid ben32 data format. Missing end of line separator.",
61 ));
62 }
63
64 Ok(encode_ben_vec_from_rle(ben32_rle))
65}
66
67/// This function takes a reader that contains a several ben32 encoded assignment
68/// vectors and encodes them into ben encoded assignment vectors and writes them
69/// to the designated writer.
70///
71/// # Arguments
72///
73/// * `reader` - A reader that contains ben32 encoded assignment vectors
74/// * `writer` - A writer that will contain the ben encoded assignment vectors
75///
76/// # Returns
77///
78/// An io::Result containing the result of the operation
79///
80/// # Errors
81///
82/// This function will return an error if the input reader contains invalid ben32
83/// data or if the writer encounters an error while writing the ben data.
84pub fn ben32_to_ben_lines<R: Read, W: Write>(
85 mut reader: R,
86 mut writer: W,
87 variant: BenVariant,
88) -> io::Result<()> {
89 'outer: loop {
90 let mut ben32_vec: Vec<u8> = Vec::new();
91 let mut ben32_read_buff: [u8; 4] = [0u8; 4];
92
93 let mut n_reps = 0;
94
95 // extract the ben32 data
96 'inner: loop {
97 match reader.read_exact(&mut ben32_read_buff) {
98 Ok(()) => {
99 ben32_vec.extend(ben32_read_buff);
100 if ben32_read_buff == [0u8; 4] {
101 if variant == BenVariant::MkvChain {
102 n_reps = reader.read_u16::<BigEndian>()?;
103 }
104 break 'inner;
105 }
106 }
107 Err(e) => {
108 if e.kind() == io::ErrorKind::UnexpectedEof {
109 break 'outer;
110 }
111 return Err(e);
112 }
113 }
114 }
115
116 let ben_vec = ben32_to_ben_line(ben32_vec)?;
117 writer.write_all(&ben_vec)?;
118 if variant == BenVariant::MkvChain {
119 writer.write_all(&n_reps.to_be_bytes())?;
120 }
121 }
122
123 Ok(())
124}
125
126/// This function takes a ben encoded assignment vector and transforms it into
127/// a ben32 encoded assignment vector.
128///
129/// # Arguments
130///
131/// * `reader` - A reader that contains ben encoded assignment vectors
132/// * `max_val_bits` - The maximum number of bits that the value of an assignment can have
133/// * `max_len_bits` - The maximum number of bits that the length of an assignment can have
134///
135/// # Returns
136///
137/// A vector of bytes containing the ben32 encoded assignment vector
138fn ben_to_ben32_line<R: Read>(
139 reader: R,
140 max_val_bits: u8,
141 max_len_bits: u8,
142 n_bytes: u32,
143) -> io::Result<Vec<u8>> {
144 let ben_rle: Vec<(u16, u16)> = decode_ben_line(reader, max_val_bits, max_len_bits, n_bytes)?;
145
146 let mut ben32_vec: Vec<u8> = Vec::new();
147
148 for (value, count) in ben_rle.into_iter() {
149 let encoded = ((value as u32) << 16) | (count as u32);
150 ben32_vec.extend(&encoded.to_be_bytes());
151 }
152
153 ben32_vec.extend(&[0u8; 4]);
154
155 Ok(ben32_vec)
156}
157
158/// This function takes a reader that contains a several ben encoded assignment
159/// vectors and encodes them into ben32 encoded assignment vectors and writes them
160/// to the designated writer.
161///
162/// # Arguments
163///
164/// * `reader` - A reader that contains ben encoded assignment vectors
165/// * `writer` - A writer that will contain the ben32 encoded assignment vectors
166///
167/// # Returns
168///
169/// An io::Result containing the result of the operation
170///
171/// # Errors
172///
173/// This function will return an error if the input reader contains invalid ben
174/// data or if the writer encounters an error while writing the ben32 data.
175pub fn ben_to_ben32_lines<R: Read, W: Write>(
176 mut reader: R,
177 mut writer: W,
178 variant: BenVariant,
179) -> io::Result<()> {
180 let mut sample_number = 1;
181 'outer: loop {
182 let mut tmp_buffer = [0u8];
183 let max_val_bits = match reader.read_exact(&mut tmp_buffer) {
184 Ok(()) => tmp_buffer[0],
185 Err(e) => {
186 if e.kind() == io::ErrorKind::UnexpectedEof {
187 break 'outer;
188 }
189 return Err(e);
190 }
191 };
192
193 let max_len_bits = reader.read_u8()?;
194 let n_bytes = reader.read_u32::<BigEndian>()?;
195
196 log!("Encoding line: {}\r", sample_number);
197
198 match variant {
199 BenVariant::Standard => {
200 sample_number += 1;
201 let ben32_vec =
202 ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
203 writer.write_all(&ben32_vec)?;
204 }
205 BenVariant::MkvChain => {
206 let ben32_vec =
207 ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
208
209 // Read the number of repetitions AFTER the ben32 data
210 let n_reps = reader.read_u16::<BigEndian>()?;
211 sample_number += n_reps as usize;
212 writer.write_all(&ben32_vec)?;
213 writer.write_all(&n_reps.to_be_bytes())?;
214 }
215 }
216 }
217
218 logln!();
219 logln!("Done!");
220 Ok(())
221}
222
223#[cfg(test)]
224#[path = "tests/translate_tests.rs"]
225mod tests;