1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
//! This module contains the main functions that are used for translating
//! between the ben32 and BEN formats. The ben32 format is a simple run-length
//! encoding of an assignment vector done at the byte level, in which every
//! 32 bits of data encode one (assignment, count) pair. The BEN format is
//! a bit-packed version of the ben32 format along with some extra headers.
use byteorder::{BigEndian, ReadBytesExt};
use std::io::{self, Error, Read, Write};

use super::{log, logln};
use crate::decode::decode_ben_line;
use crate::encode::encode_ben_vec_from_rle;

/// This function takes a ben32 encoded assignment vector and
/// transforms into a ben encoded assignment vector.
///
/// # Arguments
///
/// * `ben32_vec` - A vector of bytes containing the ben32 encoded assignment vector
///
/// # Returns
///
/// A vector of bytes containing the ben encoded assignment vector
///
/// # Errors
///
/// This function will return an error if the input ben32 vector is not a multiple of 4
/// bytes long or if the end of line separator (4 bytes of 0) is missing. All
/// assignment vectors are expected to be a multiple of 4 bytes long since each
/// assignment vector is an run-length encoded as a 32 bit integer (2 bytes for
/// the value and 2 bytes for the count). The end of line separator is also the
/// only way that the ben32 format has to separate assignment vectors.
fn ben32_to_ben_line(ben32_vec: Vec<u8>) -> io::Result<Vec<u8>> {
    let mut buffer = [0u8; 4];
    let mut ben32_rle: Vec<(u16, u16)> = Vec::new();

    let mut reader = ben32_vec.as_slice();

    if ben32_vec.len() % 4 != 0 {
        return Err(Error::new(
            io::ErrorKind::InvalidData,
            "Invalid ben32 data length",
        ));
    }

    for _ in 0..((ben32_vec.len() / 4) - 1) {
        reader.read_exact(&mut buffer)?;
        let encoded = u32::from_be_bytes(buffer);

        let value = (encoded >> 16) as u16; // High 16 bits
        let count = (encoded & 0xFFFF) as u16; // Low 16 bits

        ben32_rle.push((value, count));
    }

    // read the last 4 bytes which should be 0 since they are a separator
    reader.read_exact(&mut buffer)?;
    if buffer != [0u8; 4] {
        return Err(Error::new(
            io::ErrorKind::InvalidData,
            "Invalid ben32 data format. Missing end of line separator.",
        ));
    }

    Ok(encode_ben_vec_from_rle(ben32_rle))
}

/// Translates a stream of ben32 encoded assignment vectors into ben encoded
/// assignment vectors, writing each converted vector to `writer` as soon as
/// it has been read.
///
/// The stream is consumed 4 bytes at a time. Words are accumulated until the
/// all-zero separator word is seen; the accumulated line (separator included)
/// is then converted with `ben32_to_ben_line` and written out.
///
/// # Arguments
///
/// * `reader` - A reader that contains ben32 encoded assignment vectors
/// * `writer` - A writer that will receive the ben encoded assignment vectors
///
/// # Returns
///
/// `Ok(())` once the reader reports end of input.
///
/// # Errors
///
/// Returns an error if a line fails conversion in `ben32_to_ben_line`, if the
/// reader fails with anything other than `UnexpectedEof`, or if the writer
/// fails.
///
/// Note that `UnexpectedEof` is treated as the normal end of the stream: any
/// words collected for a line whose separator has not been reached yet are
/// discarded without an error.
pub fn ben32_to_ben_lines<R: Read, W: Write>(mut reader: R, mut writer: W) -> io::Result<()> {
    const SEPARATOR: [u8; 4] = [0u8; 4];
    let mut word = [0u8; 4];

    loop {
        // Gather one complete line, separator included.
        let mut line: Vec<u8> = Vec::new();
        loop {
            match reader.read_exact(&mut word) {
                Ok(()) => {}
                // Running out of input is how the stream normally ends.
                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => return Ok(()),
                Err(err) => return Err(err),
            }

            line.extend_from_slice(&word);
            if word == SEPARATOR {
                break;
            }
        }

        let converted = ben32_to_ben_line(line)?;
        writer.write_all(&converted)?;
    }
}

/// Decodes one ben encoded assignment vector from `reader` and re-encodes it
/// as a ben32 assignment vector.
///
/// The run-length pairs produced by `decode_ben_line` are written out as
/// 4-byte big-endian words (value in the high 16 bits, count in the low 16
/// bits), followed by the 4-byte all-zero end of line separator.
///
/// # Arguments
///
/// * `reader` - A reader positioned at the payload of a ben encoded assignment vector
/// * `max_val_bits` - The maximum number of bits that the value of an assignment can have
/// * `max_len_bits` - The maximum number of bits that the length of an assignment can have
/// * `n_bytes` - Forwarded unchanged to `decode_ben_line`; presumably the size
///   in bytes of the bit-packed payload (TODO confirm against `decode_ben_line`)
///
/// # Returns
///
/// A vector of bytes containing the ben32 encoded assignment vector
///
/// # Errors
///
/// Propagates any error returned by `decode_ben_line`.
fn ben_to_ben32_line<R: Read>(
    reader: R,
    max_val_bits: u8,
    max_len_bits: u8,
    n_bytes: u32,
) -> io::Result<Vec<u8>> {
    let runs: Vec<(u16, u16)> = decode_ben_line(reader, max_val_bits, max_len_bits, n_bytes)?;

    // One 4-byte word per run, plus one more for the separator.
    let mut encoded_line: Vec<u8> = Vec::with_capacity((runs.len() + 1) * 4);

    for (value, count) in runs {
        encoded_line.extend_from_slice(&value.to_be_bytes());
        encoded_line.extend_from_slice(&count.to_be_bytes());
    }

    // End of line separator.
    encoded_line.extend_from_slice(&[0u8; 4]);

    Ok(encoded_line)
}

/// Translates a stream of ben encoded assignment vectors into ben32 encoded
/// assignment vectors, writing each converted vector to `writer` as soon as
/// it has been decoded.
///
/// For every vector the function first reads a small header from `reader`:
/// one byte for `max_val_bits`, one byte for `max_len_bits`, and a big-endian
/// `u32` for `n_bytes`. These are passed, together with the reader, to
/// `ben_to_ben32_line`. Progress is reported through the `log!`/`logln!`
/// macros.
///
/// # Arguments
///
/// * `reader` - A reader that contains ben encoded assignment vectors
/// * `writer` - A writer that will receive the ben32 encoded assignment vectors
///
/// # Returns
///
/// `Ok(())` once the reader reports end of input at the start of a header.
///
/// # Errors
///
/// Returns an error if the reader fails while the rest of a header is being
/// read (including running out of input after the first header byte), if
/// `ben_to_ben32_line` fails, or if the writer fails. Only an `UnexpectedEof`
/// on the very first header byte is treated as a clean end of stream.
pub fn ben_to_ben32_lines<R: Read, W: Write>(mut reader: R, mut writer: W) -> io::Result<()> {
    let mut sample_number = 1;

    loop {
        // The first header byte doubles as the end-of-stream probe.
        let mut first_byte = [0u8];
        match reader.read_exact(&mut first_byte) {
            Ok(()) => {}
            Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
            Err(err) => return Err(err),
        }
        let [max_val_bits] = first_byte;

        // From here on the header must be complete, so EOF is a real error.
        let max_len_bits = reader.read_u8()?;
        let n_bytes = reader.read_u32::<BigEndian>()?;

        log!("Encoding line: {}\r", sample_number);
        sample_number += 1;

        let line = ben_to_ben32_line(&mut reader, max_val_bits, max_len_bits, n_bytes)?;
        writer.write_all(&line)?;
    }

    logln!();
    logln!("Done!");
    Ok(())
}

// Unit tests for this module. The test code lives in
// `tests/translate_tests.rs` and is textually included here, so it is compiled
// as a child module of this file and only when `cfg(test)` is active.
#[cfg(test)]
mod tests {
    include!("tests/translate_tests.rs");
}