// binarytext 0.1.2
//
// Binary-to-text encoders / decoders
// Documentation
//! This module provides helper functions for encoding / decoding a buffered input stream to a
//! specific destination such as stdout or a file.
//! Any source or destination can be used as long as it implements the trait `Read` or `Write`
//! respectively.
//! The read buffer is aligned to a multiple of the number of bytes the encoder / decoder
//! expects. Furthermore, control characters such as newline or carriage return are skipped when
//! decoding.

use crate::binarytext::BinaryText;
use crate::error::BinTxtError;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Stdout, Write, stdout};

/// Selects the direction in which a stream converts its input.
///
/// The type is a plain field-less enum, so besides `PartialEq` it also derives `Eq` and `Hash`
/// and can be used as a key in hashed collections.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Mode {
    /// Convert encoded text back into the original bytes.
    Decode,
    /// Convert raw bytes into their text representation.
    Encode,
}

/// Helper function for opening files using BufReader.
pub fn buf_reader_file(filename: &str) -> Result<BufReader<File>, BinTxtError> {
    let f = match File::open(filename) {
        Ok(file) => file,
        Err(_) => {
            let msg = format!("Error opening file {} for reading", filename);
            return Err(BinTxtError::IOError(msg));
        }
    };
    Ok(BufReader::new(f))
}

/// Wraps an in-memory byte slice in a `BufReader` so that it can be used as a stream source.
///
/// This helper always returns `Ok`.
pub fn buf_reader_bytes(bytes: &[u8]) -> Result<BufReader<&[u8]>, BinTxtError> {
    let reader = BufReader::new(bytes);
    Ok(reader)
}

/// Wraps the standard output handle of the process in a `BufWriter`.
///
/// This helper always returns `Ok`.
pub fn buf_writer_stdout() -> Result<BufWriter<Stdout>, BinTxtError> {
    let writer = BufWriter::new(stdout());
    Ok(writer)
}

/// Helper function for opening files using BufWriter.
pub fn buf_writer_file(filename: &str) -> Result<BufWriter<File>, BinTxtError> {
    let f = match File::create(filename) {
        Ok(file) => file,
        Err(_) => {
            let msg = format!("Error opening file {} for writing", filename);
            return Err(BinTxtError::IOError(msg));
        }
    };
    Ok(BufWriter::new(f))
}

/// Wraps a new, empty `Vec<u8>` in a `BufWriter` so that output is collected in memory.
///
/// This helper always returns `Ok`.
pub fn buf_writer_bytes() -> Result<BufWriter<Vec<u8>>, BinTxtError> {
    let sink: Vec<u8> = Vec::new();
    Ok(BufWriter::new(sink))
}

/// Streams data from a buffered source to a buffered destination, encoding or decoding it on
/// the way.
///
/// For the conversion to be correct, the read buffer has to hold a whole number of input
/// groups. Using Base64, for example, the buffer has to be a multiple of 3 bytes for encoding
/// and a multiple of 4 bytes for decoding.
pub struct BinTxtStream<'a, R, W, B>
where
    R: Read,
    W: Write,
    B: BinaryText,
{
    /// Buffered source the input is read from.
    buf_reader: BufReader<R>,
    /// Buffered destination the converted output is written to.
    buf_writer: BufWriter<W>,
    /// Encoder / decoder performing the conversion.
    bintxt: &'a B,
    /// Whether the stream encodes or decodes.
    mode: Mode,
    /// Requested read buffer size in bytes; rounded up to the required alignment when streaming.
    buf_len: usize,
    /// Number of output bytes per line; `0` disables the insertion of newlines.
    width: usize,
}

impl<'a, R: Read, W: Write, B: BinaryText> BinTxtStream<'a, R, W, B> {
    /// Returns a new instance of Stream for binary text encoding / decoding.
    pub fn new(
        buf_reader: BufReader<R>,
        buf_writer: BufWriter<W>,
        bintxt: &'a B,
        mode: Mode,
        buf_len: usize,
        width: usize,
    ) -> Self {
        Self {
            buf_reader,
            buf_writer,
            bintxt,
            mode,
            buf_len,
            width,
        }
    }

    /// Borrows the underlying source wrapped by the buffered reader.
    pub fn source_ref(&self) -> &R {
        let reader = &self.buf_reader;
        reader.get_ref()
    }

    /// Borrows the underlying destination wrapped by the buffered writer.
    ///
    /// Only data that has already left the writer's internal buffer is visible through this
    /// reference; `stream` flushes that buffer once all input has been processed.
    pub fn dest_ref(&self) -> &W {
        let writer = &self.buf_writer;
        writer.get_ref()
    }

    /// Allocates a zero-filled buffer whose length is `buf_len` rounded up to the next multiple
    /// of `div`.
    ///
    /// A `buf_len` of zero yields a buffer of exactly `div` bytes, so the buffer always has
    /// room for at least one complete group.
    ///
    /// # Panics
    ///
    /// Panics if `div` is zero.
    fn buffer_mod(buf_len: usize, div: usize) -> Vec<u8> {
        // Number of `div`-sized groups needed to hold `buf_len` bytes, but never fewer than one.
        let groups = buf_len.div_ceil(div).max(1);
        vec![0; groups * div]
    }

    /// Drops every byte below `0x20` (control bytes such as line feed and carriage return) from
    /// `buffer` by moving the remaining bytes to the front, and returns how many bytes remain.
    ///
    /// Only the first `n` bytes of `buffer` are meaningful afterwards, where `n` is the returned
    /// length; the bytes behind them are leftovers of the compaction.
    fn filter_buffer_dec(buffer: &mut [u8]) -> usize {
        // Everything in front of the first control byte is already in its final position.
        let Some(first_ctrl) = buffer.iter().position(|&byte| byte < 0x20) else {
            return buffer.len();
        };
        let mut kept = first_ctrl;
        for pos in (first_ctrl + 1)..buffer.len() {
            let byte = buffer[pos];
            if byte >= 0x20 {
                // `kept` is always behind `pos` here, so the byte moves towards the front.
                buffer[kept] = byte;
                kept += 1;
            }
        }
        kept
    }

    /// Fills `buffer` from the source and returns the number of valid bytes stored in it.
    ///
    /// Reading is repeated until the buffer is completely filled or the source reports the end
    /// of the input (a read of zero bytes). In `Mode::Decode` every freshly read span is run
    /// through `filter_buffer_dec`, so skipped control bytes do not count towards the returned
    /// length and the freed space is refilled by the next read.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::IOError` if the source fails with any error other than
    /// `ErrorKind::Interrupted`; interrupted reads are retried.
    fn read_into_buffer(&mut self, buffer: &mut [u8]) -> Result<usize, BinTxtError> {
        let len = buffer.len();
        let mut offset = 0;
        loop {
            // Continue until the whole buffer is read
            let n_bytes_read = match self.buf_reader.read(&mut buffer[offset..len]) {
                Ok(n) => n,
                // An interrupted read has transferred nothing and may simply be repeated
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(_) => {
                    let msg = "Error reading buffer";
                    return Err(BinTxtError::IOError(msg.to_string()));
                }
            };
            let end = match self.mode {
                Mode::Decode => {
                    // Filter invalid bytes like newline from the part that was just read
                    let fresh = &mut buffer[offset..offset + n_bytes_read];
                    offset + Self::filter_buffer_dec(fresh)
                }
                Mode::Encode => offset + n_bytes_read,
            };
            // Stop if the buffer is filled or the source is exhausted
            if n_bytes_read == 0 || end == len {
                return Ok(end);
            }
            offset = end;
        }
    }

    /// Writes a buffer to the destination completely or returns an error.
    /// This function ensures that the whole buffer is written to the destination.
    fn write_buffer(&mut self, buffer: &[u8]) -> Result<(), BinTxtError> {
        // Write the whole buffer or return an error
        match self.buf_writer.write_all(buffer) {
            Ok(()) => {}
            Err(_) => {
                let msg = "Error writing buffer";
                return Err(BinTxtError::IOError(msg.to_string()));
            }
        }
        Ok(())
    }

    /// Writes `buffer` to the destination, inserting a newline after every `width` output bytes.
    ///
    /// `pos_next_newline` is the number of bytes that still have to be written before the next
    /// newline is due; this lets a line continue across successive buffers. The return value is
    /// the same quantity for the following buffer and should be passed to the next call.
    ///
    /// If `width` is zero the buffer is written unchanged and `0` is returned.
    ///
    /// A buffer shorter than `pos_next_newline` (for example the short final chunk of a stream,
    /// or an empty buffer) is written as is and the remaining distance is returned. Slicing the
    /// buffer at `pos_next_newline` in that situation would be out of bounds.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::IOError` if writing to the destination fails.
    fn write_buffer_with_newlines(
        &mut self,
        buffer: &[u8],
        pos_next_newline: usize,
    ) -> Result<usize, BinTxtError> {
        if self.width == 0 {
            // Just write the buffer without newlines
            self.write_buffer(buffer)?;
            return Ok(0);
        }
        if buffer.len() < pos_next_newline {
            // The current line is not complete yet, so no newline is due within this buffer
            self.write_buffer(buffer)?;
            return Ok(pos_next_newline - buffer.len());
        }
        // Finish the line started by a previous buffer
        let (head, tail) = buffer.split_at(pos_next_newline);
        self.write_buffer(head)?;
        self.write_buffer(b"\n")?;
        // Write all complete lines contained in the rest of the buffer
        let lines = tail.chunks_exact(self.width);
        let rem = lines.remainder();
        for line in lines {
            self.write_buffer(line)?;
            self.write_buffer(b"\n")?;
        }
        if !rem.is_empty() {
            self.write_buffer(rem)?;
        }
        // The next position of a newline equals the width minus the number of bytes in the
        // remainder
        Ok(self.width - rem.len())
    }

    /// Reads bytes from a source, encodes or decodes them using the binarytext encoder and writes the result
    /// to the destination.
    pub fn stream(&mut self) -> Result<(), BinTxtError> {
        let (n_bytes_encode, n_bytes_decode) =
            (self.bintxt.n_bytes_encode(), self.bintxt.n_bytes_decode());
        let (align, mul) = match self.mode {
            Mode::Encode => (n_bytes_encode, n_bytes_encode / n_bytes_decode),
            Mode::Decode => (n_bytes_decode, n_bytes_encode / n_bytes_decode),
        };
        let mut buffer_read = Self::buffer_mod(self.buf_len, align);
        let mut buffer_write = Vec::<u8>::with_capacity(self.buf_len * mul);
        // Save the position of the next newline
        let mut pos_next_newline = self.width;
        loop {
            let n_bytes_read = self.read_into_buffer(&mut buffer_read)?;
            if n_bytes_read == 0 {
                break;
            }
            // Encode or decode the bytes read and write them into the destination buffer
            match self.mode {
                Mode::Encode => {
                    self.bintxt
                        .encode_into_vec(&buffer_read[0..n_bytes_read], &mut buffer_write)?;
                }
                Mode::Decode => {
                    self.bintxt
                        .decode_into_vec(&buffer_read[0..n_bytes_read], &mut buffer_write)?;
                }
            }
            pos_next_newline =
                self.write_buffer_with_newlines(buffer_write.as_slice(), pos_next_newline)?;
        }
        // Flush the write buffer to complete the stream
        match self.buf_writer.flush() {
            Ok(_) => Ok(()),
            Err(_) => {
                let msg = "Error flushing write buffer".to_string();
                Err(BinTxtError::IOError(msg))
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::base64::Base64;
    use crate::stream::{BinTxtStream, Mode};
    use std::io::{BufReader, BufWriter};

    #[test]
    fn test_stream() {
        let input =
            "gr3n8t94h89hf849h29ht894h2989n2t8928tn493h8th843h08tj493jt9403jt04j3089jz0649hh89"
                .as_bytes();
        let expected = "Z3Izbjh0OTRoO\nDloZjg0OWgyOW\nh0ODk0aDI5ODl\nuMnQ4OTI4dG40\nOTNoOHRoODQza\nDA4dGo0OTNqdD\nk0MDNqdDA0ajM\nwODlqejA2NDlo\naDg5".as_bytes();
        let codec = Base64::new();
        let reader = BufReader::new(input);
        let writer = BufWriter::new(Vec::<u8>::with_capacity(expected.len()));
        // Encode with a read buffer of 24 bytes and a newline after every 13 output bytes
        let mut stream = BinTxtStream::new(reader, writer, &codec, Mode::Encode, 24, 13);
        stream.stream().unwrap();
        let written = stream.dest_ref();
        assert_eq!(expected, written);
    }

    #[test]
    fn test_filter_dec() {
        // The line feeds are dropped and the remaining bytes are moved to the front
        let mut data = "grne9unt95j9j89gr39j9\nf78h784hr\n0950490\ngbzu8h"
            .as_bytes()
            .to_vec();
        let expected = "grne9unt95j9j89gr39j9f78h784hr0950490gbzu8h".as_bytes();
        let kept =
            BinTxtStream::<BufReader<&[u8]>, BufWriter<&mut [u8]>, Base64>::filter_buffer_dec(
                &mut data,
            );
        assert_eq!(expected, &data[..kept]);
    }
}