binarytext 0.1.2

Binary-to-text encoders / decoders
Documentation
//! Implementation of the Base85 encoder / decoder.

use crate::binarytext::BinaryText;
use crate::error::BinTxtError;

/// Base85 implementation.
///
/// The struct has no fields, so it carries no configuration or state:
/// every instance behaves the same and cloning it is trivial.
#[derive(Clone, Debug)]
pub struct Base85 {}

impl Default for Base85 {
    fn default() -> Self {
        Self::new()
    }
}

impl Base85 {
    pub fn new() -> Self {
        Self {}
    }
}

impl BinaryText for Base85 {
    /// Size of the alphabet: 85 symbols.
    fn base(&self) -> usize {
        85
    }

    /// Human-readable name of the encoding.
    fn name(&self) -> &str {
        "Base85"
    }

    /// Number of raw bytes that make up one full group when encoding.
    fn n_bytes_encode(&self) -> usize {
        4
    }

    /// Number of encoded characters that make up one full group when decoding.
    fn n_bytes_decode(&self) -> usize {
        5
    }

    /// Maps a base-85 digit (`0..=84`) to its ASCII character.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::EncodingErr` if `byte` is not a valid base-85 digit.
    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        if byte >= 85 {
            let msg = format!("Byte {byte} exceeds maximum {}", 85);
            return Err(BinTxtError::EncodingErr(msg));
        }
        // The alphabet is the contiguous ASCII range starting at '!' (33).
        Ok(byte + b'!')
    }

    /// Encodes `input` into `res`, replacing any previous content of `res`.
    ///
    /// Every full group of four bytes becomes five characters, except that an
    /// all-zero group is written as the single character `z`. A trailing partial
    /// group of `n` bytes (1..=3) is zero-padded, encoded, and truncated to
    /// `n + 1` characters; the `z` shortcut is never applied to it.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `encode_byte`.
    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        res.clear();
        // Interpret four bytes as a big-endian u32 and write it as five base-85
        // digits, most significant first. Digits are produced least significant
        // first, hence the reversed iteration over the output slots.
        let encode_block = |block: [u8; 4]| -> Result<[u8; 5], BinTxtError> {
            let mut value = u32::from_be_bytes(block);
            let mut digits = [0u8; 5];
            for digit in digits.iter_mut().rev() {
                *digit = self.encode_byte((value % 85) as u8)?;
                value /= 85;
            }
            Ok(digits)
        };
        let chunks = input.chunks_exact(4);
        let tail = chunks.remainder();
        for chunk in chunks {
            if chunk.iter().all(|&b| b == 0) {
                // Four zero bytes are abbreviated to "z" instead of "!!!!!".
                res.push(b'z');
            } else {
                let mut block = [0u8; 4];
                block.copy_from_slice(chunk);
                res.extend_from_slice(&encode_block(block)?);
            }
        }
        if !tail.is_empty() {
            // Zero-pad the partial group, then drop as many characters as bytes
            // were padded so the decoder can recover the original length.
            let mut block = [0u8; 4];
            block[..tail.len()].copy_from_slice(tail);
            let digits = encode_block(block)?;
            res.extend_from_slice(&digits[..tail.len() + 1]);
        }
        Ok(())
    }

    /// Maps an ASCII character in `'!'..='u'` back to its base-85 digit.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::DecodingErr` for any byte outside that range
    /// (including `z`, which is handled at group level by `decode_into_vec`).
    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        if !(b'!'..=b'u').contains(&byte) {
            let errmsg = format!("Invalid byte {byte} in Base85 string");
            return Err(BinTxtError::DecodingErr(errmsg));
        }
        // The alphabet is the contiguous ASCII range starting at '!' (33).
        Ok(byte - b'!')
    }

    /// Decodes `input` into `res`, replacing any previous content of `res`.
    ///
    /// The input is consumed as a stream of groups: a `z` at a group boundary
    /// expands to four zero bytes, and every other group consists of five
    /// characters that decode to four bytes. A trailing partial group of `k`
    /// characters (2..=4) is padded with `u` and yields `k - 1` bytes. A single
    /// trailing character is validated but carries no data and yields no bytes.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::DecodingErr` if a byte is outside the alphabet, if
    /// `z` appears inside a group, or if a group's value does not fit in 32 bits.
    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        // Combine five base-85 digits into the 32-bit value they represent.
        // Accumulate in u64: five digits can reach 85^5 - 1, which exceeds u32::MAX.
        fn group_value(digits: &[u8; 5]) -> Result<u32, BinTxtError> {
            let value = digits
                .iter()
                .fold(0u64, |acc, &digit| acc * 85 + u64::from(digit));
            if value > u64::from(u32::MAX) {
                let errmsg = "Base85 group exceeds the 32-bit range".to_string();
                return Err(BinTxtError::DecodingErr(errmsg));
            }
            Ok(value as u32)
        }

        res.clear();
        let mut digits = [0u8; 5];
        let mut filled = 0usize;
        for &byte in input {
            if byte == b'z' {
                // "z" replaces a whole group, so it is only legal between groups.
                if filled != 0 {
                    let errmsg = "Invalid 'z' inside a Base85 group".to_string();
                    return Err(BinTxtError::DecodingErr(errmsg));
                }
                res.extend_from_slice(&[0u8; 4]);
                continue;
            }
            digits[filled] = self.decode_byte(byte)?;
            filled += 1;
            if filled == digits.len() {
                res.extend_from_slice(&group_value(&digits)?.to_be_bytes());
                filled = 0;
            }
        }
        // Trailing partial group: pad with the highest digit ('u' == 84) and keep
        // one byte fewer than the number of characters that were present.
        if filled > 1 {
            digits[filled..].fill(84);
            let bytes = group_value(&digits)?.to_be_bytes();
            res.extend_from_slice(&bytes[..filled - 1]);
        }
        Ok(())
    }

    /// Checks that every byte of `input` belongs to the Base85 alphabet or is `z`.
    ///
    /// This is a per-character check only; it does not verify group structure,
    /// so a string accepted here can still be rejected by `decode_into_vec`.
    fn is_decodable(&self, input: &str) -> bool {
        input
            .bytes()
            .all(|byte| byte == b'z' || self.decode_byte(byte).is_ok())
    }
}