binarytext 0.1.2

Binary-to-text encoders / decoders
Documentation
//! Implementation of the Base64 encoder and its variants.

use crate::binarytext::{BinaryText, build_decoding_lut};
use crate::error::BinTxtError;

/// Base64 implementation as described in RFC 4648 / Section 4.
///
/// Holds a display name together with the encoding and decoding lookup
/// tables of one concrete alphabet.
#[derive(Clone, Debug)]
pub struct Base64 {
    /// Name of this variant, as returned by `name()`.
    name: String,
    /// Encoding table: maps a 6-bit value (0..64) to its alphabet symbol.
    lut_enc: [u8; 64],
    /// Decoding table indexed by a byte below 128; derived from `lut_enc`
    /// via `build_decoding_lut`. An entry of 255 is treated as "invalid".
    lut_dec: [u8; 128],
}

impl Default for Base64 {
    fn default() -> Self {
        Self::new()
    }
}

impl Base64 {
    /// Returns the default Base64 encoder.
    ///
    /// The alphabet is `A-Z`, `a-z`, `0-9` followed by `+` and `/`.
    pub fn new() -> Self {
        let alphabet: [u8; 64] =
            *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        Self::from_alphabet("Base64", alphabet)
    }

    /// Returns the Base64 encoder suitable for URLs using '-' and '_' instead of '+' and '/'.
    pub fn base64url() -> Self {
        let alphabet: [u8; 64] =
            *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
        Self::from_alphabet("Base64URL", alphabet)
    }

    /// Assembles an encoder from its display name and 64-symbol alphabet,
    /// deriving the decoding table from the alphabet.
    fn from_alphabet(name: &str, lut_enc: [u8; 64]) -> Self {
        let lut_dec = build_decoding_lut(&lut_enc);
        Self {
            name: name.to_string(),
            lut_enc,
            lut_dec,
        }
    }
}

impl BinaryText for Base64 {
    /// Number of symbols in the alphabet, i.e. the size of the encoding table (64).
    fn base(&self) -> usize {
        self.lut_enc.len()
    }

    /// Name of this Base64 variant as set by the constructor.
    fn name(&self) -> &str {
        &self.name
    }

    /// Size of one unencoded group: three raw bytes (24 bits).
    fn n_bytes_encode(&self) -> usize {
        const RAW_GROUP_LEN: usize = 3;
        RAW_GROUP_LEN
    }

    /// Size of one encoded group: four symbols carrying six bits each.
    fn n_bytes_decode(&self) -> usize {
        const ENCODED_GROUP_LEN: usize = 4;
        ENCODED_GROUP_LEN
    }

    /// Maps a 6-bit value to its alphabet symbol.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::EncodingErr` when `byte` is 64 or larger and
    /// therefore has no entry in the encoding table.
    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        // The table has exactly 64 entries, so a checked lookup doubles as
        // the range test.
        match self.lut_enc.get(usize::from(byte)) {
            Some(&symbol) => Ok(symbol),
            None => Err(BinTxtError::EncodingErr(format!(
                "Byte {byte} exceeds maximum {}",
                64
            ))),
        }
    }

    /// Encodes `input` into `res`, replacing whatever `res` held before.
    ///
    /// Every three input bytes become four symbols. A trailing group of one
    /// or two bytes is zero-extended, encoded, and its unused symbols are
    /// replaced by `=` padding.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `encode_byte`.
    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        res.clear();
        // Splits a 24-bit group into four 6-bit indices and looks up each symbol.
        let encode_group = |group: &[u8]| -> Result<[u8; 4], BinTxtError> {
            let (first, second, third) = (group[0], group[1], group[2]);
            Ok([
                self.encode_byte(first >> 2)?,
                self.encode_byte(((first & 0x03) << 4) | (second >> 4))?,
                self.encode_byte(((second & 0x0f) << 2) | (third >> 6))?,
                self.encode_byte(third & 0x3f)?,
            ])
        };
        let mut groups = input.chunks_exact(3);
        for group in groups.by_ref() {
            res.extend_from_slice(&encode_group(group)?);
        }
        // Whatever did not fill a complete three-byte group (0, 1 or 2 bytes)
        let tail = groups.remainder();
        if !tail.is_empty() {
            let mut padded = [0u8; 3];
            padded[..tail.len()].copy_from_slice(tail);
            let mut symbols = encode_group(&padded)?;
            // `tail.len()` data bytes occupy `tail.len() + 1` symbols; the
            // remaining positions are padding.
            symbols[tail.len() + 1..].fill(b'=');
            res.extend_from_slice(&symbols);
        }
        Ok(())
    }

    /// Maps an alphabet symbol back to its 6-bit value.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::DecodingErr` when `byte` is 128 or larger, or
    /// when its entry in the decoding table is the invalid marker 255.
    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        // The table covers bytes 0..128 only; anything beyond has no entry.
        match self.lut_dec.get(usize::from(byte)).copied() {
            Some(value) if value != 255 => Ok(value),
            _ => Err(BinTxtError::DecodingErr(format!(
                "Invalid byte \"{}\" in Base64 string",
                byte
            ))),
        }
    }

    /// Decodes the Base64 text in `input` into `res`, replacing whatever
    /// `res` held before. Empty input yields an empty result.
    ///
    /// All groups of four symbols except the last are decoded strictly. The
    /// last group may be shorter than four symbols and may contain `=`
    /// padding: only the symbols before the first `=` (or before the end of
    /// the input, if unpadded) count as data, and only the bytes fully
    /// covered by those symbols are emitted (2 symbols -> 1 byte,
    /// 3 symbols -> 2 bytes, 4 symbols -> 3 bytes, fewer -> none).
    ///
    /// # Errors
    ///
    /// Propagates the error from `decode_byte` when a byte is not part of the
    /// alphabet. A `=` outside of the last group counts as such a byte.
    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        res.clear();
        if input.is_empty() {
            return Ok(());
        }
        // Index at which the last (possibly padded or truncated) group starts
        let rem = input.len() % 4;
        let pos_last = if rem == 0 {
            input.len() - 4
        } else {
            input.len() - rem
        };
        // Function for decoding a block of four bytes into three bytes
        let decode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
            // Loop unrolled
            let dec0 = self.decode_byte(bytes_in[0])?;
            let dec1 = self.decode_byte(bytes_in[1])?;
            let dec2 = self.decode_byte(bytes_in[2])?;
            let dec3 = self.decode_byte(bytes_in[3])?;
            bytes_out[0] = (dec0 << 2) | (dec1 >> 4);
            bytes_out[1] = (dec1 << 4) | (dec2 >> 2);
            bytes_out[2] = (dec2 << 6) | dec3;
            Ok(())
        };
        let mut bytes_dec = [0u8; 3];
        // Decode everything except the last group.
        // This way we don't have to worry about padding here.
        for bytes in input[..pos_last].chunks_exact(4) {
            decode_block(bytes, &mut bytes_dec)?;
            res.extend_from_slice(&bytes_dec);
        }
        // Handle the last group, which may be padded or cut short
        let bytes_rem = &input[pos_last..];
        // Number of data symbols: everything before the first '=' or, when
        // there is no padding, the whole (possibly short) group.
        let n_symbols = bytes_rem
            .iter()
            .position(|&x| x == b'=')
            .unwrap_or(bytes_rem.len());
        // 'A' decodes to zero, so it stands in for padding and for missing
        // symbols. Non-padding bytes are still validated by `decode_block`.
        let mut bytes = [b'A'; 4];
        for (dst, &src) in bytes.iter_mut().zip(bytes_rem) {
            if src != b'=' {
                *dst = src;
            }
        }
        decode_block(&bytes, &mut bytes_dec)?;
        // Each symbol carries 6 bits; emit only the bytes that are complete.
        let n_bytes = n_symbols * 6 / 8;
        res.extend_from_slice(&bytes_dec[..n_bytes]);
        Ok(())
    }

    /// Reports whether every byte of `input` is acceptable to the decoder.
    ///
    /// All bytes must belong to the alphabet; in the last group (the final
    /// one to four bytes) the padding character `=` is accepted as well.
    /// The empty string is decodable, matching `decode_into_vec`, which
    /// accepts empty input.
    fn is_decodable(&self, input: &str) -> bool {
        let bytes = input.as_bytes();
        // Without this guard `len - 4` below would underflow and panic.
        if bytes.is_empty() {
            return true;
        }
        let rem = bytes.len() % 4;
        let pos_last = if rem == 0 {
            bytes.len() - 4
        } else {
            bytes.len() - rem
        };
        let (body, last_group) = bytes.split_at(pos_last);
        // In the last group the padding character "=" is valid
        body.iter().all(|&byte| self.decode_byte(byte).is_ok())
            && last_group
                .iter()
                .all(|&byte| byte == b'=' || self.decode_byte(byte).is_ok())
    }
}