binarytext 0.1.2

Binary-to-text encoders / decoders
Documentation
//! Implementation of the Base32 encoder and its variants.

use crate::binarytext::{BinaryText, build_decoding_lut};
use crate::error::BinTxtError;

/// Base32 implementation as described in RFC 4648 / Section 6.
///
/// Bundles the variant's name with its encoding alphabet and the decoding
/// lookup table derived from that alphabet.
#[derive(Clone, Debug)]
pub struct Base32 {
    /// Name of the variant, as returned by `name()`.
    name: String,
    /// Encoding alphabet: maps a 5-bit value (`0..32`) to its ASCII symbol.
    lut_enc: [u8; 32],
    /// Decoding table indexed by ASCII byte (`0..128`), built by `build_decoding_lut`.
    /// `decode_byte` treats an entry of 255 as "not part of the alphabet".
    lut_dec: [u8; 128],
}

impl Default for Base32 {
    fn default() -> Self {
        Self::new()
    }
}

impl Base32 {
    /// Returns the Base32 encoder as described in RFC 4648 / Section 6.
    ///
    /// The alphabet is `A`-`Z` followed by `2`-`7`.
    pub fn new() -> Self {
        Self::from_alphabet("Base32", *b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567")
    }

    /// Returns the Base32 variant Base32Hex as described in RFC 4648 / Section 7.
    ///
    /// The alphabet is `0`-`9` followed by `A`-`V`.
    pub fn base32hex() -> Self {
        Self::from_alphabet("Base32Hex", *b"0123456789ABCDEFGHIJKLMNOPQRSTUV")
    }

    /// Assembles an encoder from a variant name and its 32-symbol alphabet,
    /// deriving the decoding table from the alphabet.
    fn from_alphabet(name: &str, lut_enc: [u8; 32]) -> Self {
        let lut_dec = build_decoding_lut(&lut_enc);
        Self {
            name: name.to_string(),
            lut_enc,
            lut_dec,
        }
    }
}

impl BinaryText for Base32 {
    /// Returns 32, the number of symbols in the encoding alphabet.
    fn base(&self) -> usize {
        32
    }

    /// Returns the name this encoder was constructed with.
    fn name(&self) -> &str {
        &self.name
    }

    /// Returns 5, the number of input bytes `encode_into_vec` consumes per block
    /// (5 bytes = 40 bits).
    fn n_bytes_encode(&self) -> usize {
        5
    }

    /// Returns 8, the number of encoded symbols `decode_into_vec` consumes per block.
    fn n_bytes_decode(&self) -> usize {
        8
    }

    /// Maps a 5-bit value to its symbol in the encoding alphabet.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::EncodingErr` when `byte` is 32 or larger, i.e. when it
    /// has no entry in the 32-symbol alphabet.
    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        match self.lut_enc.get(usize::from(byte)) {
            Some(&symbol) => Ok(symbol),
            None => Err(BinTxtError::EncodingErr(format!(
                "Byte {byte} exceeds maximum {}",
                32
            ))),
        }
    }

    /// Encodes `input` as Base32 text into `res`.
    ///
    /// Any previous content of `res` is discarded. The input is processed in groups
    /// of five bytes, each producing eight symbols. A trailing group shorter than
    /// five bytes is zero-extended, encoded, and its unused symbols are replaced
    /// by the padding character `=`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `encode_byte`.
    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        // Packs five bytes into one 40-bit integer and emits its eight 5-bit
        // groups, most significant group first.
        let encode_group = |group: &[u8], symbols: &mut [u8; 8]| -> Result<(), BinTxtError> {
            let bits = group[..5]
                .iter()
                .fold(0u64, |acc, &b| (acc << 8) | u64::from(b));
            for (i, symbol) in symbols.iter_mut().enumerate() {
                let index = ((bits >> (35 - 5 * i)) & 0b11111) as u8;
                *symbol = self.encode_byte(index)?;
            }
            Ok(())
        };

        res.clear();
        let mut symbols = [0u8; 8];
        let mut groups = input.chunks_exact(5);
        for group in groups.by_ref() {
            encode_group(group, &mut symbols)?;
            res.extend_from_slice(&symbols);
        }

        // Whatever is left (1..=4 bytes) forms a final, padded group.
        let tail = groups.remainder();
        if tail.is_empty() {
            return Ok(());
        }
        let mut padded = [0u8; 5];
        padded[..tail.len()].copy_from_slice(tail);
        encode_group(&padded, &mut symbols)?;
        // Symbols that carry at least one real input bit are kept; the rest
        // become padding.
        let n_symbols = (tail.len() * 8) / 5 + 1;
        symbols[n_symbols..].fill(b'=');
        res.extend_from_slice(&symbols);
        Ok(())
    }

    /// Maps an encoded symbol back to its 5-bit value.
    ///
    /// # Errors
    ///
    /// Returns `BinTxtError::DecodingErr` when `byte` lies outside the 128-entry
    /// decoding table or its table entry is the sentinel value 255.
    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        match self.lut_dec.get(usize::from(byte)) {
            Some(&value) if value < 255 => Ok(value),
            _ => Err(BinTxtError::DecodingErr(format!(
                "Invalid byte \"{}\" in Base32 string",
                byte
            ))),
        }
    }

    /// Decodes Base32 text in `input` into `res`.
    ///
    /// Any previous content of `res` is discarded; empty input yields an empty
    /// result. All complete eight-symbol blocks before the final one are decoded
    /// strictly. The final block may be shortened by `=` padding or simply be
    /// shorter than eight symbols (unpadded input). Only the bytes that are fully
    /// determined by its data symbols are emitted, i.e. `n * 5 / 8` bytes for `n`
    /// data symbols, where `n` is the position of the first `=` or, without
    /// padding, the length of the final block.
    ///
    /// # Errors
    ///
    /// Returns the error from `decode_byte` when a symbol is not part of the
    /// alphabet (a `=` outside the final block counts as such).
    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        res.clear();
        if input.is_empty() {
            return Ok(());
        }
        // Function for decoding a block of eight bytes into five bytes
        let decode_block = |bytes_in: &[u8], bytes_out: &mut [u8]| -> Result<(), BinTxtError> {
            // Loop unrolled
            let dec0 = self.decode_byte(bytes_in[0])?;
            let dec1 = self.decode_byte(bytes_in[1])?;
            let dec2 = self.decode_byte(bytes_in[2])?;
            let dec3 = self.decode_byte(bytes_in[3])?;
            let dec4 = self.decode_byte(bytes_in[4])?;
            let dec5 = self.decode_byte(bytes_in[5])?;
            let dec6 = self.decode_byte(bytes_in[6])?;
            let dec7 = self.decode_byte(bytes_in[7])?;
            bytes_out[0] = (dec0 << 3) | (dec1 >> 2);
            bytes_out[1] = (dec1 << 6) | (dec2 << 1) | (dec3 >> 4);
            bytes_out[2] = (dec3 << 4) | (dec4 >> 1);
            bytes_out[3] = (dec4 << 7) | (dec5 << 2) | (dec6 >> 3);
            bytes_out[4] = (dec6 << 5) | dec7;
            Ok(())
        };
        // Split off the final block: the last eight symbols when the length is a
        // multiple of eight, otherwise the 1..=7 leftover symbols.
        let rem = input.len() % 8;
        let pos_last = input.len() - if rem == 0 { 8 } else { rem };
        let (head, tail) = input.split_at(pos_last);
        let mut bytes_dec = [0u8; 5];
        // Decode everything except the final block.
        // This way we don't have to worry about padding here
        for bytes in head.chunks_exact(8) {
            decode_block(bytes, &mut bytes_dec)?;
            res.extend_from_slice(&bytes_dec);
        }
        // Number of real data symbols in the final block. Without any '=' the
        // whole (possibly short) block is data, so unpadded input does not gain
        // spurious trailing bytes.
        let n_symbols = tail
            .iter()
            .position(|&x| x == b'=')
            .unwrap_or(tail.len());
        // Complete the block with the alphabet's own zero symbol. A hard-coded
        // 'A' would only be zero for the standard alphabet, not for Base32Hex.
        let zero_symbol = self.lut_enc[0];
        let mut block = [zero_symbol; 8];
        for (dst, &src) in block.iter_mut().zip(tail) {
            if src != b'=' {
                *dst = src;
            }
        }
        decode_block(&block, &mut bytes_dec)?;
        // Each symbol carries five bits; keep only the fully determined bytes.
        let n_bytes = n_symbols * 5 / 8;
        res.extend_from_slice(&bytes_dec[..n_bytes]);
        Ok(())
    }

    /// Reports whether every symbol of `input` is acceptable to the decoder.
    ///
    /// Symbols before the final block must belong to the alphabet; inside the
    /// final block (the last eight symbols, or the leftover when the length is
    /// not a multiple of eight) the padding character `=` is accepted as well.
    /// Empty input is reported as decodable, matching `decode_into_vec`, which
    /// accepts it and produces no output.
    fn is_decodable(&self, input: &str) -> bool {
        let bytes = input.as_bytes();
        // Guard first: the block arithmetic below would underflow on empty input.
        if bytes.is_empty() {
            return true;
        }
        let rem = bytes.len() % 8;
        let pos_last = bytes.len() - if rem == 0 { 8 } else { rem };
        let (head, tail) = bytes.split_at(pos_last);
        head.iter().all(|&byte| self.decode_byte(byte).is_ok())
            // In the last chunk padding character "=" is valid
            && tail
                .iter()
                .all(|&byte| byte == b'=' || self.decode_byte(byte).is_ok())
    }
}