binarytext 0.1.2

Binary-to-text encoders / decoders
Documentation
//! Definition of the basic trait every encoder needs to implement.

use crate::error::BinTxtError;

/// General trait for Binary-to-text encoders / decoders.
pub trait BinaryText {
    /// Returns the number of different encoded characters.
    fn base(&self) -> usize;

    /// Returns the name of the encoding scheme.
    fn name(&self) -> &str;

    /// Returns the chunk size for encoding.
    fn n_bytes_encode(&self) -> usize;

    /// Returns the chunk size for decoding.
    fn n_bytes_decode(&self) -> usize;

    /// Encodes a single byte using the encoding lookup table.
    /// The byte is the position in the LUT.
    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError>;

    /// Encodes a slice of bytes into a Vec of bytes.
    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError>;

    /// Encodes a slice of bytes into a string and returns it.
    fn encode_and_return(&self, input: &[u8]) -> Result<Vec<u8>, BinTxtError> {
        let len_enc = input.len() * self.n_bytes_decode() / self.n_bytes_encode();
        let mut ret = Vec::<u8>::with_capacity(len_enc);
        self.encode_into_vec(input, &mut ret)?;
        Ok(ret)
    }

    /// Decodes a single byte or returns DecodingError if the byte is not a valid character.
    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError>;

    /// Decodes a slice of bytes into a Vec of bytes or returns a decoding error.
    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError>;

    /// Decodes a slice of bytes into a string and returns it.
    fn decode_and_return(&self, input: &[u8]) -> Result<Vec<u8>, BinTxtError> {
        let len_dec = input.len() * self.n_bytes_encode() / self.n_bytes_decode();
        let mut ret = Vec::<u8>::with_capacity(len_dec);
        self.decode_into_vec(input, &mut ret)?;
        Ok(ret)
    }

    /// Checks if a string is decodeable.
    fn is_decodable(&self, input: &str) -> bool;

    /// Encodes a string and returns a UTF-8 string instead of a slice of bytes.
    fn encode_from_str(&self, input: &str) -> Result<String, BinTxtError> {
        let bytes = self.encode_and_return(input.as_bytes())?;
        match String::from_utf8(bytes) {
            Ok(s) => Ok(s),
            Err(_) => Err(BinTxtError::EncodingErr(
                "Error converting to UTF-8 string".to_string(),
            )),
        }
    }

    /// Decodes a string and returns a UTF-8 string instead of a slice of bytes.
    fn decode_from_str(&self, input: &str) -> Result<String, BinTxtError> {
        let bytes = self.decode_and_return(input.as_bytes())?;
        match String::from_utf8(bytes) {
            Ok(s) => Ok(s),
            Err(_) => Err(BinTxtError::DecodingErr(
                "Error converting to UTF-8 string".to_string(),
            )),
        }
    }

    /// Encodes an unsigned integer of 128 bit width.
    fn encode_u128(&self, int: u128) -> Result<String, BinTxtError> {
        let base = self.base() as u128;
        let mut test = int;
        let mut n = 0usize;
        while test > 0 {
            test /= base;
            n += 1;
        }
        test = int;
        let mut div = 1;
        // We can't do this in the loop above as this might result in overflow errors
        for _i in 1..n {
            div *= base;
        }
        let mut ret = vec!['0'; n];
        for r in ret.iter_mut() {
            let byte = test / div;
            let enc = self.encode_byte(byte as u8)?;
            *r = enc as char;
            test -= byte * div;
            div /= base;
        }
        Ok(String::from_iter(ret))
    }

    /// Encodes an unsigned integer of 64 bit width.
    fn encode_u64(&self, int: u64) -> Result<String, BinTxtError> {
        self.encode_u128(int as u128)
    }

    /// Decodes a slice of bytes into an unsigned integer of 128 bits width.
    fn decode_u128(&self, input: &[u8]) -> Result<u128, BinTxtError> {
        let base = self.base() as u128;
        let mut ret = 0;
        let mut mul = 1;
        // Iterate over all bytes except the first one in reverse order
        for &b in input.iter().skip(1).rev() {
            let dec = self.decode_byte(b)? as u128;
            ret += dec * mul;
            mul *= base;
        }
        // To avoid overflow of "mul" in the last iteration
        // we handle the first byte (most significant) separately
        if !input.is_empty() {
            let dec = self.decode_byte(input[0])? as u128;
            ret += dec * mul;
        }
        Ok(ret)
    }

    /// Decodes a slice of bytes into an unsigned integer of 64 bits width.
    fn decode_u64(&self, input: &[u8]) -> Result<u64, BinTxtError> {
        let res = self.decode_u128(input)?;
        if res > u64::MAX as u128 {
            return Err(BinTxtError::DecodingErr(
                "Error decoding into u64".to_string(),
            ));
        }
        Ok(res as u64)
    }
}

/// Lets a boxed trait object be used wherever a `BinaryText` implementor is expected.
///
/// Each method below forwards to the encoder stored inside the box. The
/// receiver is converted with `as_ref()` first so the call lands on the inner
/// encoder and not on this impl again.
impl BinaryText for Box<dyn BinaryText> {
    /// Forwards to the boxed encoder's `base`.
    fn base(&self) -> usize {
        self.as_ref().base()
    }

    /// Forwards to the boxed encoder's `name`.
    fn name(&self) -> &str {
        self.as_ref().name()
    }

    /// Forwards to the boxed encoder's `n_bytes_encode`.
    fn n_bytes_encode(&self) -> usize {
        self.as_ref().n_bytes_encode()
    }

    /// Forwards to the boxed encoder's `n_bytes_decode`.
    fn n_bytes_decode(&self) -> usize {
        self.as_ref().n_bytes_decode()
    }

    /// Forwards to the boxed encoder's `encode_byte`.
    fn encode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        self.as_ref().encode_byte(byte)
    }

    /// Forwards to the boxed encoder's `encode_into_vec`.
    fn encode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        self.as_ref().encode_into_vec(input, res)
    }

    /// Forwards to the boxed encoder's `decode_byte`.
    fn decode_byte(&self, byte: u8) -> Result<u8, BinTxtError> {
        self.as_ref().decode_byte(byte)
    }

    /// Forwards to the boxed encoder's `decode_into_vec`.
    fn decode_into_vec(&self, input: &[u8], res: &mut Vec<u8>) -> Result<(), BinTxtError> {
        self.as_ref().decode_into_vec(input, res)
    }

    /// Forwards to the boxed encoder's `is_decodable`.
    fn is_decodable(&self, input: &str) -> bool {
        self.as_ref().is_decodable(input)
    }
}

/// Builds the decoding look-up table that inverts an encoding alphabet.
///
/// The table is indexed by the value of an encoded byte and holds that
/// byte's position in `lut_enc`. Slots for bytes that do not occur in the
/// alphabet keep the marker value 255. If a byte occurs more than once, the
/// last occurrence wins.
///
/// # Panics
///
/// Panics if `lut_enc` contains a byte with a value of 128 or above, because
/// the table only has 128 slots.
pub(crate) fn build_decoding_lut(lut_enc: &[u8]) -> [u8; 128] {
    // Start with every slot marked as "not part of the alphabet".
    let mut table = [u8::MAX; 128];
    lut_enc.iter().enumerate().for_each(|(position, &symbol)| {
        // The encoded byte selects the slot, its alphabet position is the payload.
        table[usize::from(symbol)] = position as u8;
    });
    table
}