base91le 0.1.0

little-endian base91 encoding format that supports padding
Documentation
//! base91le is an encoding that turns raw bytes into ASCII text, in little-endian order, using 91 ASCII characters.
//!
//! It is more efficient than base64: every 13 bytes are encoded as 16 characters,
//! and trailing padding characters indicate how many bytes are encoded in the last 16-character chunk.
//!
//! See [`KEYMAP`] for the characters the encoding uses.
//!
//! # Simple encode and decode
//!
//! ```
//! use base91le::{encode, decode};
//!
//! assert_eq!(encode(b"hello"), "%;:w!;00~~~~~~~~");
//! assert_eq!(decode("BM1$A*00~~~~~~~~").unwrap(), b"world");
//! ```
//!
//! [`KEYMAP`]: crate::KEYMAP

/// Provides a reader that decodes the bytes read from a given reader
pub mod read;
/// Fixed-size ring buffer to read from and write to
pub mod ring;
/// Provides a writer that writes the encoded string to a given writer
pub mod write;

pub(crate) mod copier;

#[cfg(test)]
mod tests;

/// Alphabet used by the encoding: the character at index `d` represents the base-91 digit `d`.
///
/// It consists of the ten digits, the upper- and lower-case letters and 29 punctuation
/// characters. The printable ASCII characters `"`, `'` and `\` (and the space) are not part
/// of the alphabet.
pub const KEYMAP: &[u8; 91] =
    b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+,-./:;<=>?@[]^_`{|}~";

/// Inverse of [`KEYMAP`]: the entry at index `c - 0x21` is the base-91 digit of the ASCII
/// code `c`, for `c` in `0x21..=0x7E`.
///
/// Entries for the characters that are not in [`KEYMAP`] (`"`, `'` and `\`) hold `0xFF`.
const DIGITMAP: [u8; 94] = [
    62, 0xFF, 63, 64, 65, 66, 0xFF, 67, 68, 69, 70, 71, 72, 73, 74, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    75, 76, 77, 78, 79, 80, 81, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31, 32, 33, 34, 35, 82, 0xFF, 83, 84, 85, 86, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 87, 88, 89, 90,
];

/// Errors that can occur while decoding base91le text.
///
/// Every variant carries an `index`, a byte offset into the input being decoded.
#[derive(Debug, Clone, Copy)]
pub enum DecodeError {
    /// A byte that is not allowed in the base91le encoding.
    InvalidCode {
        /// Offset of the offending byte.
        index: usize,
        /// The offending byte itself.
        code: u8,
    },
    /// A chunk whose decoded value is too big to be represented in 13 bytes.
    TooLargeChunk {
        /// Offset at which the chunk starts.
        index: usize,
        /// The 16 characters of the chunk.
        chunk: [u8; 16],
        /// The numeric value the chunk decoded to.
        decoded: u128,
    },
    /// A chunk that ends with more than 13 `'~'` padding characters.
    TooMuchPadding {
        /// Offset at which the padding starts.
        index: usize,
        /// Number of padding characters found.
        padded: usize,
    },
    /// The input ended before the current chunk was complete.
    UnpaddedTail {
        /// Offset at which the input ended.
        index: usize,
    },
}

impl DecodeError {
    /// Re-bases a chunk-relative error onto the `n`-th 16-byte chunk of a longer input
    /// by adding `16 * n` to its `index`; all other fields are kept as they are.
    fn nth_chunk(mut self, n: usize) -> Self {
        let shift = 16 * n;
        match &mut self {
            Self::InvalidCode { index, .. }
            | Self::TooLargeChunk { index, .. }
            | Self::TooMuchPadding { index, .. }
            | Self::UnpaddedTail { index } => *index += shift,
        }
        self
    }
}

impl std::fmt::Display for DecodeError {
    /// Writes a human-readable description of the error, including its offset.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `DecodeError` is `Copy`, so the fields can be taken by value.
        match *self {
            Self::UnpaddedTail { index } => {
                write!(f, "input ended at {index} not padded with '~'")
            }
            Self::TooMuchPadding { index, padded } => {
                write!(
                    f,
                    "too much padding char({padded}) at {index}, expected at most 13"
                )
            }
            Self::TooLargeChunk {
                index,
                chunk,
                decoded,
            } => {
                // Non-printable bytes of the chunk are shown escaped.
                let shown = chunk.escape_ascii();
                write!(
                    f,
                    "too large chunk \"{}\"(0X{decoded:X}) start at {index} ,expected \".+/Nd*DP8>OFnJg]\"(0XFFFFFFFFFFFFFFFFFFFFFFFFFF) at most",
                    shown
                )
            }
            Self::InvalidCode { index, code } => {
                let shown = code.escape_ascii();
                write!(f, "invalid base91le code \'{}\' at {index}", shown)
            }
        }
    }
}

// The default `Error` methods are sufficient: the error has no underlying source.
impl std::error::Error for DecodeError {}

/// Encodes one full 13-byte chunk into 16 base91le characters.
///
/// The chunk is interpreted as a little-endian integer and written out as 16 base-91
/// digits, least-significant digit first, each digit mapped through [`KEYMAP`].
pub const fn encode_chunk(input: &[u8; 13]) -> [u8; 16] {
    // Widen the chunk to 16 bytes so it can be loaded as a `u128`; the upper three bytes
    // stay zero. A plain element-wise loop keeps this safe and usable in a `const fn`,
    // where range indexing is not available.
    let mut widened = [0u8; 16];
    let mut pos = 0;
    while pos < input.len() {
        widened[pos] = input[pos];
        pos += 1;
    }
    let mut value = u128::from_le_bytes(widened);

    let mut out = [0u8; 16];
    let mut idx = 0;
    // `for` loops are not allowed in a `const fn`, hence the manual counter.
    while idx < out.len() {
        out[idx] = KEYMAP[(value % 91) as usize];
        value /= 91;
        idx += 1;
    }
    out
}

/// Encodes a final, short run of bytes into one padded 16-character chunk.
///
/// The bytes are zero-extended to a full 13-byte chunk and encoded; the first
/// `input.len() + 3` characters are kept and the rest of the chunk is overwritten
/// with `'~'` padding.
///
/// # Panics
///
/// Panics if `input` is longer than 13 bytes.
fn encode_tail(input: &[u8]) -> [u8; 16] {
    const PADDING: &[u8; 13] = b"~~~~~~~~~~~~~";

    let used = input.len();
    let mut chunk = [0u8; 13];
    chunk[..used].copy_from_slice(input);

    let mut encoded = encode_chunk(&chunk);
    // Everything after the data characters becomes padding.
    let (_, pad) = encoded.split_at_mut(used + 3);
    pad.copy_from_slice(&PADDING[used..]);
    encoded
}

/// Encodes `input` into a base91le string.
///
/// Every complete 13-byte group becomes 16 characters; a shorter trailing group, if any,
/// becomes one more 16-character chunk padded with `'~'`. Empty input yields an empty string.
pub fn encode(input: &[u8]) -> String {
    let (full_chunks, tail) = input.as_chunks::<13>();
    let has_tail = !tail.is_empty();

    // Exactly 16 output bytes per full chunk, plus 16 for the padded tail chunk if present.
    let mut encoded = Vec::<u8>::with_capacity((full_chunks.len() + has_tail as usize) * 16);
    full_chunks
        .iter()
        .for_each(|chunk| encoded.extend_from_slice(&encode_chunk(chunk)));
    if has_tail {
        encoded.extend_from_slice(&encode_tail(tail));
    }

    // SAFETY: every byte pushed above was produced by `encode_chunk` or `encode_tail`,
    // which only emit characters from `KEYMAP` and the `'~'` padding byte, all ASCII,
    // so the buffer is valid UTF-8.
    unsafe { String::from_utf8_unchecked(encoded) }
}

/// Decodes one 16-character chunk into `buf` and returns the number of decoded bytes.
///
/// Trailing `'~'` characters are padding: a chunk ending in `p` of them carries `13 - p`
/// bytes, encoded in its first `16 - p` characters. Only the first `13 - p` bytes of `buf`
/// are written. For padded chunks only the low `13 - p` bytes of the decoded value are
/// kept; any higher bits are not validated.
///
/// # Errors
///
/// * [`DecodeError::TooMuchPadding`] if the chunk ends with more than 13 `'~'`.
/// * [`DecodeError::InvalidCode`] if a data character is not part of [`KEYMAP`].
/// * [`DecodeError::TooLargeChunk`] if the decoded value does not fit in 13 bytes.
///
/// All reported indices are relative to the start of `input`.
pub fn decode_chunk(input: &[u8; 16], buf: &mut [u8; 13]) -> Result<usize, DecodeError> {
    /// Smallest value that no longer fits in 13 bytes (`2^104`).
    const INVALID: u128 = 1 << (13 * 8);
    let padded = input.iter().rev().take_while(|l| **l == b'~').count();
    let len = 13usize
        .checked_sub(padded)
        .ok_or(DecodeError::TooMuchPadding {
            index: 16 - padded,
            padded,
        })?;
    let mut decoded: u128 = 0;
    let mut exp: u128 = 1;
    // Accumulate the base-91 digits, least-significant first. Sixteen digits stay below
    // 91^16, which fits comfortably in a `u128`.
    for (index, &code) in input[..len + 3].iter().enumerate() {
        // Reject everything outside printable ASCII plus the three printable characters
        // that are not in the alphabet, so the table lookup below is always in range.
        if code <= 0x20 || 0x7F <= code || code == b'"' || code == b'\'' || code == b'\\' {
            return Err(DecodeError::InvalidCode { index, code });
        }
        decoded += DIGITMAP[(code - 0x21) as usize] as u128 * exp;
        exp *= 91;
    }
    // `INVALID` itself already needs a 14th byte, so it must be rejected as well;
    // accepting it would silently decode to 13 zero bytes.
    if decoded >= INVALID {
        return Err(DecodeError::TooLargeChunk {
            index: 0,
            chunk: *input,
            decoded,
        });
    }
    let out = decoded.to_le_bytes();
    buf[..len].copy_from_slice(&out[..len]);
    Ok(len)
}

/// Decodes a base91le string into the raw bytes it represents.
///
/// # Errors
///
/// Returns [`DecodeError::UnpaddedTail`], with `index` set to the input length, if the
/// length of `bytes` is not a multiple of 16. Otherwise returns whatever error
/// [`decode_chunks`] reports for the 16-byte chunks.
pub fn decode(bytes: &str) -> Result<Vec<u8>, DecodeError> {
    match bytes.as_bytes().as_chunks::<16>() {
        // No leftover bytes: the input is a whole number of chunks.
        (chunks, []) => decode_chunks(chunks),
        _ => Err(DecodeError::UnpaddedTail { index: bytes.len() }),
    }
}

/// Decodes a sequence of 16-byte base91le chunks into raw bytes.
///
/// This is the counterpart of [`decode`] for input that has already been split into
/// whole chunks.
///
/// # Errors
///
/// Returns the first error reported by [`decode_chunk`], with its index shifted by
/// 16 bytes for every chunk that precedes the failing one.
pub fn decode_chunks(input: &[[u8; 16]]) -> Result<Vec<u8>, DecodeError> {
    // Each chunk yields at most 13 bytes.
    let mut decoded = Vec::with_capacity(input.len() * 13);
    let mut scratch = [0; 13];
    for (nth, chunk) in input.iter().enumerate() {
        match decode_chunk(chunk, &mut scratch) {
            Ok(len) => decoded.extend_from_slice(&scratch[..len]),
            Err(err) => return Err(err.nth_chunk(nth)),
        }
    }
    Ok(decoded)
}