koibumi-base32 0.0.3

A Base32 encoder/decoder library
Documentation
//! This crate is a Base32 encoder/decoder library.
//!
//! The library is intended to be used to implement an Onion address encoder/decoder.
//! The library uses the RFC 4648 Base32 alphabet, but encoded strings are lowercase by default.
//! The library does not support padding.
//!
//! # Examples
//!
//! ```rust
//! use koibumi_base32 as base32;
//!
//! let test = base32::encode(b"hello");
//! let expected = "nbswy3dp";
//! assert_eq!(test, expected);
//! ```
//!
//! ```rust
//! use koibumi_base32 as base32;
//!
//! let test = base32::decode("nbswy3dp")?;
//! let expected = b"hello";
//! assert_eq!(test, expected);
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```

#![deny(unsafe_code)]
#![warn(missing_docs)]

#[macro_use]
extern crate lazy_static;

use std::fmt;

/// Lowercase RFC 4648 Base32 alphabet; the position of a symbol is its 5-bit value.
const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz234567";

/// Sentinel stored in `ALPHABET_INDEX` for bytes that are not Base32 symbols.
///
/// It equals the alphabet length (32), i.e. one past the largest valid value.
const INVALID: u8 = ALPHABET.len() as u8;

/// Reverse lookup table: maps a byte to its 5-bit value, or to `INVALID`
/// when the byte does not appear in `ALPHABET`.
///
/// The table is computed during constant evaluation, so lookups need neither
/// lazy initialisation nor any synchronisation at run time.
// NOTE: this table no longer relies on the `lazy_static!` macro.
static ALPHABET_INDEX: [u8; 0x100] = {
    let mut index = [INVALID; 0x100];
    // `for` loops are not permitted in constant evaluation, hence the `while`.
    let mut i = 0;
    while i < ALPHABET.len() {
        index[ALPHABET[i] as usize] = i as u8;
        i += 1;
    }
    index
};

/// Encodes byte array into Base32 string.
///
/// The input is arbitrary `[u8]` slice
/// and the output is lowercase `String`.
/// Using lowercase RFC4648 alphabet and can be used for Onion addresses.
/// Padding is not supported.
///
/// # Examples
///
/// ```rust
/// use koibumi_base32 as base32;
///
/// let test = base32::encode(b"hello");
/// let expected = "nbswy3dp";
/// assert_eq!(test, expected);
/// ```
pub fn encode(bytes: impl AsRef<[u8]>) -> String {
    let bytes = bytes.as_ref();

    let mut encoded = Vec::new();

    let mut i = 0;
    let mut bit = 0;
    while i < bytes.len() {
        // 0      1      2     3      4      5     6      7
        // |xxxxx xxx|xx xxxxx x|xxxx xxxx|x xxxxx xx|xxx xxxxx|
        // 0         1          2         3          4
        let c = if bit <= 3 {
            bytes[i] >> (3 - bit)
        } else if i + 1 < bytes.len() {
            bytes[i] << (bit - 3) | bytes[i + 1] >> (11 - bit)
        } else {
            bytes[i] << (bit - 3)
        } & 0x1f;

        encoded.push(ALPHABET[c as usize]);

        bit += 5;
        if bit >= 8 {
            i += 1;
            bit -= 8;
        }
    }

    String::from_utf8(encoded).unwrap()
}

#[test]
fn test_encode() {
    // Pairs of (raw input, expected unpadded lowercase encoding).
    let vectors: &[(&[u8], &str)] = &[
        (b"", ""),
        (b"f", "my"),
        (b"fo", "mzxq"),
        (b"foo", "mzxw6"),
        (b"foob", "mzxw6yq"),
        (b"fooba", "mzxw6ytb"),
        (b"foobar", "mzxw6ytboi"),
    ];

    for &(input, expected) in vectors {
        assert_eq!(encode(input), expected);
    }
}

/// Error reporting that a character outside the Base32 alphabet was found.
///
/// The [`decode`] function uses this as its error type.
/// The wrapped value is the character that was rejected.
///
/// [`decode`]: fn.decode.html
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidCharacter(char);

impl InvalidCharacter {
    /// Returns the character that was rejected as invalid.
    pub fn char(&self) -> char {
        let InvalidCharacter(found) = *self;
        found
    }
}

impl fmt::Display for InvalidCharacter {
    /// Writes a message containing the code point of the rejected character.
    ///
    /// The character itself is echoed only when it is not a control
    /// character, so the message never embeds raw control codes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let InvalidCharacter(found) = *self;
        let code_point = u32::from(found);

        if !found.is_control() {
            return write!(
                f,
                "invalid character '{}' ({:#08x}) found",
                found, code_point
            );
        }
        write!(f, "invalid character ({:#08x}) found", code_point)
    }
}

// No underlying cause is exposed, so the default `Error` methods suffice.
impl std::error::Error for InvalidCharacter {}

/// Translates one Base32 symbol into its 5-bit value.
///
/// Returns `InvalidCharacter` carrying `ch` when the character is not part
/// of the alphabet.
fn to_num(ch: char) -> Result<u8, InvalidCharacter> {
    // Code points above 0xff lie outside the 256-entry table, so the checked
    // lookup yields `None` for them; in-range entries that hold the `INVALID`
    // sentinel are filtered out the same way.
    ALPHABET_INDEX
        .get(ch as usize)
        .copied()
        .filter(|&value| value != INVALID)
        .ok_or(InvalidCharacter(ch))
}

/// Decodes an unpadded, lowercase Base32 string into bytes.
///
/// The input is anything that can be viewed as a `str` and must consist
/// solely of symbols from the lowercase RFC 4648 alphabet, as used for
/// Onion addresses. The output is the decoded `Vec<u8>`.
/// Padding is not supported. Trailing bits that do not fill a whole byte
/// are discarded.
///
/// # Errors
///
/// Returns [`InvalidCharacter`] holding the first character that is not part
/// of the alphabet.
///
/// [`InvalidCharacter`]: struct.InvalidCharacter.html
///
/// # Examples
///
/// ```rust
/// use koibumi_base32 as base32;
///
/// let test = base32::decode("nbswy3dp")?;
/// let expected = b"hello";
/// assert_eq!(test, expected);
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub fn decode(s: impl AsRef<str>) -> Result<Vec<u8>, InvalidCharacter> {
    let s = s.as_ref();

    // Every symbol carries five bits, so at most floor(len * 5 / 8) bytes
    // come out (the byte length is an upper bound on the symbol count).
    // Whole eight-symbol groups and the remainder are counted separately so
    // that the computation cannot overflow.
    let mut decoded = Vec::with_capacity(s.len() / 8 * 5 + s.len() % 8 * 5 / 8);

    // `acc` collects decoded bits; its lowest `pending` bits have not been
    // written out yet. Older bits further up are stale.
    let mut acc: u16 = 0;
    let mut pending: u32 = 0;
    for ch in s.chars() {
        acc = (acc << 5) | u16::from(to_num(ch)?);
        pending += 5;
        if pending >= 8 {
            pending -= 8;
            // The truncating cast keeps exactly the eight completed bits
            // and drops the stale ones above them.
            decoded.push((acc >> pending) as u8);
        }
    }

    Ok(decoded)
}

#[test]
fn test_decode() {
    // Pairs of (unpadded lowercase encoding, expected raw bytes).
    let vectors: &[(&str, &[u8])] = &[
        ("", b""),
        ("my", b"f"),
        ("mzxq", b"fo"),
        ("mzxw6", b"foo"),
        ("mzxw6yq", b"foob"),
        ("mzxw6ytb", b"fooba"),
        ("mzxw6ytboi", b"foobar"),
    ];

    for &(encoded, expected) in vectors {
        assert_eq!(decode(encoded).unwrap(), expected);
    }
}