// herolib-sid 0.1.0
//
// SmartID - Short, human-readable, collision-free identifiers
// Documentation
//! Base-36 encoding and decoding for SmartID
//!
//! SmartID uses base-36 encoding (0-9, a-z) for human-readable,
//! compact identifiers.
//!
//! # Alphabet
//!
//! ```text
//! 0123456789abcdefghijklmnopqrstuvwxyz
//! ```
//!
//! # Properties
//!
//! - Lowercase only
//! - ASCII safe
//! - No punctuation
//! - Visually compact
//! - Widely understood

/// Digit characters of the base-36 alphabet, indexed by digit value
/// (`0`-`9` followed by `a`-`z`).
pub const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Radix of the SmartID encoding; matches the number of entries in [`ALPHABET`].
pub const BASE: u64 = 36;

/// Number of distinct values that fit in 4 base-36 characters (36^4 = 1,679,616).
pub const CAPACITY_4: u64 = BASE.pow(4);
/// Number of distinct values that fit in 5 base-36 characters (36^5 = 60,466,176).
pub const CAPACITY_5: u64 = CAPACITY_4 * BASE;
/// Number of distinct values that fit in 6 base-36 characters (36^6 = 2,176,782,336).
pub const CAPACITY_6: u64 = CAPACITY_5 * BASE;

/// Converts a number into its canonical base-36 text form.
///
/// Digits are taken from [`ALPHABET`], most significant first. The output is
/// lowercase and never carries leading zeros; zero itself encodes as `"0"`.
///
/// # Arguments
///
/// * `value` - The number to encode
///
/// # Returns
///
/// The shortest base-36 representation of `value` (between 1 and 13 characters)
///
/// # Examples
///
/// ```
/// use herolib_osis::sid::base36::encode;
///
/// assert_eq!(encode(0), "0");
/// assert_eq!(encode(35), "z");
/// assert_eq!(encode(36), "10");
/// assert_eq!(encode(1295), "zz");
/// ```
pub fn encode(value: u64) -> String {
    // 36^13 exceeds u64::MAX, so thirteen digits are enough for any input.
    const MAX_DIGITS: usize = 13;

    let mut digits = [b'0'; MAX_DIGITS];
    let mut start = MAX_DIGITS;
    let mut rest = value;

    // Fill the buffer from the least-significant end. Running the body at
    // least once is what makes zero come out as the single digit "0".
    loop {
        start -= 1;
        digits[start] = ALPHABET[(rest % BASE) as usize];
        rest /= BASE;
        if rest == 0 {
            break;
        }
    }

    digits[start..].iter().map(|&byte| char::from(byte)).collect()
}

/// Encodes a number in base-36 and left-pads the result with `'0'`.
///
/// Padding only ever lengthens the output: when the plain encoding already
/// has `min_length` characters or more, it is returned as is (never truncated).
///
/// # Arguments
///
/// * `value` - The number to encode
/// * `min_length` - Smallest acceptable output length, reached by prepending `'0'`
///
/// # Returns
///
/// A base-36 string of at least `min_length` characters
///
/// # Examples
///
/// ```
/// use herolib_osis::sid::base36::encode_padded;
///
/// assert_eq!(encode_padded(0, 4), "0000");
/// assert_eq!(encode_padded(12, 4), "000c");
/// assert_eq!(encode_padded(35, 4), "000z");
/// assert_eq!(encode_padded(36, 4), "0010");
/// ```
pub fn encode_padded(value: u64, min_length: usize) -> String {
    // `0>` = fill with '0', right-align; the width is a minimum, so longer
    // encodings pass through unchanged.
    format!("{:0>width$}", encode(value), width = min_length)
}

/// Parses a base-36 string back into its numeric value.
///
/// Parsing is lenient: leading zeros are allowed and uppercase letters are
/// treated like their lowercase counterparts.
///
/// # Arguments
///
/// * `s` - The base-36 text to parse
///
/// # Returns
///
/// The decoded value on success
///
/// # Errors
///
/// * [`DecodeError::EmptyString`] - `s` is empty
/// * [`DecodeError::InvalidCharacter`] - `s` contains a character outside
///   `0-9`, `a-z`, `A-Z`
/// * [`DecodeError::Overflow`] - the value does not fit in a `u64`
///
/// Characters are processed left to right and the first failure is reported.
///
/// # Examples
///
/// ```
/// use herolib_osis::sid::base36::decode;
///
/// assert_eq!(decode("0").unwrap(), 0);
/// assert_eq!(decode("z").unwrap(), 35);
/// assert_eq!(decode("10").unwrap(), 36);
/// assert_eq!(decode("zz").unwrap(), 1295);
/// assert_eq!(decode("0000").unwrap(), 0);
/// assert_eq!(decode("000c").unwrap(), 12);
/// ```
pub fn decode(s: &str) -> Result<u64, DecodeError> {
    // `char::to_digit` takes the radix as `u32`.
    const RADIX: u32 = BASE as u32;

    if s.is_empty() {
        return Err(DecodeError::EmptyString);
    }

    s.chars().try_fold(0_u64, |acc, ch| {
        // `to_digit` accepts 0-9, a-z and A-Z, which keeps uppercase input valid.
        let digit = ch
            .to_digit(RADIX)
            .ok_or(DecodeError::InvalidCharacter(ch))?;

        // Shift the accumulator one digit left, then append the new digit;
        // both steps are checked so oversized input is rejected instead of wrapping.
        acc.checked_mul(BASE)
            .and_then(|shifted| shifted.checked_add(u64::from(digit)))
            .ok_or(DecodeError::Overflow)
    })
}

/// Picks the SmartID length tier (4, 5, or 6 characters) for a global id.
///
/// Ids below [`CAPACITY_4`] use 4 characters, ids below [`CAPACITY_5`] use 5,
/// and everything else is reported as 6. Note that the result saturates at 6:
/// ids at or above `36^6` also return 6 even though their base-36 encoding is
/// longer than six characters.
///
/// # Arguments
///
/// * `global_id` - The numeric global ID
///
/// # Returns
///
/// The character length of the tier: 4, 5, or 6
///
/// # Examples
///
/// ```
/// use herolib_osis::sid::base36::required_length;
///
/// assert_eq!(required_length(0), 4);
/// assert_eq!(required_length(1_679_615), 4);  // 36^4 - 1
/// assert_eq!(required_length(1_679_616), 5);  // 36^4
/// assert_eq!(required_length(60_466_175), 5); // 36^5 - 1
/// assert_eq!(required_length(60_466_176), 6); // 36^5
/// ```
pub fn required_length(global_id: u64) -> usize {
    match global_id {
        id if id < CAPACITY_4 => 4,
        id if id < CAPACITY_5 => 5,
        _ => 6,
    }
}

/// Tells whether a string has the shape of a SmartID.
///
/// A SmartID is 4 to 6 bytes long and consists solely of ASCII digits and
/// lowercase ASCII letters. Uppercase letters, punctuation, whitespace and
/// non-ASCII characters all make the string invalid.
///
/// # Arguments
///
/// * `s` - The candidate string
///
/// # Returns
///
/// `true` when `s` is a well-formed SmartID, `false` otherwise
pub fn is_valid_sid(s: &str) -> bool {
    // Any byte of a multi-byte UTF-8 sequence is >= 0x80 and therefore fails
    // the ASCII check below, so inspecting bytes is enough.
    matches!(s.len(), 4..=6)
        && s.bytes()
            .all(|byte| matches!(byte, b'0'..=b'9' | b'a'..=b'z'))
}

/// Reasons why a string cannot be decoded as a base-36 number.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Nothing to decode: the input had zero length.
    EmptyString,
    /// The input contained the given character, which is not a base-36 digit.
    InvalidCharacter(char),
    /// The encoded value is too large to be represented as a `u64`.
    Overflow,
}

/// Short, lowercase, human-readable description of each failure.
impl std::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::EmptyString => f.write_str("empty string"),
            Self::InvalidCharacter(ch) => write!(f, "invalid character: '{}'", ch),
            Self::Overflow => f.write_str("numeric overflow"),
        }
    }
}

// No underlying cause to expose, so the default `Error` methods are sufficient.
impl std::error::Error for DecodeError {}

#[cfg(test)]
mod tests {
    //! Unit tests for the base-36 helpers, including the `u64` boundary and
    //! every `DecodeError` variant.

    use super::*;

    #[test]
    fn test_encode_basic() {
        assert_eq!(encode(0), "0");
        assert_eq!(encode(1), "1");
        assert_eq!(encode(9), "9");
        assert_eq!(encode(10), "a");
        assert_eq!(encode(35), "z");
        assert_eq!(encode(36), "10");
        assert_eq!(encode(37), "11");
    }

    #[test]
    fn test_encode_larger_values() {
        assert_eq!(encode(1295), "zz");
        assert_eq!(encode(1296), "100");
        assert_eq!(encode(46655), "zzz");
        assert_eq!(encode(46656), "1000");
    }

    #[test]
    fn test_encode_u64_max() {
        // Largest possible input: 13 base-36 digits.
        assert_eq!(encode(u64::MAX), "3w5e11264sgsf");
    }

    #[test]
    fn test_encode_padded() {
        assert_eq!(encode_padded(0, 4), "0000");
        assert_eq!(encode_padded(1, 4), "0001");
        assert_eq!(encode_padded(12, 4), "000c");
        assert_eq!(encode_padded(35, 4), "000z");
        assert_eq!(encode_padded(36, 4), "0010");
        assert_eq!(encode_padded(1295, 4), "00zz");
        assert_eq!(encode_padded(46656, 4), "1000");
        assert_eq!(encode_padded(1679615, 4), "zzzz");
        assert_eq!(encode_padded(1679616, 5), "10000");
    }

    #[test]
    fn test_encode_padded_never_truncates() {
        // `min_length` is a lower bound only: longer encodings pass through.
        assert_eq!(encode_padded(1679616, 4), "10000");
        assert_eq!(encode_padded(0, 0), "0");
        assert_eq!(encode_padded(35, 1), "z");
    }

    #[test]
    fn test_decode_basic() {
        assert_eq!(decode("0").unwrap(), 0);
        assert_eq!(decode("1").unwrap(), 1);
        assert_eq!(decode("9").unwrap(), 9);
        assert_eq!(decode("a").unwrap(), 10);
        assert_eq!(decode("z").unwrap(), 35);
        assert_eq!(decode("10").unwrap(), 36);
    }

    #[test]
    fn test_decode_with_padding() {
        assert_eq!(decode("0000").unwrap(), 0);
        assert_eq!(decode("0001").unwrap(), 1);
        assert_eq!(decode("000c").unwrap(), 12);
        assert_eq!(decode("00zz").unwrap(), 1295);
    }

    #[test]
    fn test_decode_accepts_uppercase() {
        assert_eq!(decode("A").unwrap(), 10);
        assert_eq!(decode("Z").unwrap(), 35);
        assert_eq!(decode("ZZZZ").unwrap(), 1679615);
    }

    #[test]
    fn test_decode_errors() {
        assert!(matches!(decode(""), Err(DecodeError::EmptyString)));
        assert!(matches!(decode("!"), Err(DecodeError::InvalidCharacter('!'))));
        assert!(matches!(decode("a-b"), Err(DecodeError::InvalidCharacter('-'))));
        // Whitespace and non-ASCII characters are rejected as well.
        assert!(matches!(decode(" 1"), Err(DecodeError::InvalidCharacter(' '))));
        assert!(matches!(decode("é"), Err(DecodeError::InvalidCharacter('é'))));
    }

    #[test]
    fn test_decode_overflow() {
        // u64::MAX itself still decodes...
        assert_eq!(decode("3w5e11264sgsf"), Ok(u64::MAX));
        // ...one more overflows on the final addition...
        assert_eq!(decode("3w5e11264sgsg"), Err(DecodeError::Overflow));
        // ...and 36^13 - 1 overflows on the multiplication.
        assert_eq!(decode("zzzzzzzzzzzzz"), Err(DecodeError::Overflow));
    }

    #[test]
    fn test_roundtrip() {
        for value in [
            0,
            1,
            35,
            36,
            1295,
            1296,
            46655,
            46656,
            1679615,
            1679616,
            CAPACITY_6 - 1,
            CAPACITY_6,
            u64::MAX,
        ] {
            let encoded = encode(value);
            let decoded = decode(&encoded).unwrap();
            assert_eq!(decoded, value, "roundtrip failed for {}", value);
        }
    }

    #[test]
    fn test_required_length() {
        // 4 char range: 0 to 36^4 - 1 = 1,679,615
        assert_eq!(required_length(0), 4);
        assert_eq!(required_length(1_679_615), 4);

        // 5 char range: 36^4 to 36^5 - 1
        assert_eq!(required_length(1_679_616), 5);
        assert_eq!(required_length(60_466_175), 5);

        // 6 char range: 36^5 and above
        assert_eq!(required_length(60_466_176), 6);
        assert_eq!(required_length(2_176_782_335), 6);
    }

    #[test]
    fn test_is_valid_sid() {
        // Valid SIDs
        assert!(is_valid_sid("0000"));
        assert!(is_valid_sid("abcd"));
        assert!(is_valid_sid("z9a0"));
        assert!(is_valid_sid("12345"));
        assert!(is_valid_sid("abcdef"));

        // Invalid: wrong length
        assert!(!is_valid_sid(""));
        assert!(!is_valid_sid("abc"));
        assert!(!is_valid_sid("abcdefg"));

        // Invalid: uppercase
        assert!(!is_valid_sid("ABCD"));

        // Invalid: special characters
        assert!(!is_valid_sid("ab-d"));
        assert!(!is_valid_sid("ab_d"));
        assert!(!is_valid_sid("ab d"));

        // Invalid: non-ASCII, even when the byte length is within 4..=6
        assert!(!is_valid_sid("abcé"));
    }

    #[test]
    fn test_capacity_constants() {
        assert_eq!(CAPACITY_4, 1_679_616);
        assert_eq!(CAPACITY_5, 60_466_176);
        assert_eq!(CAPACITY_6, 2_176_782_336);
    }

    #[test]
    fn test_decode_error_display() {
        assert_eq!(DecodeError::EmptyString.to_string(), "empty string");
        assert_eq!(
            DecodeError::InvalidCharacter('-').to_string(),
            "invalid character: '-'"
        );
        assert_eq!(DecodeError::Overflow.to_string(), "numeric overflow");
    }
}