codlet-core 0.9.0

Core authentication primitives for codlet: code policy, generation, normalization, keyed lookup derivation, lifecycle state machines, and storage traits.
Documentation
//! Code alphabet (RFC-003).
//!
//! An [`Alphabet`] is a validated, duplicate-free set of ASCII symbols used for
//! code generation. The default excludes visually ambiguous glyphs
//! (`0 1 O I L`) so non-technical users can read and re-enter codes reliably.

use crate::error::PolicyError;

/// The default unambiguous alphabet: ASCII uppercase letters and digits with
/// `0`, `1`, `O`, `I`, `L` removed. 31 symbols (RFC-003 §4).
///
/// The 31 symbols are the 23 remaining letters (`A`–`Z` without `I`, `L`, `O`)
/// followed by the 8 remaining digits (`2`–`9`), stored in that order.
///
/// This matches the `zinnias-ciao` generation alphabet exactly, so codes
/// generated by either side draw from the same symbol set.
pub const DEFAULT_ALPHABET: &[u8] = b"ABCDEFGHJKMNPQRSTUVWXYZ23456789";

/// An ordered collection of distinct ASCII bytes from which code symbols are
/// drawn.
///
/// Values built through [`Alphabet::new`] hold at least two symbols, all of
/// them ASCII and pairwise distinct; [`Alphabet::unambiguous`] wraps
/// [`DEFAULT_ALPHABET`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alphabet {
    symbols: Vec<u8>,
}

impl Alphabet {
    /// Validate `symbols` and wrap them in an [`Alphabet`].
    ///
    /// The checks run in this order, and the first one that fails decides the
    /// error (RFC-003 §11.1):
    ///
    /// 1. there must be at least 2 symbols,
    /// 2. every symbol must be ASCII,
    /// 3. no symbol may occur twice.
    ///
    /// The symbols are stored in the order given.
    ///
    /// # Errors
    /// - [`PolicyError::AlphabetTooSmall`] for fewer than 2 symbols,
    /// - [`PolicyError::AlphabetNotAscii`] if any byte is outside ASCII,
    /// - [`PolicyError::AlphabetNotUnique`] if a symbol is repeated.
    pub fn new(symbols: &[u8]) -> Result<Self, PolicyError> {
        if symbols.len() < 2 {
            return Err(PolicyError::AlphabetTooSmall);
        }
        if symbols.iter().any(|byte| !byte.is_ascii()) {
            return Err(PolicyError::AlphabetNotAscii);
        }

        // A repeated symbol would be drawn more often than the others. Every
        // byte is below 128 at this point, so one bit of a `u128` per symbol
        // is enough to remember which ones have already been seen.
        let mut seen: u128 = 0;
        for &byte in symbols {
            let bit = 1u128 << byte;
            if seen & bit != 0 {
                return Err(PolicyError::AlphabetNotUnique);
            }
            seen |= bit;
        }

        let symbols = symbols.to_vec();
        Ok(Self { symbols })
    }

    /// Build the default alphabet from [`DEFAULT_ALPHABET`] (31 symbols).
    #[must_use]
    pub fn unambiguous() -> Self {
        // The constant is known to be valid, so the checks in `new` are
        // skipped and this constructor stays infallible.
        let symbols = Vec::from(DEFAULT_ALPHABET);
        Self { symbols }
    }

    /// How many symbols the alphabet holds.
    #[must_use]
    pub fn len(&self) -> usize {
        self.symbols().len()
    }

    /// `true` when the alphabet holds no symbols. Both constructors produce a
    /// non-empty alphabet; the method exists to pair with [`Alphabet::len`]
    /// (clippy expects it) and for API completeness.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// The symbols as a byte slice, in their stored order.
    #[must_use]
    pub fn symbols(&self) -> &[u8] {
        self.symbols.as_slice()
    }

    /// `true` if `byte` is one of this alphabet's symbols.
    #[must_use]
    pub fn contains(&self, byte: u8) -> bool {
        self.symbols.iter().any(|&symbol| symbol == byte)
    }

    /// The exclusive upper bound used for rejection sampling: the largest
    /// multiple of the alphabet length that does not exceed 256, the number of
    /// distinct byte values. Random bytes `>= ceiling` are thrown away so that
    /// every symbol stays equally likely (RFC-003 §4, §11.5).
    ///
    /// With the 31-symbol default this is `8 * 31 = 248`; when the length
    /// divides 256 evenly the result is 256 and no byte is rejected.
    #[must_use]
    pub fn unbiased_ceiling(&self) -> usize {
        const BYTE_VALUES: usize = 256;
        let count = self.symbols.len();
        (BYTE_VALUES / count) * count
    }

    /// Turn an accepted random byte (`< unbiased_ceiling`) into a symbol by
    /// reducing it modulo the alphabet length.
    #[must_use]
    pub(crate) fn symbol_for_byte(&self, byte: u8) -> u8 {
        let index = usize::from(byte) % self.symbols.len();
        self.symbols[index]
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The default alphabet must not contain any look-alike glyph and must
    /// keep its documented size.
    #[test]
    fn default_excludes_ambiguous_characters() {
        let alphabet = Alphabet::unambiguous();
        for glyph in b"01OIL".iter().copied() {
            assert!(
                !alphabet.contains(glyph),
                "default alphabet contains ambiguous '{}'",
                glyph as char
            );
        }
        assert_eq!(alphabet.len(), 31);
    }

    /// 256 - (256 % 31) = 248 for the 31-symbol default.
    #[test]
    fn ceiling_is_248_for_default() {
        let ceiling = Alphabet::unambiguous().unbiased_ceiling();
        assert_eq!(ceiling, 248);
    }

    /// Every byte below the rejection ceiling must map to a member symbol.
    #[test]
    fn all_accepted_bytes_map_into_alphabet() {
        let alphabet = Alphabet::unambiguous();
        let ceiling = alphabet.unbiased_ceiling();
        for raw in 0..ceiling {
            let symbol = alphabet.symbol_for_byte(raw as u8);
            assert!(alphabet.contains(symbol));
        }
    }

    /// Each constructor invariant reports its own error; a minimal valid
    /// alphabet is accepted.
    #[test]
    fn rejects_small_duplicate_and_non_ascii() {
        let too_small = Alphabet::new(b"A");
        assert_eq!(too_small, Err(PolicyError::AlphabetTooSmall));

        let repeated = Alphabet::new(b"AAB");
        assert_eq!(repeated, Err(PolicyError::AlphabetNotUnique));

        let non_ascii = Alphabet::new(&[b'A', 0x80]);
        assert_eq!(non_ascii, Err(PolicyError::AlphabetNotAscii));

        let minimal = Alphabet::new(b"AB");
        assert!(minimal.is_ok());
    }
}