// codlet_core/code/alphabet.rs

//! Code alphabet (RFC-003).
//!
//! An [`Alphabet`] is a validated, deduplicated set of ASCII symbols used for
//! code generation. The default excludes visually ambiguous glyphs
//! (`0 1 O I L`) so non-technical users can read and re-enter codes reliably.

use crate::error::PolicyError;

/// The default unambiguous alphabet: ASCII uppercase letters and digits with
/// `0`, `1`, `O`, `I`, `L` removed. 31 symbols (RFC-003 §4).
///
/// The symbols are listed letters first, then digits, each group in ascending
/// order, with no repeats.
///
/// This matches the `zinnias-ciao` generation alphabet exactly, so codes
/// generated by either side draw from the same symbol set.
pub const DEFAULT_ALPHABET: &[u8] = b"ABCDEFGHJKMNPQRSTUVWXYZ23456789";

/// A validated set of ASCII code symbols.
///
/// The field is private so values can only be produced by this module's
/// constructors. Equality is derived from the underlying byte vector, so two
/// alphabets holding the same symbols in a different order compare unequal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alphabet {
    /// The symbols, in the order they were supplied at construction.
    symbols: Vec<u8>,
}

22impl Alphabet {
23    /// Build an alphabet from raw bytes, validating the invariants required for
24    /// unbiased generation (RFC-003 §11.1):
25    ///
26    /// - at least 2 symbols,
27    /// - all symbols ASCII,
28    /// - no duplicate symbols.
29    ///
30    /// # Errors
31    /// Returns [`PolicyError`] if any invariant is violated.
32    pub fn new(symbols: &[u8]) -> Result<Self, PolicyError> {
33        if symbols.len() < 2 {
34            return Err(PolicyError::AlphabetTooSmall);
35        }
36        if !symbols.iter().all(u8::is_ascii) {
37            return Err(PolicyError::AlphabetNotAscii);
38        }
39        // Reject duplicates: a repeated symbol would be over-weighted.
40        for (i, &b) in symbols.iter().enumerate() {
41            if symbols[i + 1..].contains(&b) {
42                return Err(PolicyError::AlphabetNotUnique);
43            }
44        }
45        Ok(Self {
46            symbols: symbols.to_vec(),
47        })
48    }
49
50    /// The unambiguous default alphabet (31 symbols).
51    #[must_use]
52    pub fn unambiguous() -> Self {
53        // DEFAULT_ALPHABET is a known-good constant; construction cannot fail.
54        Self {
55            symbols: DEFAULT_ALPHABET.to_vec(),
56        }
57    }
58
59    /// Number of symbols in the alphabet.
60    #[must_use]
61    pub fn len(&self) -> usize {
62        self.symbols.len()
63    }
64
65    /// Whether the alphabet is empty. Always `false` for a constructed
66    /// alphabet, present to satisfy clippy and API completeness.
67    #[must_use]
68    pub fn is_empty(&self) -> bool {
69        self.symbols.is_empty()
70    }
71
72    /// Borrow the symbols.
73    #[must_use]
74    pub fn symbols(&self) -> &[u8] {
75        &self.symbols
76    }
77
78    /// Whether `byte` is a member of this alphabet.
79    #[must_use]
80    pub fn contains(&self, byte: u8) -> bool {
81        self.symbols.contains(&byte)
82    }
83
84    /// The rejection-sampling ceiling: the largest multiple of the alphabet
85    /// length that fits in a byte. Random bytes `>= ceiling` are discarded to
86    /// avoid modulo bias (RFC-003 §4, §11.5). For the 31-symbol default this is
87    /// `256 - (256 % 31) = 248`.
88    #[must_use]
89    pub fn unbiased_ceiling(&self) -> usize {
90        let n = self.symbols.len();
91        256 - (256 % n)
92    }
93
94    /// Map an accepted random byte (`< unbiased_ceiling`) to a symbol.
95    #[must_use]
96    pub(crate) fn symbol_for_byte(&self, byte: u8) -> u8 {
97        self.symbols[byte as usize % self.symbols.len()]
98    }
99}
100
/// Unit tests for [`Alphabet`] construction and the rejection-sampling helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_excludes_ambiguous_characters() {
        let a = Alphabet::unambiguous();
        for &c in b"01OIL" {
            assert!(
                !a.contains(c),
                "default alphabet contains ambiguous '{}'",
                c as char
            );
        }
        assert_eq!(a.len(), 31);
        assert!(!a.is_empty());
    }

    #[test]
    fn default_constant_passes_validation() {
        // `unambiguous()` skips validation; make sure the constant it copies
        // really does satisfy every invariant `new` enforces.
        assert_eq!(
            Alphabet::new(DEFAULT_ALPHABET),
            Ok(Alphabet::unambiguous())
        );
    }

    #[test]
    fn ceiling_is_248_for_default() {
        assert_eq!(Alphabet::unambiguous().unbiased_ceiling(), 248);
    }

    #[test]
    fn ceiling_is_256_when_length_divides_256() {
        // With a power-of-two length no byte needs rejecting.
        let a = Alphabet::new(b"ABCD").unwrap();
        assert_eq!(a.unbiased_ceiling(), 256);
    }

    #[test]
    fn all_accepted_bytes_map_into_alphabet() {
        let a = Alphabet::unambiguous();
        let ceiling = a.unbiased_ceiling();
        let mut hits = [0usize; 128];
        // Walk the `u8` range directly so no narrowing cast is needed.
        for b in (0..=u8::MAX).filter(|&b| usize::from(b) < ceiling) {
            let sym = a.symbol_for_byte(b);
            assert!(a.contains(sym));
            hits[usize::from(sym)] += 1;
        }
        // Unbiased: every symbol is produced by the same number of bytes.
        for &sym in a.symbols() {
            assert_eq!(hits[usize::from(sym)], ceiling / a.len());
        }
    }

    #[test]
    fn rejects_small_duplicate_and_non_ascii() {
        assert_eq!(Alphabet::new(b""), Err(PolicyError::AlphabetTooSmall));
        assert_eq!(Alphabet::new(b"A"), Err(PolicyError::AlphabetTooSmall));
        assert_eq!(Alphabet::new(b"AAB"), Err(PolicyError::AlphabetNotUnique));
        // Duplicates need not be adjacent.
        assert_eq!(Alphabet::new(b"ABCA"), Err(PolicyError::AlphabetNotUnique));
        assert_eq!(
            Alphabet::new(&[b'A', 0x80]),
            Err(PolicyError::AlphabetNotAscii)
        );
        // The ASCII check runs before the duplicate check.
        assert_eq!(
            Alphabet::new(&[0x80, 0x80]),
            Err(PolicyError::AlphabetNotAscii)
        );
        assert!(Alphabet::new(b"AB").is_ok());
    }

    #[test]
    fn accepts_every_ascii_byte_but_no_more() {
        let every_ascii: Vec<u8> = (0u8..128).collect();
        let a = Alphabet::new(&every_ascii).unwrap();
        assert_eq!(a.len(), 128);
        assert_eq!(a.symbols(), every_ascii.as_slice());

        // A 129th ASCII symbol must repeat one already present.
        let mut too_many = every_ascii;
        too_many.push(0);
        assert_eq!(
            Alphabet::new(&too_many),
            Err(PolicyError::AlphabetNotUnique)
        );
    }
}