codlet_core/code/normalize.rs
1//! Code normalization (RFC-003 FR-2, INV-4).
2//!
3//! Normalization must be **identical** on the issue path and the redeem path,
4//! and **idempotent** (`normalize(normalize(x)) == normalize(x)`), or valid
5//! codes fail to match their stored lookup key.
6//!
7//! ## Compatibility note
8//!
9//! Normalization strips ASCII whitespace and
10//! hyphens and uppercases ASCII letters — and **nothing else**. In particular
11//! it does *not* drop the visually ambiguous characters `0 1 O I L` (those are
12//! merely excluded from the generation *alphabet*, RFC-003 §4). codlet
13//! reproduces that exact behavior so existing service codes keep matching. The
14//! ambiguity handling lives in [`super::alphabet`], not here.
15
/// Normalize raw code input into its canonical lookup form.
///
/// Exactly three transformations are applied, and nothing else:
///
/// * ASCII whitespace (TAB, LF, VT, FF, CR and SPACE) is removed;
/// * `-` (U+002D) is removed;
/// * ASCII letters are uppercased.
///
/// Every other character is passed through untouched. That includes
/// non-ASCII whitespace such as U+00A0 NO-BREAK SPACE: non-ASCII characters
/// are preserved here, and validation (RFC-003 FR-2) is responsible for
/// rejecting them.
///
/// The result contains no ASCII whitespace, no `-` and no lowercase ASCII
/// letters, so applying `normalize` to its own output is a no-op
/// (idempotence). This never panics on arbitrary Unicode input.
#[must_use]
pub fn normalize(raw: &str) -> String {
    raw.chars()
        // `char::is_whitespace` on its own matches the whole Unicode
        // `White_Space` set (U+00A0, U+2003, U+3000, ...). Gating it on
        // `is_ascii()` keeps the stripped set to the ASCII code points only,
        // as the contract above requires, without changing the result for any
        // pure-ASCII input.
        .filter(|c| !(c.is_ascii() && c.is_whitespace()) && *c != '-')
        .map(|c| c.to_ascii_uppercase())
        .collect()
}
28
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII spaces and hyphens vanish; ASCII letters come out uppercased.
    #[test]
    fn strips_separators_and_uppercases() {
        let cases = [
            ("x7-y9 z2", "X7Y9Z2"),
            ("X7Y9Z2", "X7Y9Z2"),
            (" a b - c ", "ABC"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize(input), expected);
        }
    }

    /// Compatibility guard: 0/1/O/I/L are excluded from the generation
    /// alphabet only; normalization itself must keep them.
    #[test]
    fn does_not_drop_ambiguous_characters() {
        let canonical = normalize("o1il0");
        assert_eq!(canonical, "O1IL0");
    }

    /// A second pass over already-normalized text must change nothing.
    #[test]
    fn idempotent() {
        let samples = ["X7-Y9 Z2", "abc", " ", "Ünïcödé", "a-b-c-1-2-3", ""];
        for s in samples {
            let once = normalize(s);
            let twice = normalize(&once);
            assert_eq!(twice, once, "not idempotent for {s:?}");
        }
    }

    /// Input made up solely of separators (or nothing at all) yields "".
    #[test]
    fn empty_and_separator_only_become_empty() {
        for blank in ["", " -- "] {
            assert_eq!(normalize(blank), "");
        }
    }

    /// Spot-check a handful of scalar values across the Unicode range; full
    /// coverage is in the property test in the crate test suite.
    #[test]
    fn no_panic_on_arbitrary_unicode() {
        let code_points = [
            0u32, 0x09, 0x20, 0x2d, 0x41, 0x7f, 0x80, 0xa0, 0x1f600, 0x10ffff,
        ];
        // Values that are not valid scalar values are skipped by `filter_map`.
        for ch in code_points.into_iter().filter_map(char::from_u32) {
            let single = String::from(ch);
            let _ = normalize(&single);
        }
    }
}