Skip to main content

adler_core/
permute.rs

1//! Username permutation: expand one handle into plausible variants.
2//!
3//! People reuse the same identity with small spelling shifts —
4//! `john_doe` / `johndoe` / `john.doe`, or leet like `j0hn`. Scanning a few
5//! variants raises recall at the cost of more requests, so it's opt-in via
6//! `--permute`.
7//!
8//! Levels:
9//! - [`PermuteLevel::None`]: just the original.
10//! - [`PermuteLevel::Basic`]: separator swaps (`_` `-` `.` and removal).
11//! - [`PermuteLevel::Aggressive`]: basic, plus single-class leet
12//!   substitutions and a couple of digit suffixes.
13//!
14//! Output is deduplicated, always leads with the original, contains only
15//! strings that pass [`Username`] validation, and is capped at
16//! [`MAX_VARIANTS`] to bound the request blow-up.
17
18use std::collections::HashSet;
19
20use crate::username::Username;
21
22/// Hard cap on the number of variants returned (including the original).
23pub const MAX_VARIANTS: usize = 64;
24
25const SEPARATORS: [char; 3] = ['_', '-', '.'];
26/// Single-character leet substitutions, applied one class at a time.
27const LEET: [(char, char); 5] = [('o', '0'), ('i', '1'), ('e', '3'), ('a', '4'), ('s', '5')];
28const SUFFIXES: [&str; 2] = ["1", "123"];
29
30/// How aggressively to expand a username into variants.
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub enum PermuteLevel {
33    /// No expansion — just the original username.
34    None,
35    /// Separator swaps and removal.
36    Basic,
37    /// Basic, plus leet substitutions and digit suffixes.
38    Aggressive,
39}
40
41/// Expand `username` into a deduplicated list of variants per `level`.
42///
43/// The original is always first. Variants that fail [`Username`] validation
44/// are dropped. The result never exceeds [`MAX_VARIANTS`].
45#[must_use]
46pub fn permute(username: &Username, level: PermuteLevel) -> Vec<Username> {
47    let base = username.as_str().to_owned();
48    let mut out: Vec<Username> = Vec::new();
49    let mut seen: HashSet<String> = HashSet::new();
50    add(&mut out, &mut seen, base.clone());
51
52    if level == PermuteLevel::None {
53        return out;
54    }
55
56    // Basic: separator swaps + removal (only meaningful if a separator exists).
57    if base.contains(SEPARATORS) {
58        let stripped: String = base.chars().filter(|c| !SEPARATORS.contains(c)).collect();
59        try_add(&mut out, &mut seen, stripped);
60        for &sep in &SEPARATORS {
61            let swapped: String = base
62                .chars()
63                .map(|c| if SEPARATORS.contains(&c) { sep } else { c })
64                .collect();
65            try_add(&mut out, &mut seen, swapped);
66        }
67    }
68
69    if level == PermuteLevel::Basic {
70        out.truncate(MAX_VARIANTS);
71        return out;
72    }
73
74    // Aggressive: leet over every variant produced so far, one class at a time.
75    let snapshot: Vec<String> = out.iter().map(|u| u.as_str().to_owned()).collect();
76    for variant in &snapshot {
77        for &(from, to) in &LEET {
78            if variant.contains(from) {
79                let leeted: String = variant
80                    .chars()
81                    .map(|c| if c == from { to } else { c })
82                    .collect();
83                try_add(&mut out, &mut seen, leeted);
84            }
85        }
86    }
87    // Digit suffixes on the original handle.
88    for suffix in SUFFIXES {
89        try_add(&mut out, &mut seen, format!("{base}{suffix}"));
90    }
91
92    out.truncate(MAX_VARIANTS);
93    out
94}
95
96/// Insert a candidate that is already known to be a valid username.
97fn add(out: &mut Vec<Username>, seen: &mut HashSet<String>, candidate: String) {
98    if seen.insert(candidate.clone()) {
99        if let Ok(u) = Username::new(candidate) {
100            out.push(u);
101        }
102    }
103}
104
105/// Insert a candidate string, validating it as a username first.
106fn try_add(out: &mut Vec<Username>, seen: &mut HashSet<String>, candidate: String) {
107    if out.len() >= MAX_VARIANTS {
108        return;
109    }
110    add(out, seen, candidate);
111}
112
113#[cfg(test)]
114mod tests {
115    use super::*;
116
117    fn names(v: &[Username]) -> Vec<&str> {
118        v.iter().map(Username::as_str).collect()
119    }
120
121    fn user(s: &str) -> Username {
122        Username::new(s).unwrap()
123    }
124
125    #[test]
126    fn none_returns_only_original() {
127        let v = permute(&user("john_doe"), PermuteLevel::None);
128        assert_eq!(names(&v), ["john_doe"]);
129    }
130
131    #[test]
132    fn original_is_always_first() {
133        for level in [PermuteLevel::Basic, PermuteLevel::Aggressive] {
134            let v = permute(&user("john_doe"), level);
135            assert_eq!(v[0].as_str(), "john_doe");
136        }
137    }
138
139    #[test]
140    fn basic_swaps_separators() {
141        let v = permute(&user("john_doe"), PermuteLevel::Basic);
142        let n = names(&v);
143        for expected in ["john_doe", "johndoe", "john.doe", "john-doe"] {
144            assert!(n.contains(&expected), "missing {expected:?} in {n:?}");
145        }
146    }
147
148    #[test]
149    fn basic_without_separator_is_just_original() {
150        let v = permute(&user("johndoe"), PermuteLevel::Basic);
151        assert_eq!(names(&v), ["johndoe"]);
152    }
153
154    #[test]
155    fn aggressive_adds_leet_and_suffixes() {
156        let v = permute(&user("bob"), PermuteLevel::Aggressive);
157        let n = names(&v);
158        assert!(n.contains(&"bob"));
159        assert!(n.contains(&"b0b"), "leet o→0 missing in {n:?}");
160        assert!(n.contains(&"bob1"));
161        assert!(n.contains(&"bob123"));
162    }
163
164    #[test]
165    fn results_are_deduplicated() {
166        let v = permute(&user("aaa"), PermuteLevel::Aggressive);
167        let n = names(&v);
168        let mut sorted = n.clone();
169        sorted.sort_unstable();
170        sorted.dedup();
171        assert_eq!(sorted.len(), n.len(), "duplicates in {n:?}");
172    }
173
174    #[test]
175    fn all_variants_are_valid_usernames() {
176        // Every emitted variant must round-trip through Username::new.
177        let v = permute(&user("john.doe_x"), PermuteLevel::Aggressive);
178        for u in &v {
179            assert!(Username::new(u.as_str()).is_ok());
180        }
181    }
182
183    #[test]
184    fn never_exceeds_cap() {
185        let v = permute(&user("a.b.c-d_e.o.i.e.a.s"), PermuteLevel::Aggressive);
186        assert!(v.len() <= MAX_VARIANTS, "got {}", v.len());
187    }
188
189    proptest::proptest! {
190        /// Invariants that must hold for any valid username and any level.
191        #[test]
192        fn permute_invariants(
193            s in "[A-Za-z0-9._-]{1,64}",
194            level_idx in 0usize..3,
195        ) {
196            let level = [
197                PermuteLevel::None,
198                PermuteLevel::Basic,
199                PermuteLevel::Aggressive,
200            ][level_idx];
201            let variants = permute(&user(&s), level);
202
203            proptest::prop_assert!(!variants.is_empty());
204            // The original always leads.
205            proptest::prop_assert_eq!(variants[0].as_str(), s.as_str());
206            // Bounded request blow-up.
207            proptest::prop_assert!(variants.len() <= MAX_VARIANTS);
208            // Every variant is itself a valid username.
209            for v in &variants {
210                proptest::prop_assert!(Username::new(v.as_str()).is_ok());
211            }
212            // No duplicates.
213            let unique: std::collections::HashSet<&str> =
214                variants.iter().map(Username::as_str).collect();
215            proptest::prop_assert_eq!(unique.len(), variants.len());
216            // None means exactly the original.
217            if level == PermuteLevel::None {
218                proptest::prop_assert_eq!(variants.len(), 1);
219            }
220        }
221    }
222}