/// The 32 ASCII punctuation bytes; appended by `build` when `CharSetFlags::symbols` is set.
pub const SYMBOLS: &[u8] = b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
/// Bytes dropped by `filter_ambiguous`: `l`, `1`, `O`, `I`, `0`
/// (presumably chosen because they are easy to confuse visually).
pub const AMBIGUOUS: &[u8] = b"l1OI0";
/// ASCII vowels in both cases; dropped by `filter_vowels`.
pub const VOWELS: &[u8] = b"aeiouAEIOU";
/// Lowercase ASCII letters `a`-`z`; `build` always starts from this class.
pub const LOWERCASE: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
/// Uppercase ASCII letters `A`-`Z`; appended by `build` when `CharSetFlags::capitalize` is set.
pub const UPPERCASE: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/// ASCII decimal digits `0`-`9`; appended by `build` when `CharSetFlags::numerals` is set.
pub const DIGITS: &[u8] = b"0123456789";
/// Switches that control which bytes `build` places into a character set.
///
/// Lowercase ASCII letters are always part of the set; the first three flags
/// add further character classes and the last two remove bytes afterwards.
///
/// The type is a plain bag of booleans, so it also derives `PartialEq`, `Eq`
/// and `Hash`: flag sets can be compared directly and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CharSetFlags {
    /// Add the uppercase letters (`UPPERCASE`).
    pub capitalize: bool,
    /// Add the decimal digits (`DIGITS`).
    pub numerals: bool,
    /// Add the punctuation bytes (`SYMBOLS`).
    pub symbols: bool,
    /// Remove every byte listed in `AMBIGUOUS` from the assembled set.
    pub ambiguous_filter: bool,
    /// Remove every byte listed in `VOWELS` from the assembled set.
    pub no_vowels: bool,
}
impl Default for CharSetFlags {
    /// Returns the default flags: uppercase letters and digits enabled,
    /// symbols disabled, and both removal filters turned off.
    fn default() -> Self {
        // Character classes added on top of the lowercase letters.
        let capitalize = true;
        let numerals = true;
        let symbols = false;
        // Removal filters are opt-in.
        let ambiguous_filter = false;
        let no_vowels = false;
        Self { capitalize, numerals, symbols, ambiguous_filter, no_vowels }
    }
}
/// Assembles the byte set selected by `flags`, then strips `remove_chars` from it.
///
/// The set starts as `LOWERCASE`; `UPPERCASE`, `DIGITS` and `SYMBOLS` are
/// appended in that order when their flag is set. Afterwards the ambiguous
/// filter, the vowel filter and finally the caller-supplied `remove_chars`
/// are applied, each only when requested.
///
/// The returned vector keeps the append order. It can be empty if
/// `remove_chars` covers every byte that would otherwise remain.
pub fn build(flags: CharSetFlags, remove_chars: &[u8]) -> Vec<u8> {
    // Optional classes, listed in the order they are appended after lowercase.
    let optional_classes = [
        (flags.capitalize, UPPERCASE),
        (flags.numerals, DIGITS),
        (flags.symbols, SYMBOLS),
    ];

    let mut alphabet = Vec::from(LOWERCASE);
    for (enabled, class) in optional_classes {
        if enabled {
            alphabet.extend_from_slice(class);
        }
    }

    if flags.ambiguous_filter {
        alphabet = filter_ambiguous(&alphabet);
    }
    if flags.no_vowels {
        alphabet = filter_vowels(&alphabet);
    }

    if remove_chars.is_empty() {
        alphabet
    } else {
        remove_chars_from(&alphabet, remove_chars)
    }
}
/// Returns a copy of `set` without any byte listed in `AMBIGUOUS`.
///
/// The relative order of the remaining bytes is preserved.
pub fn filter_ambiguous(set: &[u8]) -> Vec<u8> {
    let mut kept = set.to_vec();
    kept.retain(|byte| !AMBIGUOUS.contains(byte));
    kept
}
/// Returns a copy of `set` without any byte listed in `VOWELS`
/// (vowels of either case).
///
/// The relative order of the remaining bytes is preserved.
pub fn filter_vowels(set: &[u8]) -> Vec<u8> {
    let mut kept = set.to_vec();
    kept.retain(|byte| !VOWELS.contains(byte));
    kept
}
/// Returns the bytes of `set` that do not occur anywhere in `bad`.
///
/// Order and duplicates among the surviving bytes are preserved; an empty
/// `bad` yields a plain copy of `set`.
pub fn remove_chars_from(set: &[u8], bad: &[u8]) -> Vec<u8> {
    let mut survivors = Vec::new();
    for &byte in set {
        if bad.contains(&byte) {
            continue;
        }
        survivors.push(byte);
    }
    survivors
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a set from `flags` with no caller-supplied removals.
    fn build_plain(flags: CharSetFlags) -> Vec<u8> {
        build(flags, &[])
    }

    /// Default flags give letters and digits only.
    #[test]
    fn default_set_is_lowercase_upper_digits() {
        let chars = build_plain(CharSetFlags::default());
        assert!(chars.iter().all(u8::is_ascii_alphanumeric));
        for expected in [b'a', b'Z', b'5'] {
            assert!(chars.contains(&expected));
        }
        assert!(!chars.iter().any(|b| SYMBOLS.contains(b)));
    }

    /// Turning `capitalize` off leaves no uppercase byte behind.
    #[test]
    fn no_capitalize_drops_uppercase() {
        let chars = build_plain(CharSetFlags {
            capitalize: false,
            ..CharSetFlags::default()
        });
        assert!(!chars.iter().any(u8::is_ascii_uppercase));
    }

    /// Turning `numerals` off leaves no digit behind.
    #[test]
    fn no_numerals_drops_digits() {
        let chars = build_plain(CharSetFlags {
            numerals: false,
            ..CharSetFlags::default()
        });
        assert!(!chars.iter().any(u8::is_ascii_digit));
    }

    /// Turning `symbols` on brings in punctuation (spot-checked at both ends and the middle).
    #[test]
    fn symbols_includes_symbol_set() {
        let chars = build_plain(CharSetFlags {
            symbols: true,
            ..CharSetFlags::default()
        });
        for expected in [b'!', b'@', b'~'] {
            assert!(chars.contains(&expected));
        }
    }

    /// The ambiguous filter strips every byte in `AMBIGUOUS`.
    #[test]
    fn ambiguous_filter_removes_l1_oi0() {
        let chars = build_plain(CharSetFlags {
            ambiguous_filter: true,
            ..CharSetFlags::default()
        });
        for &b in AMBIGUOUS {
            assert!(!chars.contains(&b), "ambiguous char {b:#x} should be removed");
        }
    }

    /// The vowel filter strips every byte in `VOWELS`, regardless of case.
    #[test]
    fn no_vowels_removes_vowels_both_cases() {
        let chars = build_plain(CharSetFlags {
            no_vowels: true,
            ..CharSetFlags::default()
        });
        for &b in VOWELS {
            assert!(!chars.contains(&b), "vowel {b:#x} should be removed");
        }
    }

    /// Caller-supplied removals are applied and leave other bytes alone.
    #[test]
    fn remove_chars_drops_requested_bytes() {
        let chars = build(CharSetFlags::default(), b"abc");
        for removed in [b'a', b'b', b'c'] {
            assert!(!chars.contains(&removed));
        }
        assert!(chars.contains(&b'd'));
    }

    /// Every combination of the five flags produces ASCII-only output.
    #[test]
    fn set_is_ascii_only_invariant() {
        // Each of the five low bits toggles one flag, so 0..32 visits every
        // flag combination exactly once.
        for mask in 0u8..32 {
            let bit = |i: u8| (mask & (1 << i)) != 0;
            let flags = CharSetFlags {
                capitalize: bit(0),
                numerals: bit(1),
                symbols: bit(2),
                ambiguous_filter: bit(3),
                no_vowels: bit(4),
            };
            assert!(build_plain(flags).iter().all(u8::is_ascii));
        }
    }
}