mod data;
mod lookup;
/// Reports whether `input` contains at least one confusable character.
///
/// A character counts as confusable when it is non-ASCII and
/// `lookup::lookup` returns `Some` for it. ASCII characters are never
/// reported, so a pure-ASCII (or empty) input always yields `false`.
#[must_use]
pub fn contains_confusable(input: &str) -> bool {
    // The leading `is_ascii` test is a cheap whole-string fast path that
    // skips the per-character scan for the common pure-ASCII case.
    !input.is_ascii()
        && input
            .chars()
            .filter(|ch| !ch.is_ascii())
            .any(|ch| lookup::lookup(ch).is_some())
}
/// Returns a copy of `input` in which every confusable character has been
/// swapped for its replacement string.
///
/// ASCII characters are copied through untouched without consulting the
/// lookup. A non-ASCII character is replaced by the string that
/// `lookup::lookup` returns for it, or copied through unchanged when the
/// lookup has no entry. A pure-ASCII input is returned as an owned copy.
#[must_use]
pub fn replace_confusable(input: &str) -> String {
    // Fast path: nothing can be replaced in a pure-ASCII string.
    if input.is_ascii() {
        return input.to_owned();
    }

    // Start with the input's byte length as capacity; the buffer grows on
    // its own if the replacements turn out longer than what they replace.
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut output, ch| {
            let substitute = if ch.is_ascii() {
                None
            } else {
                lookup::lookup(ch)
            };
            match substitute {
                Some(text) => output.push_str(text),
                None => output.push(ch),
            }
            output
        })
}
/// Unit tests for the confusable detection/replacement API and for the
/// shape of the generated lookup tables in `data`.
#[cfg(test)]
mod tests {
    use super::data::{SOURCE_CODEPOINTS, TARGET_STRINGS};
    use super::*;

    /// The source table must be strictly ascending (sorted, no duplicates).
    // NOTE(review): presumably the lookup relies on this ordering (e.g. a
    // binary search) — confirm against `lookup`.
    #[test]
    fn test_source_codepoints_sorted() {
        for window in SOURCE_CODEPOINTS.windows(2) {
            assert!(
                window[0] < window[1],
                "SOURCE_CODEPOINTS not strictly ascending: {:?} >= {:?}",
                window[0],
                window[1]
            );
        }
    }

    /// The two generated tables must have the same number of entries.
    // NOTE(review): presumably they are index-aligned parallel arrays —
    // confirm against `lookup`.
    #[test]
    fn test_parallel_arrays_same_length() {
        assert_eq!(SOURCE_CODEPOINTS.len(), TARGET_STRINGS.len());
    }

    /// Pure-ASCII input is never reported as containing confusables.
    #[test]
    fn test_ascii_only_false() {
        assert!(!contains_confusable("hello"));
    }

    /// Cyrillic U+0430 embedded in otherwise-ASCII text is detected.
    #[test]
    fn test_cyrillic_detected() {
        assert!(contains_confusable("p\u{0430}ypal"));
    }

    /// The empty string contains no confusables.
    #[test]
    fn test_empty_string_false() {
        assert!(!contains_confusable(""));
    }

    /// Cyrillic U+0430 is replaced by Latin "a".
    #[test]
    fn test_cyrillic_to_latin() {
        let result = replace_confusable("\u{0430}uthentication");
        assert_eq!(result, "authentication");
    }

    /// A single source code point may expand to a multi-character target.
    #[test]
    fn test_multi_codepoint_target() {
        let result = replace_confusable("\u{01C9}");
        assert_eq!(result, "lj");
    }

    /// A non-ASCII character is passed through unchanged when the lookup has
    /// no entry for it, and replaced by exactly the lookup's string otherwise.
    ///
    /// Whether U+00E9 is present depends on the generated data, so the
    /// expected output is derived from the lookup itself. Both outcomes are
    /// checked for an exact result, which a bare non-emptiness check could
    /// not do: the ASCII prefix "caf" alone makes the output non-empty.
    #[test]
    fn test_non_ascii_non_confusable_unchanged() {
        let mut expected = String::from("caf");
        match lookup::lookup('\u{00E9}') {
            Some(replacement) => expected.push_str(replacement),
            None => expected.push('\u{00E9}'),
        }
        let result = replace_confusable("caf\u{00E9}");
        assert_eq!(result, expected);
    }

    /// Only the confusable character in mixed content is rewritten.
    #[test]
    fn test_mixed_content() {
        let result = replace_confusable("h\u{0435}llo");
        assert_eq!(result, "hello");
    }

    /// Pure-ASCII input comes back unchanged.
    #[test]
    fn test_no_change_passthrough() {
        let result = replace_confusable("pure ascii text");
        assert_eq!(result, "pure ascii text");
    }

    /// Guards against accidental edits or stale regeneration of `data.rs` by
    /// pinning the number of generated entries.
    #[test]
    fn test_generated_data_entry_count() {
        assert_eq!(
            SOURCE_CODEPOINTS.len(),
            6311,
            "Entry count mismatch — regenerate data.rs with: python3 scripts/generate_confusables.py"
        );
    }
}