use std::borrow::Cow;
use crate::Error;
mod presets;
mod safety;
mod text;
mod transliterate;
pub use presets::*;
pub use safety::*;
pub use text::*;
pub use transliterate::*;
/// Extension trait that exposes this module's free functions as methods.
///
/// Every method here is a provided (default) method: it borrows the receiver
/// as `&str` through `AsRef<str>` and forwards that slice, together with any
/// extra arguments, to the free function of the same name. Nothing is
/// re-implemented in the trait; consult the free functions for the actual
/// semantics of each operation.
pub trait DisarmStr: AsRef<str> {
    /// Method form of the free function `normalize_confusables`; `target` is
    /// forwarded unchanged.
    #[must_use]
    fn normalize_confusables(&self, target: TargetScript) -> Cow<'_, str> {
        let input: &str = self.as_ref();
        normalize_confusables(input, target)
    }

    /// Method form of the free function `is_confusable`; `target` is
    /// forwarded unchanged.
    #[must_use]
    fn is_confusable(&self, target: TargetScript) -> bool {
        let input: &str = self.as_ref();
        is_confusable(input, target)
    }

    /// Method form of the free function `fold_case`.
    #[must_use]
    fn fold_case(&self) -> Cow<'_, str> {
        let input: &str = self.as_ref();
        fold_case(input)
    }

    /// Method form of the free function `strip_accents`.
    #[must_use]
    fn strip_accents(&self) -> Cow<'_, str> {
        let input: &str = self.as_ref();
        strip_accents(input)
    }

    /// Method form of the free function `transliterate`.
    #[must_use]
    fn transliterate(&self) -> Cow<'_, str> {
        let input: &str = self.as_ref();
        transliterate(input)
    }

    /// Method form of the free function `demojize`; `strip_modifiers` is
    /// forwarded unchanged.
    #[must_use]
    fn demojize(&self, strip_modifiers: bool) -> String {
        let input: &str = self.as_ref();
        demojize(input, strip_modifiers)
    }

    /// Method form of the free function `normalize`; `form` is forwarded
    /// unchanged.
    #[must_use]
    fn normalize(&self, form: NormalizationForm) -> String {
        let input: &str = self.as_ref();
        normalize(input, form)
    }

    /// Method form of the free function `is_normalized`; `form` is forwarded
    /// unchanged.
    #[must_use]
    fn is_normalized(&self, form: NormalizationForm) -> bool {
        let input: &str = self.as_ref();
        is_normalized(input, form)
    }

    /// Method form of the free function `escape_html`.
    #[must_use]
    fn escape_html(&self) -> Cow<'_, str> {
        let input: &str = self.as_ref();
        escape_html(input)
    }

    /// Method form of the free function `strip_zalgo`; `max_marks` is
    /// forwarded unchanged.
    #[must_use]
    fn strip_zalgo(&self, max_marks: usize) -> String {
        let input: &str = self.as_ref();
        strip_zalgo(input, max_marks)
    }

    /// Method form of the free function `is_zalgo`; `threshold` is forwarded
    /// unchanged.
    #[must_use]
    fn is_zalgo(&self, threshold: usize) -> bool {
        let input: &str = self.as_ref();
        is_zalgo(input, threshold)
    }

    /// Method form of the free function `detect_scripts`.
    #[must_use]
    fn detect_scripts(&self) -> Vec<&'static str> {
        let input: &str = self.as_ref();
        detect_scripts(input)
    }

    /// Method form of the free function `is_mixed_script`.
    #[must_use]
    fn is_mixed_script(&self) -> bool {
        let input: &str = self.as_ref();
        is_mixed_script(input)
    }

    /// Method form of the free function `is_suspicious_hostname`.
    #[must_use]
    fn is_suspicious_hostname(&self) -> HostnameAnalysis {
        let input: &str = self.as_ref();
        is_suspicious_hostname(input)
    }

    /// Method form of the free function `grapheme_len`.
    #[must_use]
    fn grapheme_len(&self) -> usize {
        let input: &str = self.as_ref();
        grapheme_len(input)
    }

    /// Method form of the free function `slugify`; `config` is forwarded
    /// unchanged.
    #[must_use]
    fn slugify(&self, config: &SlugConfig) -> String {
        let input: &str = self.as_ref();
        slugify(input, config)
    }

    /// Method form of the free function `display_clean`.
    #[must_use]
    fn display_clean(&self) -> String {
        let input: &str = self.as_ref();
        display_clean(input)
    }

    // The fallible methods below carry no `#[must_use]` attribute because
    // `Result` is itself `#[must_use]`.

    /// Method form of the free function `security_clean`.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Error`] the free function returns.
    fn security_clean(&self) -> Result<String, Error> {
        let input: &str = self.as_ref();
        security_clean(input)
    }

    /// Method form of the free function `strip_obfuscation`.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Error`] the free function returns.
    fn strip_obfuscation(&self) -> Result<String, Error> {
        let input: &str = self.as_ref();
        strip_obfuscation(input)
    }

    /// Method form of the free function `normalize_user_input`.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Error`] the free function returns.
    fn normalize_user_input(&self) -> Result<String, Error> {
        let input: &str = self.as_ref();
        normalize_user_input(input)
    }
}
/// Blanket implementation: every type that is `AsRef<str>` gets the
/// `DisarmStr` methods. The `?Sized` relaxation lets unsized types such as
/// `str` itself qualify. The body is empty because all trait methods have
/// default implementations.
impl<T> DisarmStr for T where T: AsRef<str> + ?Sized {}
#[cfg(test)]
mod tests {
    // Smoke tests for the public surface re-exported by the parent module.
    use super::*;

    #[test]
    fn normalize_folds_cyrillic_to_latin() {
        // U+0430 is CYRILLIC SMALL LETTER A.
        let folded = normalize_confusables("\u{0430}pple", TargetScript::Latin);
        assert_eq!(folded, "apple");

        // Plain ASCII and empty input are expected to come back unchanged.
        let untouched = normalize_confusables("hello", TargetScript::Latin);
        assert_eq!(untouched, "hello");
        let empty = normalize_confusables("", TargetScript::Latin);
        assert_eq!(empty, "");
    }

    #[test]
    fn is_confusable_detects_homoglyph() {
        // Same word, once with U+0430 (Cyrillic a) and once in pure ASCII.
        assert!(is_confusable("p\u{0430}ypal", TargetScript::Latin));
        assert!(!is_confusable("paypal", TargetScript::Latin));
    }

    #[test]
    fn target_script_tokens() {
        let latin = TargetScript::Latin.as_str();
        let cyrillic = TargetScript::Cyrillic.as_str();
        assert_eq!(latin, "latin");
        assert_eq!(cyrillic, "cyrillic");
    }

    #[test]
    fn terminal_width_sums_clusters() {
        assert_eq!(terminal_width("hello", false), 5);
        assert_eq!(terminal_width("世界", false), 4);
        assert_eq!(terminal_width("", false), 0);
    }

    #[test]
    fn grapheme_width_single_cluster() {
        assert_eq!(grapheme_width("a", false), 1);
        assert_eq!(grapheme_width("世", false), 2);
        // Four emoji joined by U+200D ZERO WIDTH JOINER.
        let family = "👨\u{200D}👩\u{200D}👧\u{200D}👦";
        assert_eq!(grapheme_width(family, false), 2);
    }

    #[test]
    fn ambiguous_wide_policy() {
        // U+00A1 INVERTED EXCLAMATION MARK; the boolean flag selects between
        // the two expected widths.
        let inverted_bang = "\u{00A1}";
        assert_eq!(terminal_width(inverted_bang, false), 1);
        assert_eq!(terminal_width(inverted_bang, true), 2);
        assert_eq!(grapheme_width(inverted_bang, true), 2);
    }

    #[test]
    fn sanitize_filename_happy_path() {
        let universal =
            sanitize_filename("héllo/wörld.txt", "_", 255, Platform::Universal, None, true)
                .unwrap();
        assert_eq!(universal, "hello_world.txt");

        let posix = sanitize_filename("a:b", "_", 255, Platform::Posix, None, true).unwrap();
        assert_eq!(posix, "a:b");
    }

    #[test]
    fn sanitize_filename_bad_lang_is_invalid_argument() {
        let failure = sanitize_filename("x", "_", 255, Platform::Universal, Some("zzz"), true)
            .unwrap_err();
        assert_eq!(failure.kind(), crate::ErrorKind::InvalidArgument);
        assert!(std::error::Error::source(&failure).is_none());
    }

    #[test]
    fn decode_to_utf8_explicit_and_error() {
        // 0xE9 is `é` in ISO-8859-1.
        let latin1_bytes = [0x63, 0x61, 0x66, 0xE9];
        let decoded = decode_to_utf8(&latin1_bytes, Some("ISO-8859-1"), 0.0, false).unwrap();
        assert_eq!(decoded.text, "café");
        assert!(!decoded.had_errors);

        // An unknown encoding label must be rejected.
        let failure = decode_to_utf8(b"hi", Some("FAKE-999"), 0.0, false).unwrap_err();
        assert_eq!(failure.kind(), crate::ErrorKind::InvalidArgument);

        let detection = detect_encoding(b"hello world");
        assert!(!detection.label.is_empty());
        assert!(detection.confidence > 0.0);
    }

    #[test]
    fn strip_log_injection_and_bad_replacement() {
        // U+FFFD is REPLACEMENT CHARACTER.
        let scrubbed = strip_log_injection("a\r\nb\0c", "\u{FFFD}", false).unwrap();
        assert_eq!(scrubbed, "a\u{FFFD}\u{FFFD}b\u{FFFD}c");

        // Input with nothing to strip is expected to stay borrowed.
        let clean = strip_log_injection("plain line", "\u{FFFD}", false).unwrap();
        assert!(matches!(clean, std::borrow::Cow::Borrowed(_)));

        // A carriage return as the replacement string is rejected.
        let failure = strip_log_injection("x", "\r", false).unwrap_err();
        assert_eq!(failure.kind(), crate::ErrorKind::InvalidArgument);
        assert_eq!(failure.code(), "invalid_log_replacement");
    }

    #[test]
    fn slugify_with_config() {
        let defaults = SlugConfig::default();
        assert_eq!(slugify("Héllo Wörld", &defaults), "hello-world");

        let capped = SlugConfig {
            max_length: 5,
            word_boundary: true,
            ..SlugConfig::default()
        };
        assert_eq!(slugify("hello world", &capped), "hello");
    }

    #[test]
    fn transliterate_surface() {
        assert_eq!(transliterate("hello"), "hello");

        let out = Transliterate::new()
            .on_unknown(OnUnknown::Replace("?".into()))
            .run("Москва");
        assert!(out.is_ascii() && !out.is_empty(), "got {out:?}");

        assert_eq!(strip_accents("café"), "cafe");
        assert!(is_ascii("hi"));
        assert!(!is_ascii("café"));
        assert!(list_langs().iter().any(|l| l == "ru"));

        let untranslatable = Transliterate::new().find_untranslatable("hello");
        assert!(untranslatable.is_empty());
    }

    #[test]
    fn preset_pipelines_surface() {
        // U+0440 / U+0430 are CYRILLIC SMALL LETTER ER / A.
        assert_eq!(security_clean("\u{0440}\u{0430}ypal").unwrap(), "paypal");
        assert_eq!(search_key("CAFÉ", None).unwrap(), "cafe");
        assert_eq!(sort_key("Москва", None).unwrap(), "moskva");
        assert_eq!(catalog_key("Café", None, false).unwrap(), "cafe");
        assert_eq!(ml_normalize("Café", None, "cldr").unwrap(), "cafe");
        assert_eq!(display_clean("hello world"), "hello world");
        // U+00AD is SOFT HYPHEN.
        assert_eq!(strip_bidi("pass\u{00AD}word"), "password");
        assert_eq!(normalize_user_input("café").unwrap(), "café");
        assert!(!strip_obfuscation("p\u{0430}ypal").unwrap().is_empty());

        // Invalid language / mode arguments surface as `InvalidArgument`.
        let bad_lang = search_key("x", Some("zzz")).unwrap_err();
        assert_eq!(bad_lang.kind(), crate::ErrorKind::InvalidArgument);
        let bad_mode = ml_normalize("x", None, "bogus").unwrap_err();
        assert_eq!(bad_mode.kind(), crate::ErrorKind::InvalidArgument);
    }

    #[test]
    fn list_profiles_surface() {
        let listed = list_profiles();
        assert!(listed.iter().any(|p| p == "llm_guardrail"));
        assert!(listed.iter().any(|p| p == "search_index"));

        // The listing must already be in sorted order.
        let mut expected_order = listed.clone();
        expected_order.sort();
        assert_eq!(listed, expected_order);
    }

    #[test]
    fn is_suspicious_hostname_surface() {
        let benign = is_suspicious_hostname("example.com");
        assert!(!benign.suspicious);
        assert!(!benign.mixed_script);
        assert_eq!(benign.canonical, "example.com");

        // Hostname containing U+0430 (Cyrillic a).
        let spoofed = is_suspicious_hostname("p\u{0430}ypal.com");
        assert!(spoofed.suspicious);
        assert!(spoofed.mixed_script || spoofed.has_confusables);
    }
}