use simd_normalizer::CaseFoldMode;
use simd_normalizer::matching::{
MatchingOptions, matches_normalized, normalize_for_matching, normalize_for_matching_utf16,
};
fn default_opts() -> MatchingOptions {
MatchingOptions::default()
}
/// Returns `MatchingOptions` whose `case_fold` field is `CaseFoldMode::Turkish`.
///
/// Used by the tests that exercise the Turkish dotted/dotless "i" handling.
fn turkish_opts() -> MatchingOptions {
    let case_fold = CaseFoldMode::Turkish;
    MatchingOptions { case_fold }
}
/// Checks that several spellings of "file" normalize to exactly the same
/// string as lowercase "file" under the default options.
///
/// Two of the spellings use U+0131 (Latin small letter dotless i) where the
/// canonical spelling has an ASCII "i".
#[test]
fn file_variants_produce_identical_output() {
    let opts = default_opts();
    let canonical = normalize_for_matching("file", &opts);

    // Each spelling is paired with the message reported when it fails to
    // normalize to the canonical form.
    let variants = [
        ("File", "'File' should match 'file'"),
        ("FILE", "'FILE' should match 'file'"),
        ("f\u{0131}le", "'fıle' should match 'file'"),
        ("F\u{0131}LE", "'FıLE' should match 'file'"),
    ];

    for (variant, failure_message) in variants {
        let normalized = normalize_for_matching(variant, &opts);
        assert_eq!(normalized, canonical, "{failure_message}");
    }
}
/// Verifies that strings differing only in letter case are reported as
/// matching under the default options, including a non-ASCII "é".
#[test]
fn case_insensitive_matching() {
    let opts = default_opts();

    // (mixed- or upper-case spelling, lowercase spelling)
    let pairs = [("Hello", "hello"), ("WORLD", "world"), ("CaFé", "café")];
    for (cased, lowercase) in pairs {
        assert!(matches_normalized(cased, lowercase, &opts));
    }
}
/// Verifies that Greek capital letters match their lowercase counterparts
/// under the default options.
#[test]
fn case_insensitive_greek() {
    let opts = default_opts();

    // U+0391..U+0394: capital alpha, beta, gamma, delta.
    let uppercase = "\u{0391}\u{0392}\u{0393}\u{0394}";
    // U+03B1..U+03B4: the corresponding small letters.
    let lowercase = "\u{03B1}\u{03B2}\u{03B3}\u{03B4}";

    assert!(matches_normalized(uppercase, lowercase, &opts));
}
/// Verifies that individual Latin letters match their visually similar
/// Cyrillic look-alikes under the default options.
#[test]
fn confusable_latin_cyrillic() {
    let opts = default_opts();

    // (Latin letter, Cyrillic look-alike): U+0430 "а", U+0435 "е", U+043E "о".
    let lookalikes = [("a", "\u{0430}"), ("e", "\u{0435}"), ("o", "\u{043E}")];
    for (latin, cyrillic) in lookalikes {
        assert!(matches_normalized(latin, cyrillic, &opts));
    }
}
/// Verifies that a whole word written with a mix of Cyrillic look-alikes and
/// Latin letters matches its pure-Latin spelling under the default options.
#[test]
fn confusable_word_level() {
    let opts = default_opts();

    // "apple" spelled with Cyrillic U+0430 (а), U+0440 (р) twice, a Latin
    // "l", and Cyrillic U+0435 (е).
    let spoofed = "\u{0430}\u{0440}\u{0440}l\u{0435}";

    assert!(matches_normalized("apple", spoofed, &opts));
}
/// Verifies that a fullwidth capital "A" matches ASCII "a" under the default
/// options.
#[test]
fn nfkc_fullwidth_matching() {
    // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A.
    let fullwidth_a = "\u{FF21}";
    let is_match = matches_normalized(fullwidth_a, "a", &default_opts());
    assert!(is_match);
}
/// Verifies that the superscript digit two matches the plain ASCII digit "2"
/// under the default options.
#[test]
fn nfkc_superscript_matching() {
    // U+00B2: SUPERSCRIPT TWO.
    let superscript_two = "\u{00B2}";
    let is_match = matches_normalized(superscript_two, "2", &default_opts());
    assert!(is_match);
}
/// Verifies that the single-code-point "fi" ligature matches the two-letter
/// sequence "fi" under the default options.
#[test]
fn nfkc_ligature_matching() {
    // U+FB01: LATIN SMALL LIGATURE FI.
    let ligature = "\u{FB01}";
    let is_match = matches_normalized(ligature, "fi", &default_opts());
    assert!(is_match);
}
/// Verifies that, with Turkish case folding selected, a word starting with
/// ASCII capital "I" matches the same word starting with dotless "ı".
#[test]
fn turkish_dotless_i_matching() {
    let opts = turkish_opts();

    let capital_i = "Istanbul";
    // U+0131: LATIN SMALL LETTER DOTLESS I.
    let dotless_i = "\u{0131}stanbul";

    assert!(matches_normalized(capital_i, dotless_i, &opts));
}
/// Verifies that, with Turkish case folding selected, a word starting with
/// dotted capital "İ" matches the same word starting with ASCII "i".
#[test]
fn turkish_dotted_capital_i_matching() {
    let opts = turkish_opts();

    // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE.
    let dotted_capital = "\u{0130}stanbul";
    let ascii_lower = "istanbul";

    assert!(matches_normalized(dotted_capital, ascii_lower, &opts));
}
/// Negative check: genuinely different words must not be reported as matching
/// under the default options.
#[test]
fn different_words_dont_match() {
    let opts = default_opts();

    // The last pair differs by a single leading letter.
    let distinct_pairs = [("hello", "world"), ("cat", "dog"), ("file", "pile")];
    for (left, right) in distinct_pairs {
        assert!(!matches_normalized(left, right, &opts));
    }
}
/// Verifies that normalizing an already-normalized string yields the same
/// string again, for a spread of ASCII, accented, Cyrillic, emoji, and
/// fullwidth samples.
#[test]
fn matching_is_idempotent() {
    let opts = default_opts();

    let samples = [
        "hello",
        "File",
        "CAFÉ",
        // Cyrillic look-alikes mixed with a Latin "l".
        "\u{0430}\u{0440}\u{0440}l\u{0435}",
        // À, É, Ö as precomposed code points.
        "\u{00C0}\u{00C9}\u{00D6}",
        // Supplementary-plane emoji (U+1F600).
        "\u{1F600}",
        // Fullwidth capital A, B, C.
        "\u{FF21}\u{FF22}\u{FF23}",
    ];

    for sample in samples {
        let first_pass = normalize_for_matching(sample, &opts);
        let second_pass = normalize_for_matching(&first_pass, &opts);
        assert_eq!(first_pass, second_pass, "not idempotent for {:?}", sample);
    }
}
/// Verifies that the UTF-16 normalization output, once decoded back into a
/// `String`, equals the output of the string normalization for the same
/// input.
#[test]
fn utf16_roundtrip() {
    let opts = default_opts();

    for text in ["hello", "File", "CAFÉ", "\u{1F600}"] {
        let code_units = normalize_for_matching_utf16(text, &opts);
        // The decode itself is part of the check: the output must be
        // well-formed UTF-16.
        let roundtripped = String::from_utf16(&code_units).expect("valid UTF-16");
        let expected = normalize_for_matching(text, &opts);
        assert_eq!(roundtripped, expected);
    }
}
/// Verifies that a supplementary-plane character yields at least two UTF-16
/// code units, and that those units decode to the same text as the string
/// normalization of that character.
#[test]
fn utf16_supplementary_surrogates() {
    let opts = default_opts();
    // U+1F600 lies outside the Basic Multilingual Plane, so UTF-16 needs a
    // surrogate pair to encode it.
    let emoji = "\u{1F600}";

    let code_units = normalize_for_matching_utf16(emoji, &opts);
    let unit_count = code_units.len();
    assert!(
        unit_count >= 2,
        "supplementary char should produce surrogate pair"
    );

    let roundtripped = String::from_utf16(&code_units).expect("valid UTF-16");
    assert_eq!(roundtripped, normalize_for_matching(emoji, &opts));
}
/// Edge case: the empty string normalizes to the empty string and matches
/// itself under the default options.
#[test]
fn empty_string() {
    let opts = default_opts();

    let normalized = normalize_for_matching("", &opts);
    assert_eq!(normalized, "");

    assert!(matches_normalized("", "", &opts));
}
/// Smoke test: normalizing one-character inputs (lowercase Latin, uppercase
/// Latin, Cyrillic U+0430) must complete without panicking.
///
/// The results are deliberately discarded; only the absence of a panic is
/// checked.
#[test]
fn single_char() {
    let opts = default_opts();
    for one_char in ["a", "A", "\u{0430}"] {
        let _ = normalize_for_matching(one_char, &opts);
    }
}
/// Smoke test: a long input (a 20-byte phrase repeated 1000 times) normalizes
/// without panicking and produces non-empty output.
#[test]
fn long_input_no_panic() {
    let opts = default_opts();
    let long_text = "The quick brown fox ".repeat(1000);

    assert!(!normalize_for_matching(&long_text, &opts).is_empty());
}
/// Smoke test: an input consisting solely of combining marks, with no base
/// character, must normalize without panicking.
///
/// The result is deliberately discarded; only the absence of a panic is
/// checked.
#[test]
fn combining_marks_only() {
    // U+0300 combining grave, U+0301 combining acute, U+0302 combining circumflex.
    let marks_only = "\u{0300}\u{0301}\u{0302}";
    let _ = normalize_for_matching(marks_only, &default_opts());
}