mod data;
use data::normalization_tests::NORMALIZATION_TESTS;
use simd_normalizer::UnicodeNormalization;
/// Checks canonical decomposition (NFD) against a hand-written table of
/// `(input, expected)` pairs.
#[test]
fn test_nfd_vectors() {
    const CASES: &[(&str, &str)] = &[
        // Plain ASCII passes through unchanged.
        ("abc", "abc"),
        // U+1E0B splits into `d` + U+0307; U+01C4 is expected to stay as-is under NFD.
        ("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}"),
        // U+2026 (horizontal ellipsis) is expected to stay as-is under NFD.
        ("\u{2026}", "\u{2026}"),
        // U+2126 (ohm sign) maps to U+03A9 (Greek capital omega).
        ("\u{2126}", "\u{3a9}"),
        // Both spellings of d + dot below + dot above end up in the same mark order.
        ("\u{1e0b}\u{323}", "d\u{323}\u{307}"),
        ("\u{1e0d}\u{307}", "d\u{323}\u{307}"),
        // Already decomposed: nothing to do.
        ("a\u{301}", "a\u{301}"),
        // A leading combining mark does not attach to the letter that follows it.
        ("\u{301}a", "\u{301}a"),
        // Hangul syllables decompose into their jamo (LVT and LV respectively).
        ("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"),
        ("\u{ac1c}", "\u{1100}\u{1162}"),
    ];

    for &(source, want) in CASES {
        let got = source.nfd();
        assert_eq!(
            &*got, want,
            "NFD({:?}): got {:?}, expected {:?}",
            source, &*got, want
        );
    }
}
/// Checks compatibility decomposition (NFKD) against a hand-written table of
/// `(input, expected)` pairs.
#[test]
fn test_nfkd_vectors() {
    const CASES: &[(&str, &str)] = &[
        // Plain ASCII passes through unchanged.
        ("abc", "abc"),
        // U+1E0B splits into `d` + U+0307; U+01C4 expands to `D`, `Z` and U+030C.
        ("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}"),
        // U+2026 (horizontal ellipsis) expands to three full stops.
        ("\u{2026}", "..."),
        // U+2126 (ohm sign) maps to U+03A9 (Greek capital omega).
        ("\u{2126}", "\u{3a9}"),
        // Both spellings of d + dot below + dot above end up in the same mark order.
        ("\u{1e0b}\u{323}", "d\u{323}\u{307}"),
        ("\u{1e0d}\u{307}", "d\u{323}\u{307}"),
        // Already decomposed: nothing to do.
        ("a\u{301}", "a\u{301}"),
        // A leading combining mark does not attach to the letter that follows it.
        ("\u{301}a", "\u{301}a"),
        // Hangul syllables decompose into their jamo (LVT and LV respectively).
        ("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"),
        ("\u{ac1c}", "\u{1100}\u{1162}"),
    ];

    for &(source, want) in CASES {
        let got = source.nfkd();
        assert_eq!(
            &*got, want,
            "NFKD({:?}): got {:?}, expected {:?}",
            source, &*got, want
        );
    }
}
/// Checks canonical composition (NFC) against a hand-written table of
/// `(input, expected)` pairs.
#[test]
fn test_nfc_vectors() {
    const CASES: &[(&str, &str)] = &[
        // Plain ASCII passes through unchanged.
        ("abc", "abc"),
        // Already composed; U+01C4 is expected to stay as-is under NFC.
        ("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}"),
        // U+2026 (horizontal ellipsis) is expected to stay as-is under NFC.
        ("\u{2026}", "\u{2026}"),
        // U+2126 (ohm sign) maps to U+03A9 and is not composed back.
        ("\u{2126}", "\u{3a9}"),
        // d + dot above + dot below recomposes around the dot-below form U+1E0D.
        ("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"),
        ("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"),
        // Base letter + combining acute composes to U+00E1.
        ("a\u{301}", "\u{e1}"),
        // A leading combining mark does not attach to the letter that follows it.
        ("\u{301}a", "\u{301}a"),
        // Precomposed Hangul syllables are already in NFC.
        ("\u{d4db}", "\u{d4db}"),
        ("\u{ac1c}", "\u{ac1c}"),
        // Several marks on one base: U+0300 composes with `a`, the remaining
        // marks are expected in the order U+05AE, U+0305, U+0315.
        (
            "a\u{300}\u{305}\u{315}\u{5ae}b",
            "\u{e0}\u{5ae}\u{305}\u{315}b",
        ),
    ];

    for &(source, want) in CASES {
        let got = source.nfc();
        assert_eq!(
            &*got, want,
            "NFC({:?}): got {:?}, expected {:?}",
            source, &*got, want
        );
    }
}
/// Checks compatibility composition (NFKC) against a hand-written table of
/// `(input, expected)` pairs.
#[test]
fn test_nfkc_vectors() {
    const CASES: &[(&str, &str)] = &[
        // Plain ASCII passes through unchanged.
        ("abc", "abc"),
        // U+01C4 expands to `D` followed by U+017D; U+1E0B stays composed.
        ("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}"),
        // U+2026 (horizontal ellipsis) expands to three full stops.
        ("\u{2026}", "..."),
        // U+2126 (ohm sign) maps to U+03A9 and is not composed back.
        ("\u{2126}", "\u{3a9}"),
        // d + dot above + dot below recomposes around the dot-below form U+1E0D.
        ("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"),
        ("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"),
        // Base letter + combining acute composes to U+00E1.
        ("a\u{301}", "\u{e1}"),
        // A leading combining mark does not attach to the letter that follows it.
        ("\u{301}a", "\u{301}a"),
        // Precomposed Hangul syllables are already in NFKC.
        ("\u{d4db}", "\u{d4db}"),
        ("\u{ac1c}", "\u{ac1c}"),
        // Several marks on one base: U+0300 composes with `a`, the remaining
        // marks are expected in the order U+05AE, U+0305, U+0315.
        (
            "a\u{300}\u{305}\u{315}\u{5ae}b",
            "\u{e0}\u{5ae}\u{305}\u{315}b",
        ),
    ];

    for &(source, want) in CASES {
        let got = source.nfkc();
        assert_eq!(
            &*got, want,
            "NFKC({:?}): got {:?}, expected {:?}",
            source, &*got, want
        );
    }
}
/// Two CJK compatibility ideographs (U+2F999, U+2F8A6) must map to the same
/// unified ideographs (U+831D, U+6148) under all four normalization forms.
#[test]
fn test_cjk_compat_decomposition() {
    let compat = "\u{2f999}\u{2f8a6}";
    let unified = "\u{831d}\u{6148}";

    // Each form is normalized and checked in turn so a failure names the
    // first form that disagrees.
    let nfd = compat.nfd();
    assert_eq!(&*nfd, unified, "NFD of CJK compat ideographs");

    let nfkd = compat.nfkd();
    assert_eq!(&*nfkd, unified, "NFKD of CJK compat ideographs");

    let nfc = compat.nfc();
    assert_eq!(&*nfc, unified, "NFC of CJK compat ideographs");

    let nfkc = compat.nfkc();
    assert_eq!(&*nfkc, unified, "NFKC of CJK compat ideographs");
}
/// Verifies that the `is_nfc` / `is_nfd` / `is_nfkc` / `is_nfkd` predicates
/// agree with the expected columns of every entry in `NORMALIZATION_TESTS`.
///
/// For each entry the test checks that:
/// * every expected column passes the predicate for its own form;
/// * when `nfc != nfd`, neither column passes the other form's predicate;
/// * when `nfkc != nfc`, `nfc` is rejected by `is_nfkc` while `nfkc` is still
///   accepted by `is_nfc`;
/// * when `nfkd != nfd`, `nfd` is rejected by `is_nfkd` while `nfkd` is still
///   accepted by `is_nfd`.
///
/// All violations are collected and reported together. Collection stops at
/// the end of the first entry that brings the total to `MAX_FAILURES` or more.
#[test]
fn test_quick_check_cross_form_consistency() {
    /// Collection stops once at least this many failures have been recorded.
    const MAX_FAILURES: usize = 100;

    let mut failures: Vec<String> = Vec::new();
    // Tracked separately so the truncation notice is never counted as a failure.
    let mut truncated = false;

    for (i, t) in NORMALIZATION_TESTS.iter().enumerate() {
        // Each expected column must be recognised as being in its own form.
        if !t.nfc.is_nfc() {
            failures.push(format!(
                "case {i}: is_nfc(nfc) should be true, nfc={:?}",
                t.nfc
            ));
        }
        if !t.nfd.is_nfd() {
            failures.push(format!(
                "case {i}: is_nfd(nfd) should be true, nfd={:?}",
                t.nfd
            ));
        }
        if !t.nfkc.is_nfkc() {
            failures.push(format!(
                "case {i}: is_nfkc(nfkc) should be true, nfkc={:?}",
                t.nfkc
            ));
        }
        if !t.nfkd.is_nfkd() {
            failures.push(format!(
                "case {i}: is_nfkd(nfkd) should be true, nfkd={:?}",
                t.nfkd
            ));
        }

        // Composed vs. decomposed: if the columns differ, neither may pass
        // the other form's predicate.
        if t.nfc != t.nfd {
            if t.nfd.is_nfc() {
                failures.push(format!(
                    "case {i}: nfc!=nfd but is_nfc(nfd) is true; nfc={:?} nfd={:?}",
                    t.nfc, t.nfd
                ));
            }
            if t.nfc.is_nfd() {
                failures.push(format!(
                    "case {i}: nfc!=nfd but is_nfd(nfc) is true; nfc={:?} nfd={:?}",
                    t.nfc, t.nfd
                ));
            }
        }

        // Compatibility vs. canonical composition: the NFC column must be
        // rejected by is_nfkc, but the NFKC column must still be valid NFC.
        if t.nfkc != t.nfc {
            if t.nfc.is_nfkc() {
                failures.push(format!(
                    "case {i}: nfkc!=nfc but is_nfkc(nfc) is true; nfkc={:?} nfc={:?}",
                    t.nfkc, t.nfc
                ));
            }
            if !t.nfkc.is_nfc() {
                failures.push(format!(
                    "case {i}: nfkc!=nfc but is_nfc(nfkc) is false; nfkc={:?} nfc={:?}",
                    t.nfkc, t.nfc
                ));
            }
        }

        // Compatibility vs. canonical decomposition: the NFD column must be
        // rejected by is_nfkd, but the NFKD column must still be valid NFD.
        if t.nfkd != t.nfd {
            if t.nfd.is_nfkd() {
                failures.push(format!(
                    "case {i}: nfkd!=nfd but is_nfkd(nfd) is true; nfkd={:?} nfd={:?}",
                    t.nfkd, t.nfd
                ));
            }
            if !t.nfkd.is_nfd() {
                failures.push(format!(
                    "case {i}: nfkd!=nfd but is_nfd(nfkd) is false; nfkd={:?} nfd={:?}",
                    t.nfkd, t.nfd
                ));
            }
        }

        // Keep the report readable when something is badly broken.
        if failures.len() >= MAX_FAILURES {
            truncated = true;
            break;
        }
    }

    let truncation_note = if truncated {
        format!("\n... (truncated after {} failures)", failures.len())
    } else {
        String::new()
    };
    assert!(
        failures.is_empty(),
        "Quick-check cross-form consistency failures ({} failures):\n{}{}",
        failures.len(),
        failures.join("\n"),
        truncation_note
    );
}
/// Thin wrappers around the `unicode_normalization` crate.
///
/// Keeping that crate's trait import inside this module means its
/// `nfc`/`nfd`/`nfkc`/`nfkd` methods are only in scope here.
mod un_reference {
    // Only the trait's methods are needed, so it is imported anonymously.
    use unicode_normalization::UnicodeNormalization as _;

    /// Returns the reference NFC form of `s` as an owned `String`.
    pub fn nfc(s: &str) -> String {
        s.nfc().collect::<String>()
    }

    /// Returns the reference NFD form of `s` as an owned `String`.
    pub fn nfd(s: &str) -> String {
        s.nfd().collect::<String>()
    }

    /// Returns the reference NFKC form of `s` as an owned `String`.
    pub fn nfkc(s: &str) -> String {
        s.nfkc().collect::<String>()
    }

    /// Returns the reference NFKD form of `s` as an owned `String`.
    pub fn nfkd(s: &str) -> String {
        s.nfkd().collect::<String>()
    }
}
/// Differential test: for every entry in `NORMALIZATION_TESTS`, normalizes
/// each of its five columns (source, nfc, nfd, nfkc, nfkd) to all four forms
/// and requires the output to match `un_reference` (the
/// `unicode_normalization` crate).
///
/// All mismatches are collected and reported together. Collection stops at
/// the end of the first entry that brings the total to `MAX_FAILURES` or more.
#[test]
fn test_differential_vs_unicode_normalization() {
    /// Collection stops once at least this many mismatches have been recorded.
    const MAX_FAILURES: usize = 100;

    /// Appends a formatted mismatch to `failures` when `ours` differs from `theirs`.
    fn record_mismatch(
        failures: &mut Vec<String>,
        case: usize,
        form: &str,
        column: &str,
        input: &str,
        ours: &str,
        theirs: &str,
    ) {
        if ours != theirs {
            failures.push(format!(
                "case {case} {form}({column}): simd={ours:?} un={theirs:?} (input={input:?})"
            ));
        }
    }

    let mut failures: Vec<String> = Vec::new();
    // Tracked separately so the truncation notice is never counted as a failure.
    let mut truncated = false;

    for (i, t) in NORMALIZATION_TESTS.iter().enumerate() {
        let columns: &[(&str, &str)] = &[
            ("source", t.source),
            ("nfc", t.nfc),
            ("nfd", t.nfd),
            ("nfkc", t.nfkc),
            ("nfkd", t.nfkd),
        ];
        for &(col_name, input) in columns {
            record_mismatch(
                &mut failures,
                i,
                "NFC",
                col_name,
                input,
                &input.nfc(),
                &un_reference::nfc(input),
            );
            record_mismatch(
                &mut failures,
                i,
                "NFD",
                col_name,
                input,
                &input.nfd(),
                &un_reference::nfd(input),
            );
            record_mismatch(
                &mut failures,
                i,
                "NFKC",
                col_name,
                input,
                &input.nfkc(),
                &un_reference::nfkc(input),
            );
            record_mismatch(
                &mut failures,
                i,
                "NFKD",
                col_name,
                input,
                &input.nfkd(),
                &un_reference::nfkd(input),
            );
        }

        // Keep the report readable when something is badly broken. The check
        // runs once per entry, so the final count may exceed the limit.
        if failures.len() >= MAX_FAILURES {
            truncated = true;
            break;
        }
    }

    let truncation_note = if truncated {
        format!("\n... (truncated after {} failures)", failures.len())
    } else {
        String::new()
    };
    assert!(
        failures.is_empty(),
        "Differential failures against unicode-normalization ({} failures):\n{}{}",
        failures.len(),
        failures.join("\n"),
        truncation_note
    );
}
/// Runs a fixed list of hand-picked inputs through all four normalization
/// forms and requires each result to equal what `un_reference` produces.
/// Unlike the bulk differential test, this one stops at the first mismatch.
#[test]
fn test_specific_vectors_differential() {
    const INPUTS: &[&str] = &[
        "abc",
        "\u{1e0b}\u{1c4}",
        "\u{2026}",
        "\u{2126}",
        "\u{1e0b}\u{323}",
        "\u{1e0d}\u{307}",
        "a\u{301}",
        "\u{301}a",
        "\u{d4db}",
        "\u{ac1c}",
        "a\u{300}\u{305}\u{315}\u{5ae}b",
        "\u{2f999}\u{2f8a6}",
    ];

    for &text in INPUTS {
        let simd_out = text.nfc();
        let reference = un_reference::nfc(text);
        assert_eq!(
            &*simd_out,
            reference.as_str(),
            "NFC({:?}): simd={:?} un={:?}",
            text,
            &*simd_out,
            reference
        );

        let simd_out = text.nfd();
        let reference = un_reference::nfd(text);
        assert_eq!(
            &*simd_out,
            reference.as_str(),
            "NFD({:?}): simd={:?} un={:?}",
            text,
            &*simd_out,
            reference
        );

        let simd_out = text.nfkc();
        let reference = un_reference::nfkc(text);
        assert_eq!(
            &*simd_out,
            reference.as_str(),
            "NFKC({:?}): simd={:?} un={:?}",
            text,
            &*simd_out,
            reference
        );

        let simd_out = text.nfkd();
        let reference = un_reference::nfkd(text);
        assert_eq!(
            &*simd_out,
            reference.as_str(),
            "NFKD({:?}): simd={:?} un={:?}",
            text,
            &*simd_out,
            reference
        );
    }
}