use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};
use proptest::prelude::*;
use simd_normalizer::IsNormalized;
use simd_normalizer::UnicodeNormalization;
use std::borrow::Cow;
fn unicode_string_strategy() -> impl Strategy<Value = String> {
let ranges = prop::char::ranges(std::borrow::Cow::Borrowed(&[
'\u{0020}'..='\u{007E}',
'\u{0100}'..='\u{024F}',
'\u{0300}'..='\u{036F}',
'\u{0400}'..='\u{04FF}',
'\u{0600}'..='\u{06FF}',
'\u{0900}'..='\u{097F}',
'\u{1100}'..='\u{11FF}',
'\u{3040}'..='\u{309F}',
'\u{4E00}'..='\u{4FFF}',
'\u{AC00}'..='\u{D7A3}',
'\u{1F600}'..='\u{1F64F}',
]));
prop::collection::vec(ranges, 1..64).prop_map(|chars| chars.into_iter().collect::<String>())
}
fn compat_heavy_strategy() -> impl Strategy<Value = String> {
let ranges = prop::char::ranges(std::borrow::Cow::Borrowed(&[
'\u{0041}'..='\u{005A}',
'\u{0061}'..='\u{007A}',
'\u{00C0}'..='\u{00FF}',
'\u{0300}'..='\u{036F}',
'\u{2070}'..='\u{209F}',
'\u{2100}'..='\u{214F}',
'\u{2150}'..='\u{218F}',
'\u{2460}'..='\u{24FF}',
'\u{3300}'..='\u{33FF}',
'\u{FB00}'..='\u{FB06}',
'\u{FF01}'..='\u{FF5E}',
'\u{AC00}'..='\u{AC10}',
]));
prop::collection::vec(ranges, 1..32).prop_map(|chars| chars.into_iter().collect::<String>())
}
#[allow(dead_code)]
fn supplementary_heavy_strategy() -> impl Strategy<Value = String> {
let ranges = prop::char::ranges(std::borrow::Cow::Borrowed(&[
'\u{1D100}'..='\u{1D1FF}',
'\u{1D400}'..='\u{1D7FF}',
'\u{1F600}'..='\u{1F64F}',
'\u{20000}'..='\u{200FF}',
'\u{E0001}'..='\u{E007F}',
'\u{0041}'..='\u{005A}',
'\u{0300}'..='\u{036F}',
]));
prop::collection::vec(ranges, 1..32).prop_map(|chars| chars.into_iter().collect::<String>())
}
// NFKC-versus-NFC consistency on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // A string reported as NFKC must also be reported as NFC.
    #[test]
    fn nfkc_implies_nfc(s in unicode_string_strategy()) {
        // Implication form: `is_nfc` is only consulted when `is_nfkc` holds.
        prop_assert!(
            !s.is_nfkc() || s.is_nfc(),
            "is_nfkc(s) is true but is_nfc(s) is false for {:?}",
            s
        );
    }

    // Applying NFC to an NFKC result must leave it unchanged.
    #[test]
    fn nfc_of_nfkc_is_nfkc(s in unicode_string_strategy()) {
        let compat_composed = s.nfkc();
        let recomposed = compat_composed.nfc();
        let actual: &str = &recomposed;
        let expected: &str = &compat_composed;
        prop_assert_eq!(actual, expected, "nfc(nfkc(s)) != nfkc(s)");
    }
}
// NFKC-versus-NFC consistency on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // A string reported as NFKC must also be reported as NFC.
    #[test]
    fn nfkc_implies_nfc_compat_heavy(s in compat_heavy_strategy()) {
        // Implication form: `is_nfc` is only consulted when `is_nfkc` holds.
        prop_assert!(
            !s.is_nfkc() || s.is_nfc(),
            "is_nfkc(s) is true but is_nfc(s) is false for {:?}",
            s
        );
    }

    // Applying NFC to an NFKC result must leave it unchanged.
    #[test]
    fn nfc_of_nfkc_is_nfkc_compat_heavy(s in compat_heavy_strategy()) {
        let compat_composed = s.nfkc();
        let recomposed = compat_composed.nfc();
        let actual: &str = &recomposed;
        let expected: &str = &compat_composed;
        prop_assert_eq!(
            actual,
            expected,
            "nfc(nfkc(s)) != nfkc(s) for compat-heavy input"
        );
    }
}
// NFKD-versus-NFD consistency on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // A string reported as NFKD must also be reported as NFD.
    #[test]
    fn nfkd_implies_nfd(s in unicode_string_strategy()) {
        // Implication form: `is_nfd` is only consulted when `is_nfkd` holds.
        prop_assert!(
            !s.is_nfkd() || s.is_nfd(),
            "is_nfkd(s) is true but is_nfd(s) is false for {:?}",
            s
        );
    }

    // Applying NFD to an NFKD result must leave it unchanged.
    #[test]
    fn nfd_of_nfkd_is_nfkd(s in unicode_string_strategy()) {
        let compat_decomposed = s.nfkd();
        let redecomposed = compat_decomposed.nfd();
        let actual: &str = &redecomposed;
        let expected: &str = &compat_decomposed;
        prop_assert_eq!(actual, expected, "nfd(nfkd(s)) != nfkd(s)");
    }
}
// NFKD-versus-NFD consistency on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // A string reported as NFKD must also be reported as NFD.
    #[test]
    fn nfkd_implies_nfd_compat_heavy(s in compat_heavy_strategy()) {
        // Implication form: `is_nfd` is only consulted when `is_nfkd` holds.
        prop_assert!(
            !s.is_nfkd() || s.is_nfd(),
            "is_nfkd(s) is true but is_nfd(s) is false for {:?}",
            s
        );
    }

    // Applying NFD to an NFKD result must leave it unchanged.
    #[test]
    fn nfd_of_nfkd_is_nfkd_compat_heavy(s in compat_heavy_strategy()) {
        let compat_decomposed = s.nfkd();
        let redecomposed = compat_decomposed.nfd();
        let actual: &str = &redecomposed;
        let expected: &str = &compat_decomposed;
        prop_assert_eq!(
            actual,
            expected,
            "nfd(nfkd(s)) != nfkd(s) for compat-heavy input"
        );
    }
}
// NFKC must equal NFC applied to NFKD, on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(1000))]

    #[test]
    fn nfkc_equals_nfc_of_nfkd(s in unicode_string_strategy()) {
        let direct = s.nfkc();
        let decomposed = s.nfkd();
        let via_nfkd = decomposed.nfc();
        let left: &str = &direct;
        let right: &str = &via_nfkd;
        prop_assert_eq!(left, right, "NFKC(s) != NFC(NFKD(s))");
    }
}
// NFKC must equal NFC applied to NFKD, on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn nfkc_equals_nfc_of_nfkd_compat_heavy(s in compat_heavy_strategy()) {
        let direct = s.nfkc();
        let decomposed = s.nfkd();
        let via_nfkd = decomposed.nfc();
        let left: &str = &direct;
        let right: &str = &via_nfkd;
        prop_assert_eq!(
            left,
            right,
            "NFKC(s) != NFC(NFKD(s)) for compat-heavy input"
        );
    }
}
// NFKD must equal NFD applied to NFKC, on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(1000))]

    #[test]
    fn nfkd_equals_nfd_of_nfkc(s in unicode_string_strategy()) {
        let direct = s.nfkd();
        let composed = s.nfkc();
        let via_nfkc = composed.nfd();
        let left: &str = &direct;
        let right: &str = &via_nfkc;
        prop_assert_eq!(left, right, "NFKD(s) != NFD(NFKC(s))");
    }
}
// NFKD must equal NFD applied to NFKC, on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn nfkd_equals_nfd_of_nfkc_compat_heavy(s in compat_heavy_strategy()) {
        let direct = s.nfkd();
        let composed = s.nfkc();
        let via_nfkc = composed.nfd();
        let left: &str = &direct;
        let right: &str = &via_nfkc;
        prop_assert_eq!(
            left,
            right,
            "NFKD(s) != NFD(NFKC(s)) for compat-heavy input"
        );
    }
}
/// Checks, for a fixed set of single-character inputs, that
/// `NFKD(s) == NFD(NFKC(s))`, and additionally that `NFKD(s) != NFD(s)` for
/// the inputs flagged as having a compatibility mapping.
///
/// Each input is paired with its flag in one table, so the two facts cannot
/// drift apart and nothing is rebuilt inside the loop.
#[test]
fn nfkd_differs_from_nfd_but_equals_nfd_of_nfkc() {
    // (input, must NFKD differ from NFD?). A `false` flag only means the
    // inequality is not asserted for that input.
    let cases: [(&str, bool); 10] = [
        ("\u{FB01}", true),
        ("\u{00A0}", true),
        ("\u{FF21}", true),
        ("\u{2126}", false),
        ("\u{2460}", true),
        ("\u{3300}", true),
        ("\u{FB20}", true),
        ("\u{2075}", true),
        ("\u{00BC}", true),
        ("\u{FB49}", false),
    ];

    // Build each normalizer once instead of once per call inside the loop.
    let nfd_norm = simd_normalizer::nfd();
    let nfkc_norm = simd_normalizer::nfkc();
    let nfkd_norm = simd_normalizer::nfkd();

    for &(input, must_differ_from_nfd) in &cases {
        let nfkd = nfkd_norm.normalize(input);
        let nfd = nfd_norm.normalize(input);
        let nfkc = nfkc_norm.normalize(input);
        let nfd_of_nfkc = nfd_norm.normalize(&nfkc);

        assert_eq!(
            &*nfkd, &*nfd_of_nfkc,
            "NFKD({:?}) != NFD(NFKC({:?})): nfkd={:?}, nfd_of_nfkc={:?}",
            input, input, nfkd, nfd_of_nfkc
        );

        if must_differ_from_nfd {
            assert_ne!(
                &*nfkd, &*nfd,
                "Expected NFKD({:?}) != NFD({:?}) for compatibility char",
                input, input
            );
        }
    }
}
// Composing after decomposing must give the same result as composing
// directly, on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // NFC(s) == NFC(NFD(s)).
    #[test]
    fn nfc_stable_after_nfd(s in unicode_string_strategy()) {
        let direct = s.nfc();
        let decomposed = s.nfd();
        let round_trip = decomposed.nfc();
        let left: &str = &direct;
        let right: &str = &round_trip;
        prop_assert_eq!(left, right, "NFC(s) != NFC(NFD(s))");
    }

    // NFKC(s) == NFKC(NFKD(s)).
    #[test]
    fn nfkc_stable_after_nfkd(s in unicode_string_strategy()) {
        let direct = s.nfkc();
        let decomposed = s.nfkd();
        let round_trip = decomposed.nfkc();
        let left: &str = &direct;
        let right: &str = &round_trip;
        prop_assert_eq!(left, right, "NFKC(s) != NFKC(NFKD(s))");
    }
}
// Composing after decomposing must give the same result as composing
// directly, on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // NFC(s) == NFC(NFD(s)).
    #[test]
    fn nfc_stable_after_nfd_compat_heavy(s in compat_heavy_strategy()) {
        let direct = s.nfc();
        let decomposed = s.nfd();
        let round_trip = decomposed.nfc();
        let left: &str = &direct;
        let right: &str = &round_trip;
        prop_assert_eq!(
            left,
            right,
            "NFC(s) != NFC(NFD(s)) for compat-heavy input"
        );
    }

    // NFKC(s) == NFKC(NFKD(s)).
    #[test]
    fn nfkc_stable_after_nfkd_compat_heavy(s in compat_heavy_strategy()) {
        let direct = s.nfkc();
        let decomposed = s.nfkd();
        let round_trip = decomposed.nfkc();
        let left: &str = &direct;
        let right: &str = &round_trip;
        prop_assert_eq!(
            left,
            right,
            "NFKC(s) != NFKC(NFKD(s)) for compat-heavy input"
        );
    }
}
// `quick_check` must agree with `normalize` for every form:
//   * `Yes`   -> `normalize` hands back the input borrowed,
//   * `No`    -> `normalize` allocates and the output differs from the input,
//   * `Maybe` -> nothing is asserted.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn quick_check_normalize_agreement_nfc(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfc();
        let verdict = normalizer.quick_check(&s);
        let output = normalizer.normalize(&s);
        // `Cow` has exactly two variants, so "not borrowed" means "owned".
        let borrowed = matches!(&output, Cow::Borrowed(_));
        match verdict {
            IsNormalized::Yes => {
                prop_assert!(
                    borrowed,
                    "NFC quick_check=Yes but normalize returned Owned for {:?}",
                    s
                );
            }
            IsNormalized::No => {
                prop_assert!(
                    !borrowed,
                    "NFC quick_check=No but normalize returned Borrowed for {:?}",
                    s
                );
                prop_assert_ne!(
                    &*output,
                    s.as_str(),
                    "NFC quick_check=No but normalize output equals input for {:?}",
                    s
                );
            }
            IsNormalized::Maybe => {}
        }
    }

    #[test]
    fn quick_check_normalize_agreement_nfd(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfd();
        let verdict = normalizer.quick_check(&s);
        let output = normalizer.normalize(&s);
        let borrowed = matches!(&output, Cow::Borrowed(_));
        match verdict {
            IsNormalized::Yes => {
                prop_assert!(
                    borrowed,
                    "NFD quick_check=Yes but normalize returned Owned for {:?}",
                    s
                );
            }
            IsNormalized::No => {
                prop_assert!(
                    !borrowed,
                    "NFD quick_check=No but normalize returned Borrowed for {:?}",
                    s
                );
                prop_assert_ne!(
                    &*output,
                    s.as_str(),
                    "NFD quick_check=No but normalize output equals input for {:?}",
                    s
                );
            }
            IsNormalized::Maybe => {}
        }
    }

    #[test]
    fn quick_check_normalize_agreement_nfkc(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfkc();
        let verdict = normalizer.quick_check(&s);
        let output = normalizer.normalize(&s);
        let borrowed = matches!(&output, Cow::Borrowed(_));
        match verdict {
            IsNormalized::Yes => {
                prop_assert!(
                    borrowed,
                    "NFKC quick_check=Yes but normalize returned Owned for {:?}",
                    s
                );
            }
            IsNormalized::No => {
                prop_assert!(
                    !borrowed,
                    "NFKC quick_check=No but normalize returned Borrowed for {:?}",
                    s
                );
                prop_assert_ne!(
                    &*output,
                    s.as_str(),
                    "NFKC quick_check=No but normalize output equals input for {:?}",
                    s
                );
            }
            IsNormalized::Maybe => {}
        }
    }

    #[test]
    fn quick_check_normalize_agreement_nfkd(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfkd();
        let verdict = normalizer.quick_check(&s);
        let output = normalizer.normalize(&s);
        let borrowed = matches!(&output, Cow::Borrowed(_));
        match verdict {
            IsNormalized::Yes => {
                prop_assert!(
                    borrowed,
                    "NFKD quick_check=Yes but normalize returned Owned for {:?}",
                    s
                );
            }
            IsNormalized::No => {
                prop_assert!(
                    !borrowed,
                    "NFKD quick_check=No but normalize returned Borrowed for {:?}",
                    s
                );
                prop_assert_ne!(
                    &*output,
                    s.as_str(),
                    "NFKD quick_check=No but normalize output equals input for {:?}",
                    s
                );
            }
            IsNormalized::Maybe => {}
        }
    }
}
// `normalize_to` into an empty buffer must produce the same text as
// `normalize`, and its boolean result must be `true` exactly when `normalize`
// returned a borrowed `Cow`.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn normalize_to_matches_normalize_nfc(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfc();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(sink.as_str(), &*reference, "NFC normalize_to output mismatch");
        prop_assert_eq!(
            reported_normalized,
            reference_borrowed,
            "NFC normalize_to return value mismatch"
        );
    }

    #[test]
    fn normalize_to_matches_normalize_nfd(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfd();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(sink.as_str(), &*reference, "NFD normalize_to output mismatch");
        prop_assert_eq!(
            reported_normalized,
            reference_borrowed,
            "NFD normalize_to return value mismatch"
        );
    }

    #[test]
    fn normalize_to_matches_normalize_nfkc(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfkc();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(sink.as_str(), &*reference, "NFKC normalize_to output mismatch");
        prop_assert_eq!(
            reported_normalized,
            reference_borrowed,
            "NFKC normalize_to return value mismatch"
        );
    }

    #[test]
    fn normalize_to_matches_normalize_nfkd(s in unicode_string_strategy()) {
        let normalizer = simd_normalizer::nfkd();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(sink.as_str(), &*reference, "NFKD normalize_to output mismatch");
        prop_assert_eq!(
            reported_normalized,
            reference_borrowed,
            "NFKD normalize_to return value mismatch"
        );
    }
}
// Same `normalize_to` / `normalize` agreement check as for general input, but
// fed with the supplementary-plane-heavy strategy.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn normalize_to_matches_normalize_supplementary_nfc(s in supplementary_heavy_strategy()) {
        let normalizer = simd_normalizer::nfc();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(
            sink.as_str(),
            &*reference,
            "NFC normalize_to mismatch for supplementary input"
        );
        prop_assert_eq!(reported_normalized, reference_borrowed);
    }

    #[test]
    fn normalize_to_matches_normalize_supplementary_nfd(s in supplementary_heavy_strategy()) {
        let normalizer = simd_normalizer::nfd();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(
            sink.as_str(),
            &*reference,
            "NFD normalize_to mismatch for supplementary input"
        );
        prop_assert_eq!(reported_normalized, reference_borrowed);
    }

    #[test]
    fn normalize_to_matches_normalize_supplementary_nfkc(s in supplementary_heavy_strategy()) {
        let normalizer = simd_normalizer::nfkc();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(
            sink.as_str(),
            &*reference,
            "NFKC normalize_to mismatch for supplementary input"
        );
        prop_assert_eq!(reported_normalized, reference_borrowed);
    }

    #[test]
    fn normalize_to_matches_normalize_supplementary_nfkd(s in supplementary_heavy_strategy()) {
        let normalizer = simd_normalizer::nfkd();
        let reference = normalizer.normalize(&s);
        let reference_borrowed = matches!(&reference, Cow::Borrowed(_));

        let mut sink = String::new();
        let reported_normalized = normalizer.normalize_to(&s, &mut sink);

        prop_assert_eq!(
            sink.as_str(),
            &*reference,
            "NFKD normalize_to mismatch for supplementary input"
        );
        prop_assert_eq!(reported_normalized, reference_borrowed);
    }
}
/// Verifies, for all four forms, that `normalize_to` appends to a non-empty
/// buffer: the existing prefix must be kept and followed by exactly the text
/// that `normalize` produces for the same input.
#[test]
fn normalize_to_appends_correctly_all_forms() {
    // Factories returning boxed closures, so all four forms fit in one table.
    type NormFn = fn() -> Box<dyn Fn(&str) -> Cow<'_, str> + 'static>;
    type NormToFn = fn() -> Box<dyn Fn(&str, &mut String) -> bool + 'static>;

    // Content already present in the buffer before `normalize_to` runs.
    const PREFIX: &str = "PREFIX:";

    let samples = [
        "hello",
        "\u{00C5}",
        "e\u{0301}",
        "\u{1100}\u{1161}",
        "\u{AC00}",
        "\u{FB01}",
        "\u{2126}",
        "\u{00A0}",
    ];

    let forms: [(&str, NormFn, NormToFn); 4] = [
        (
            "NFC",
            || Box::new(|s: &str| simd_normalizer::nfc().normalize(s)),
            || Box::new(|s: &str, buf: &mut String| simd_normalizer::nfc().normalize_to(s, buf)),
        ),
        (
            "NFD",
            || Box::new(|s: &str| simd_normalizer::nfd().normalize(s)),
            || Box::new(|s: &str, buf: &mut String| simd_normalizer::nfd().normalize_to(s, buf)),
        ),
        (
            "NFKC",
            || Box::new(|s: &str| simd_normalizer::nfkc().normalize(s)),
            || Box::new(|s: &str, buf: &mut String| simd_normalizer::nfkc().normalize_to(s, buf)),
        ),
        (
            "NFKD",
            || Box::new(|s: &str| simd_normalizer::nfkd().normalize(s)),
            || Box::new(|s: &str, buf: &mut String| simd_normalizer::nfkd().normalize_to(s, buf)),
        ),
    ];

    for (label, make_norm, make_norm_to) in &forms {
        let normalize = make_norm();
        let normalize_to = make_norm_to();

        for sample in &samples {
            // What the buffer should hold: the prefix, then the normalized text.
            let mut wanted = String::from(PREFIX);
            wanted.push_str(&normalize(sample));

            // What `normalize_to` actually leaves in a pre-filled buffer.
            let mut appended = String::from(PREFIX);
            normalize_to(sample, &mut appended);

            assert_eq!(
                appended, wanted,
                "{}: normalize_to did not append correctly for {:?}",
                label, sample
            );
        }
    }
}
// Consistency between the `is_*` predicates and the normalizing methods.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // If normalizing leaves the text unchanged, the matching predicate must
    // report the input as normalized.
    #[test]
    fn is_normalized_agrees_with_normalize_all_forms(s in unicode_string_strategy()) {
        let original = s.as_str();

        {
            let normalized = s.nfc();
            let text: &str = &normalized;
            if text == original {
                prop_assert!(s.is_nfc(), "nfc(s) == s but is_nfc is false for {:?}", s);
            }
        }
        {
            let normalized = s.nfd();
            let text: &str = &normalized;
            if text == original {
                prop_assert!(s.is_nfd(), "nfd(s) == s but is_nfd is false for {:?}", s);
            }
        }
        {
            let normalized = s.nfkc();
            let text: &str = &normalized;
            if text == original {
                prop_assert!(s.is_nfkc(), "nfkc(s) == s but is_nfkc is false for {:?}", s);
            }
        }
        {
            let normalized = s.nfkd();
            let text: &str = &normalized;
            if text == original {
                prop_assert!(s.is_nfkd(), "nfkd(s) == s but is_nfkd is false for {:?}", s);
            }
        }
    }

    // The compatibility predicates imply their canonical counterparts.
    #[test]
    fn is_normalized_subsumption(s in unicode_string_strategy()) {
        // Implication form: the right-hand predicate only runs when the
        // left-hand one holds.
        prop_assert!(
            !s.is_nfkc() || s.is_nfc(),
            "is_nfkc but not is_nfc for {:?}",
            s
        );
        prop_assert!(
            !s.is_nfkd() || s.is_nfd(),
            "is_nfkd but not is_nfd for {:?}",
            s
        );
    }

    // The output of each normalizing method must satisfy its own predicate.
    #[test]
    fn normalize_output_is_normalized(s in unicode_string_strategy()) {
        prop_assert!(
            s.nfc().is_nfc(),
            "nfc(s) result not recognized as NFC for {:?}",
            s
        );
        prop_assert!(
            s.nfd().is_nfd(),
            "nfd(s) result not recognized as NFD for {:?}",
            s
        );
        prop_assert!(
            s.nfkc().is_nfkc(),
            "nfkc(s) result not recognized as NFKC for {:?}",
            s
        );
        prop_assert!(
            s.nfkd().is_nfkd(),
            "nfkd(s) result not recognized as NFKD for {:?}",
            s
        );
    }
}
/// Pins the expected answers of all four `is_*` predicates for a handful of
/// hand-picked inputs.
#[test]
fn is_normalized_cross_form_deterministic() {
    // Plain ASCII: normalized in every form.
    let ascii = "hello";
    assert!(ascii.is_nfc());
    assert!(ascii.is_nfd());
    assert!(ascii.is_nfkc());
    assert!(ascii.is_nfkd());

    // Precomposed U+00C5: accepted by the composed forms only.
    let precomposed = "\u{00C5}";
    assert!(precomposed.is_nfc());
    assert!(!precomposed.is_nfd());
    assert!(precomposed.is_nfkc());
    assert!(!precomposed.is_nfkd());

    // "A" followed by U+030A: accepted by the decomposed forms only.
    let decomposed = "A\u{030A}";
    assert!(!decomposed.is_nfc());
    assert!(decomposed.is_nfd());
    assert!(!decomposed.is_nfkc());
    assert!(decomposed.is_nfkd());

    // U+FB01: accepted by the canonical forms, rejected by the compatibility forms.
    let ligature = "\u{FB01}";
    assert!(ligature.is_nfc());
    assert!(ligature.is_nfd());
    assert!(!ligature.is_nfkc());
    assert!(!ligature.is_nfkd());

    // Hangul syllable U+AC00: accepted by the composed forms only.
    let syllable = "\u{AC00}";
    assert!(syllable.is_nfc());
    assert!(!syllable.is_nfd());
    assert!(syllable.is_nfkc());
    assert!(!syllable.is_nfkd());

    // The empty string is normalized in every form.
    let empty = "";
    assert!(empty.is_nfc());
    assert!(empty.is_nfd());
    assert!(empty.is_nfkc());
    assert!(empty.is_nfkd());
}
// Decomposing a composed string must give the same result as decomposing the
// original, on general Unicode input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(1000))]

    // NFD(NFC(s)) == NFD(s).
    #[test]
    fn nfd_of_nfc_equals_nfd(s in unicode_string_strategy()) {
        let composed = s.nfc();
        let via_nfc = composed.nfd();
        let direct = s.nfd();
        let left: &str = &via_nfc;
        let right: &str = &direct;
        prop_assert_eq!(left, right, "NFD(NFC(s)) != NFD(s)");
    }

    // NFKD(NFKC(s)) == NFKD(s).
    #[test]
    fn nfkd_of_nfkc_equals_nfkd(s in unicode_string_strategy()) {
        let composed = s.nfkc();
        let via_nfkc = composed.nfkd();
        let direct = s.nfkd();
        let left: &str = &via_nfkc;
        let right: &str = &direct;
        prop_assert_eq!(left, right, "NFKD(NFKC(s)) != NFKD(s)");
    }
}
// Decomposing a composed string must give the same result as decomposing the
// original, on compatibility-heavy input.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    // NFD(NFC(s)) == NFD(s).
    #[test]
    fn nfd_of_nfc_equals_nfd_compat_heavy(s in compat_heavy_strategy()) {
        let composed = s.nfc();
        let via_nfc = composed.nfd();
        let direct = s.nfd();
        let left: &str = &via_nfc;
        let right: &str = &direct;
        prop_assert_eq!(
            left,
            right,
            "NFD(NFC(s)) != NFD(s) for compat-heavy input"
        );
    }

    // NFKD(NFKC(s)) == NFKD(s).
    #[test]
    fn nfkd_of_nfkc_equals_nfkd_compat_heavy(s in compat_heavy_strategy()) {
        let composed = s.nfkc();
        let via_nfkc = composed.nfkd();
        let direct = s.nfkd();
        let left: &str = &via_nfkc;
        let right: &str = &direct;
        prop_assert_eq!(
            left,
            right,
            "NFKD(NFKC(s)) != NFKD(s) for compat-heavy input"
        );
    }
}
/// Compares `simd_normalizer` with ICU4X on a fixed list of hand-picked
/// inputs: both the normalized output and the "is normalized" answer must
/// match for all four forms.
#[test]
fn cross_validate_icu4x_tricky_inputs() {
    let samples = [
        // Hangul syllables and conjoining jamo sequences.
        "\u{AC00}",
        "\u{1100}\u{1161}",
        "\u{1100}\u{1161}\u{11A8}",
        "\u{D4DB}",
        // Single code points with decomposition mappings.
        "\u{2126}",
        "\u{212A}",
        "\u{212B}",
        "\u{0958}",
        "\u{FB1D}",
        "\u{FB2A}",
        "\u{0340}",
        "\u{0341}",
        "\u{0344}",
        "\u{FB01}",
        "\u{00A0}",
        "\u{FF21}",
        "\u{2075}",
        "\u{00BC}",
        "\u{FB49}",
        // Base characters followed by several combining marks.
        "a\u{0308}\u{0301}",
        "A\u{0327}\u{030A}",
        "\u{1E0A}\u{0323}",
        "\u{2F800}",
        // Format characters placed between or before combining sequences.
        "e\u{200C}\u{0301}",
        "e\u{034F}\u{0301}",
        "\u{FEFF}a\u{0308}",
        // Mixed text, the empty string and plain ASCII.
        "Hello\u{0300}World\u{0301}\u{AC00}\u{FB01}",
        "",
        "The quick brown fox jumps over the lazy dog.",
    ];

    let reference_nfc = ComposingNormalizerBorrowed::new_nfc();
    let reference_nfd = DecomposingNormalizerBorrowed::new_nfd();
    let reference_nfkc = ComposingNormalizerBorrowed::new_nfkc();
    let reference_nfkd = DecomposingNormalizerBorrowed::new_nfkd();

    for sample in &samples {
        // Normalized output must be identical, form by form.
        let ours_nfc = simd_normalizer::nfc().normalize(sample);
        let theirs_nfc = reference_nfc.normalize(sample);
        assert_eq!(
            &*ours_nfc, &*theirs_nfc,
            "NFC mismatch with ICU4X for {:?}: simd={:?}, icu={:?}",
            sample, ours_nfc, theirs_nfc
        );

        let ours_nfd = simd_normalizer::nfd().normalize(sample);
        let theirs_nfd = reference_nfd.normalize(sample);
        assert_eq!(
            &*ours_nfd, &*theirs_nfd,
            "NFD mismatch with ICU4X for {:?}: simd={:?}, icu={:?}",
            sample, ours_nfd, theirs_nfd
        );

        let ours_nfkc = simd_normalizer::nfkc().normalize(sample);
        let theirs_nfkc = reference_nfkc.normalize(sample);
        assert_eq!(
            &*ours_nfkc, &*theirs_nfkc,
            "NFKC mismatch with ICU4X for {:?}: simd={:?}, icu={:?}",
            sample, ours_nfkc, theirs_nfkc
        );

        let ours_nfkd = simd_normalizer::nfkd().normalize(sample);
        let theirs_nfkd = reference_nfkd.normalize(sample);
        assert_eq!(
            &*ours_nfkd, &*theirs_nfkd,
            "NFKD mismatch with ICU4X for {:?}: simd={:?}, icu={:?}",
            sample, ours_nfkd, theirs_nfkd
        );

        // The "already normalized?" answers must agree as well.
        assert_eq!(
            sample.is_nfc(),
            reference_nfc.is_normalized(sample),
            "is_nfc mismatch with ICU4X for {:?}",
            sample
        );
        assert_eq!(
            sample.is_nfd(),
            reference_nfd.is_normalized(sample),
            "is_nfd mismatch with ICU4X for {:?}",
            sample
        );
        assert_eq!(
            sample.is_nfkc(),
            reference_nfkc.is_normalized(sample),
            "is_nfkc mismatch with ICU4X for {:?}",
            sample
        );
        assert_eq!(
            sample.is_nfkd(),
            reference_nfkd.is_normalized(sample),
            "is_nfkd mismatch with ICU4X for {:?}",
            sample
        );
    }
}
// Randomized cross-validation: for every form, `simd_normalizer` must produce
// exactly the text ICU4X produces.
proptest! {
    #![proptest_config(ProptestConfig::with_cases(500))]

    #[test]
    fn cross_validate_icu4x_nfc(s in unicode_string_strategy()) {
        let ours = simd_normalizer::nfc().normalize(&s);
        let reference = ComposingNormalizerBorrowed::new_nfc();
        let theirs = reference.normalize(&s);
        prop_assert_eq!(&*ours, &*theirs, "NFC cross-validation failed with ICU4X");
    }

    #[test]
    fn cross_validate_icu4x_nfd(s in unicode_string_strategy()) {
        let ours = simd_normalizer::nfd().normalize(&s);
        let reference = DecomposingNormalizerBorrowed::new_nfd();
        let theirs = reference.normalize(&s);
        prop_assert_eq!(&*ours, &*theirs, "NFD cross-validation failed with ICU4X");
    }

    #[test]
    fn cross_validate_icu4x_nfkc(s in unicode_string_strategy()) {
        let ours = simd_normalizer::nfkc().normalize(&s);
        let reference = ComposingNormalizerBorrowed::new_nfkc();
        let theirs = reference.normalize(&s);
        prop_assert_eq!(&*ours, &*theirs, "NFKC cross-validation failed with ICU4X");
    }

    #[test]
    fn cross_validate_icu4x_nfkd(s in unicode_string_strategy()) {
        let ours = simd_normalizer::nfkd().normalize(&s);
        let reference = DecomposingNormalizerBorrowed::new_nfkd();
        let theirs = reference.normalize(&s);
        prop_assert_eq!(&*ours, &*theirs, "NFKD cross-validation failed with ICU4X");
    }
}
/// Cross-form invariants for Hangul: a precomposed syllable and its jamo
/// sequence must convert into each other, and the compatibility forms must
/// agree with the canonical ones.
#[test]
fn hangul_cross_form_invariants() {
    // Two-jamo sequence and the syllable it composes to.
    let ga = "\u{AC00}";
    let ga_jamo = "\u{1100}\u{1161}";

    // Composition and decomposition are inverse on this pair.
    assert_eq!(&*ga_jamo.nfc(), ga);
    assert_eq!(&*ga.nfd(), ga_jamo);

    // Compatibility forms coincide with the canonical forms.
    assert_eq!(&*ga.nfkc(), &*ga.nfc());
    assert_eq!(&*ga_jamo.nfkc(), &*ga_jamo.nfc());
    assert_eq!(&*ga.nfkd(), &*ga.nfd());
    assert_eq!(&*ga_jamo.nfkd(), &*ga_jamo.nfd());

    // NFKC == NFC(NFKD).
    assert_eq!(&*ga.nfkc(), &*ga.nfkd().nfc());
    assert_eq!(&*ga_jamo.nfkc(), &*ga_jamo.nfkd().nfc());

    // NFD(NFC) == NFD.
    assert_eq!(&*ga.nfc().nfd(), &*ga.nfd());
    assert_eq!(&*ga_jamo.nfc().nfd(), &*ga_jamo.nfd());

    // Three-jamo sequence and the syllable it composes to.
    let lvt = "\u{1100}\u{1161}\u{11A8}";
    let syllable = "\u{AC01}";
    assert_eq!(&*lvt.nfc(), syllable);
    assert_eq!(&*syllable.nfd(), lvt);
    assert_eq!(&*syllable.nfkc(), &*syllable.nfkd().nfc());
    assert_eq!(&*syllable.nfc().nfd(), &*syllable.nfd());
}
/// For a fixed list of code points with canonical-only decompositions, the
/// compatibility forms must equal the canonical forms and the usual
/// cross-form identities must hold.
#[test]
fn composition_exclusions_cross_form() {
    let canonical_exclusions = [
        "\u{2126}",
        "\u{212A}",
        "\u{212B}",
        "\u{0340}",
        "\u{0341}",
        "\u{0344}",
        "\u{0958}",
        "\u{FB1D}",
    ];

    for input in canonical_exclusions.iter().copied() {
        // Normalize once per form and reuse the results below.
        let nfc = input.nfc();
        let nfd = input.nfd();
        let nfkc = input.nfkc();
        let nfkd = input.nfkd();

        assert_eq!(
            &*nfc, &*nfkc,
            "NFC != NFKC for canonical exclusion {:?}",
            input
        );
        assert_eq!(
            &*nfd, &*nfkd,
            "NFD != NFKD for canonical exclusion {:?}",
            input
        );

        let nfc_of_nfkd = nfkd.nfc();
        assert_eq!(
            &*nfkc, &*nfc_of_nfkd,
            "NFKC != NFC(NFKD) for canonical exclusion {:?}",
            input
        );

        let nfd_of_nfc = nfc.nfd();
        assert_eq!(
            &*nfd_of_nfc, &*nfd,
            "NFD(NFC) != NFD for canonical exclusion {:?}",
            input
        );
    }
}
/// For a few compatibility characters: NFC and NFD must leave the character
/// alone, NFKC and NFKD must both give the listed replacement, and the
/// cross-form identities must hold.
#[test]
fn compatibility_chars_cross_form() {
    // (compatibility character, expected NFKC/NFKD result)
    let compat_chars = [
        ("\u{FB01}", "fi"),
        ("\u{00A0}", " "),
        ("\u{FF21}", "A"),
        ("\u{2075}", "5"),
    ];

    for &(input, folded) in &compat_chars {
        // Canonical forms keep the character as is.
        assert_eq!(&*input.nfc(), input, "NFC changed compat char {:?}", input);
        assert_eq!(&*input.nfd(), input, "NFD changed compat char {:?}", input);

        // Compatibility forms replace it with the listed text.
        let nfkc = input.nfkc();
        assert_eq!(&*nfkc, folded, "NFKC mismatch for {:?}", input);
        let nfkd = input.nfkd();
        assert_eq!(&*nfkd, folded, "NFKD mismatch for {:?}", input);

        // Cross-form identities.
        let nfc_of_nfkd = nfkd.nfc();
        assert_eq!(
            &*nfkc, &*nfc_of_nfkd,
            "NFKC != NFC(NFKD) for compat char {:?}",
            input
        );
        let nfkd_of_nfkc = nfkc.nfkd();
        assert_eq!(
            &*nfkd_of_nfkc, &*nfkd,
            "NFKD(NFKC) != NFKD for compat char {:?}",
            input
        );
    }
}
/// Cross-form identities for base characters that carry more than one
/// combining mark.
#[test]
fn multi_combining_cross_form() {
    // "a" followed by U+0327 and U+0301.
    let stacked = "a\u{0327}\u{0301}";
    let composed = stacked.nfc();
    let decomposed = stacked.nfd();
    let compat_composed = stacked.nfkc();
    let compat_decomposed = stacked.nfkd();

    assert_eq!(
        &*composed, &*compat_composed,
        "NFC != NFKC for canonical combining sequence"
    );
    assert_eq!(
        &*decomposed, &*compat_decomposed,
        "NFD != NFKD for canonical combining sequence"
    );
    assert_eq!(
        &*compat_composed,
        &*compat_decomposed.nfc(),
        "NFKC != NFC(NFKD)"
    );
    assert_eq!(&*composed.nfd(), &*decomposed, "NFD(NFC) != NFD");

    // Precomposed U+1E0A followed by U+0323.
    let dotted = "\u{1E0A}\u{0323}";
    let dotted_composed = dotted.nfc();
    let dotted_decomposed = dotted.nfd();
    assert_eq!(
        &*dotted_composed.nfd(),
        &*dotted_decomposed,
        "NFD(NFC) != NFD for dot-above+dot-below"
    );

    let dotted_nfkc = dotted.nfkc();
    let dotted_nfkd = dotted.nfkd();
    assert_eq!(
        &*dotted_nfkc,
        &*dotted_nfkd.nfc(),
        "NFKC != NFC(NFKD) for dot-above+dot-below"
    );
}
/// Pins the `quick_check` verdict, and where relevant the borrowed/owned
/// shape of the `normalize` result, for a few fixed inputs.
#[test]
fn quick_check_deterministic_agreement() {
    let nfc = simd_normalizer::nfc();
    let nfd = simd_normalizer::nfd();
    let nfkc = simd_normalizer::nfkc();
    let nfkd = simd_normalizer::nfkd();

    // ASCII: `Yes` for every form, and `normalize` borrows the input.
    let ascii = "Hello, world!";
    assert_eq!(nfc.quick_check(ascii), IsNormalized::Yes);
    assert!(matches!(nfc.normalize(ascii), Cow::Borrowed(_)));
    assert_eq!(nfd.quick_check(ascii), IsNormalized::Yes);
    assert!(matches!(nfd.normalize(ascii), Cow::Borrowed(_)));
    assert_eq!(nfkc.quick_check(ascii), IsNormalized::Yes);
    assert!(matches!(nfkc.normalize(ascii), Cow::Borrowed(_)));
    assert_eq!(nfkd.quick_check(ascii), IsNormalized::Yes);
    assert!(matches!(nfkd.normalize(ascii), Cow::Borrowed(_)));

    // Precomposed U+00C5: `Yes` for NFC, `No` for NFD, which must allocate.
    let precomposed = "\u{00C5}";
    assert_eq!(nfc.quick_check(precomposed), IsNormalized::Yes);
    assert_eq!(nfd.quick_check(precomposed), IsNormalized::No);
    assert!(matches!(nfd.normalize(precomposed), Cow::Owned(_)));

    // U+FB01: `Yes` for NFC, `No` for NFKC, which must allocate.
    let fi = "\u{FB01}";
    assert_eq!(nfc.quick_check(fi), IsNormalized::Yes);
    assert_eq!(nfkc.quick_check(fi), IsNormalized::No);
    assert!(matches!(nfkc.normalize(fi), Cow::Owned(_)));

    // U+2126: `No` for every form.
    let ohm = "\u{2126}";
    assert_eq!(nfc.quick_check(ohm), IsNormalized::No);
    assert_eq!(nfd.quick_check(ohm), IsNormalized::No);
    assert_eq!(nfkc.quick_check(ohm), IsNormalized::No);
    assert_eq!(nfkd.quick_check(ohm), IsNormalized::No);
}
/// Runs every cross-form identity plus the ICU4X comparison over a fixed list
/// of inputs.
///
/// For each input it checks `NFKC == NFC(NFKD)`, `NFKD == NFD(NFKC)`,
/// `NFD(NFC) == NFD`, `NFKD(NFKC) == NFKD`, and that each of the four
/// `simd_normalizer` outputs equals the corresponding ICU4X output.
#[test]
fn comprehensive_deterministic_cross_form() {
    let icu_nfc = ComposingNormalizerBorrowed::new_nfc();
    let icu_nfd = DecomposingNormalizerBorrowed::new_nfd();
    let icu_nfkc = ComposingNormalizerBorrowed::new_nfkc();
    let icu_nfkd = DecomposingNormalizerBorrowed::new_nfkd();

    // Build each simd normalizer once rather than on every call in the loop.
    let simd_nfc = simd_normalizer::nfc();
    let simd_nfd = simd_normalizer::nfd();
    let simd_nfkc = simd_normalizer::nfkc();
    let simd_nfkd = simd_normalizer::nfkd();

    // A fixed-size array is enough here; no heap-allocated `Vec` is needed.
    let inputs = [
        "",
        "ASCII only",
        "\u{00E9}",
        "e\u{0301}",
        "\u{AC00}",
        "\u{1100}\u{1161}",
        "\u{1100}\u{1161}\u{11A8}",
        "\u{FB01}",
        "\u{00A0}",
        "\u{2126}",
        "\u{212A}",
        "\u{212B}",
        "\u{FF21}\u{FF22}\u{FF23}",
        "\u{0958}",
        "\u{0340}",
        "a\u{0308}\u{0301}",
        "A\u{0327}\u{030A}",
        "\u{1E0A}\u{0323}",
        "\u{FEFF}a\u{0308}",
        "\u{2F800}",
        "\u{00BC}",
        "\u{2075}",
        "e\u{200C}\u{0301}",
        "\u{D4DB}",
    ];

    for &input in &inputs {
        let nfc = simd_nfc.normalize(input);
        let nfd = simd_nfd.normalize(input);
        let nfkc = simd_nfkc.normalize(input);
        let nfkd = simd_nfkd.normalize(input);

        // Cross-form identities.
        let nfc_of_nfkd = simd_nfc.normalize(&nfkd);
        assert_eq!(&*nfkc, &*nfc_of_nfkd, "NFKC != NFC(NFKD) for {:?}", input);
        let nfd_of_nfkc = simd_nfd.normalize(&nfkc);
        assert_eq!(&*nfkd, &*nfd_of_nfkc, "NFKD != NFD(NFKC) for {:?}", input);
        let nfd_of_nfc = simd_nfd.normalize(&nfc);
        assert_eq!(&*nfd_of_nfc, &*nfd, "NFD(NFC) != NFD for {:?}", input);
        let nfkd_of_nfkc = simd_nfkd.normalize(&nfkc);
        assert_eq!(&*nfkd_of_nfkc, &*nfkd, "NFKD(NFKC) != NFKD for {:?}", input);

        // Agreement with ICU4X, form by form.
        assert_eq!(
            &*nfc,
            &*icu_nfc.normalize(input),
            "NFC ICU4X mismatch for {:?}",
            input
        );
        assert_eq!(
            &*nfd,
            &*icu_nfd.normalize(input),
            "NFD ICU4X mismatch for {:?}",
            input
        );
        assert_eq!(
            &*nfkc,
            &*icu_nfkc.normalize(input),
            "NFKC ICU4X mismatch for {:?}",
            input
        );
        assert_eq!(
            &*nfkd,
            &*icu_nfkd.normalize(input),
            "NFKD ICU4X mismatch for {:?}",
            input
        );
    }
}