use std::borrow::Cow;
use simd_normalizer::UnicodeNormalization;
fn main() {
println!("=== simd-normalizer: Basic Normalization Examples ===\n");
section_four_forms();
section_trait_api();
section_constructor_api();
section_zero_copy();
section_quick_check();
section_normalize_to();
section_real_world_examples();
println!("=== All examples completed. ===");
}
fn section_four_forms() {
println!("--- 1. The Four Normalization Forms ---\n");
let decomposed = "e\u{0301}";
let precomposed = "\u{00E9}";
println!(
"Input (decomposed): {:?} (e + \\u{{0301}} combining acute)",
decomposed
);
println!(
"Input (precomposed): {:?} (\\u{{00E9}} e-acute)\n",
precomposed
);
let nfc_result = decomposed.nfc();
println!("NFC of {:?} = {:?}", decomposed, &*nfc_result);
println!(" NFC composes: e + \\u{{0301}} -> \\u{{00E9}} (precomposed e-acute)");
let nfd_result = precomposed.nfd();
println!("NFD of {:?} = {:?}", precomposed, &*nfd_result);
println!(" NFD decomposes: \\u{{00E9}} -> e + \\u{{0301}}");
let nfkc_result = "\u{FB01}".nfkc(); println!(
"NFKC of {:?} (fi ligature) = {:?}",
"\u{FB01}", &*nfkc_result
);
println!(" NFKC decomposes compatibility characters: fi ligature -> \"fi\"");
let nfkd_result = "\u{FB01}".nfkd();
println!(
"NFKD of {:?} (fi ligature) = {:?}",
"\u{FB01}", &*nfkd_result
);
println!(" NFKD decomposes compatibility characters without recomposing");
println!();
}
fn section_trait_api() {
println!("--- 2. Trait API (UnicodeNormalization) ---\n");
let input = "caf\u{00E9}";
println!("Input: {:?}", input);
let nfc: Cow<'_, str> = input.nfc();
let nfd: Cow<'_, str> = input.nfd();
let nfkc: Cow<'_, str> = input.nfkc();
let nfkd: Cow<'_, str> = input.nfkd();
println!(" .nfc() = {:?}", &*nfc);
println!(" .nfd() = {:?}", &*nfd);
println!(" .nfkc() = {:?}", &*nfkc);
println!(" .nfkd() = {:?}", &*nfkd);
println!(" .is_nfc() = {}", input.is_nfc());
println!(" .is_nfd() = {}", input.is_nfd());
println!(" .is_nfkc() = {}", input.is_nfkc());
println!(" .is_nfkd() = {}", input.is_nfkd());
println!();
}
fn section_constructor_api() {
println!("--- 3. Constructor API (simd_normalizer::nfc(), etc.) ---\n");
let normalizer = simd_normalizer::nfc();
let input = "e\u{0301}"; println!("Input: {:?} (e + combining acute)", input);
let result = normalizer.normalize(input);
println!(" normalizer.normalize() = {:?}", &*result);
let is_norm = normalizer.is_normalized(input);
println!(" normalizer.is_normalized() = {}", is_norm);
let qc = normalizer.quick_check(input);
println!(" normalizer.quick_check() = {:?}", qc);
println!();
}
fn section_zero_copy() {
println!("--- 4. Zero-Copy (Cow::Borrowed) Detection ---\n");
let normalizer = simd_normalizer::nfc();
let already_nfc = "caf\u{00E9}";
let result = normalizer.normalize(already_nfc);
let borrowed = matches!(&result, Cow::Borrowed(_));
println!("Input: {:?} (already NFC)", already_nfc);
println!(" Result is Cow::Borrowed (zero alloc): {}", borrowed);
let not_nfc = "e\u{0301}";
let result2 = normalizer.normalize(not_nfc);
let borrowed2 = matches!(&result2, Cow::Borrowed(_));
println!("Input: {:?} (not NFC, needs normalization)", not_nfc);
println!(" Result is Cow::Borrowed (zero alloc): {}", borrowed2);
let ascii = "Hello, world!";
let result3 = normalizer.normalize(ascii);
let borrowed3 = matches!(&result3, Cow::Borrowed(_));
println!("Input: {:?} (pure ASCII)", ascii);
println!(" Result is Cow::Borrowed (zero alloc): {}", borrowed3);
println!();
}
fn section_quick_check() {
println!("--- 5. quick_check() and is_normalized() ---\n");
let nfc = simd_normalizer::nfc();
let nfd = simd_normalizer::nfd();
let inputs = [
("Hello", "pure ASCII"),
("\u{00E9}", "precomposed e-acute (U+00E9)"),
("e\u{0301}", "e + combining acute (decomposed)"),
("\u{AC00}", "Hangul syllable GA (U+AC00)"),
];
println!(" quick_check returns IsNormalized::Yes, No, or Maybe.");
println!(" is_normalized resolves Maybe by performing full normalization.\n");
for (input, desc) in &inputs {
let nfc_qc = nfc.quick_check(input);
let nfc_is = nfc.is_normalized(input);
let nfd_qc = nfd.quick_check(input);
let nfd_is = nfd.is_normalized(input);
println!(" {:?} ({})", input, desc);
println!(
" NFC quick_check={:?}, is_normalized={}",
nfc_qc, nfc_is
);
println!(
" NFD quick_check={:?}, is_normalized={}",
nfd_qc, nfd_is
);
}
println!();
}
fn section_normalize_to() {
println!("--- 6. normalize_to() with Pre-Allocated Buffer ---\n");
let normalizer = simd_normalizer::nfc();
let mut buf = String::with_capacity(256);
let inputs = [
"e\u{0301}", "caf\u{00E9}", "\u{1100}\u{1161}", ];
for input in &inputs {
buf.clear();
let was_already = normalizer.normalize_to(input, &mut buf);
println!(" Input: {:?}", input);
println!(" Output: {:?}", buf);
println!(" Was already normalized: {}", was_already);
}
println!("\n Buffer reuse avoids repeated heap allocations in tight loops.");
println!();
}
fn section_real_world_examples() {
println!("--- 7. Real-World Unicode Examples ---\n");
println!(" [E-acute composition]");
let decomposed = "e\u{0301}";
let composed = decomposed.nfc();
println!(" NFD \"e\\u{{0301}}\" -> NFC {:?}", &*composed);
let round_trip = composed.nfd();
println!(" NFC {:?} -> NFD {:?}", &*composed, &*round_trip);
println!("\n [Hangul syllables]");
let hangul = "\u{AC00}"; let nfd_hangul = hangul.nfd();
println!(
" Hangul GA (U+AC00) NFD = {:?} (jamo L \\u{{1100}} + V \\u{{1161}})",
&*nfd_hangul
);
let jamo = "\u{1100}\u{1161}";
let nfc_jamo = jamo.nfc();
println!(
" Jamo \\u{{1100}}\\u{{1161}} NFC = {:?} (syllable GA U+AC00)",
&*nfc_jamo
);
let hangul_lvt = "\u{D55C}"; let nfd_lvt = hangul_lvt.nfd();
println!(
" Hangul HAN (U+D55C) NFD = {:?} (L + A + N jamo)",
&*nfd_lvt
);
println!("\n [Fullwidth characters (compatibility)]");
let fullwidth = "\u{FF21}\u{FF22}\u{FF23}"; println!(" Fullwidth: {:?}", fullwidth);
let nfkc_fw = fullwidth.nfkc();
println!(" NFKC: {:?} (normalized to standard ASCII)", &*nfkc_fw);
let nfc_fw = fullwidth.nfc();
println!(
" NFC: {:?} (canonical normalization does NOT change compatibility chars)",
&*nfc_fw
);
println!("\n [fi ligature (compatibility decomposition)]");
let fi = "\u{FB01}";
println!(" Input: {:?} (U+FB01 LATIN SMALL LIGATURE FI)", fi);
println!(" NFC: {:?} (unchanged -- canonical only)", &*fi.nfc());
println!(" NFKC: {:?} (decomposed to f + i)", &*fi.nfkc());
println!(" NFKD: {:?} (decomposed to f + i)", &*fi.nfkd());
println!("\n [Superscript digits (compatibility decomposition)]");
let superscripts = "\u{00B9}\u{00B2}\u{00B3}";
println!(" Input: {:?} (superscript 1, 2, 3)", superscripts);
println!(" NFC: {:?} (unchanged)", &*superscripts.nfc());
println!(
" NFKC: {:?} (decomposed to plain digits)",
&*superscripts.nfkc()
);
println!(
" NFKD: {:?} (decomposed to plain digits)",
&*superscripts.nfkd()
);
println!("\n [Key takeaway]");
println!(" NFC/NFD = canonical normalization (preserves meaning)");
println!(" NFKC/NFKD = compatibility normalization (lossy: strips formatting)");
println!(" Use NFC for storage, NFKC for search/matching.");
println!();
}