use std::env;
use std::io::{self, BufRead, IsTerminal};
/// Entry point: parses the command line, announces the chosen form on stderr,
/// then normalizes either the built-in demo strings or every line of stdin.
///
/// The demo runs when `--demo` was given or when stdin is a terminal (nothing
/// was piped in). A failure while reading stdin is reported on stderr and the
/// process exits with status 1 instead of panicking.
fn main() {
    let args: Vec<String> = env::args().collect();
    let (form, force_demo) = parse_args(&args);
    eprintln!("normalize_file: using {} normalization\n", form_name(form));

    let stdin = io::stdin();
    // `||` short-circuits, so stdin is only probed when the demo was not forced.
    if force_demo || stdin.is_terminal() {
        eprintln!("(running built-in demo)\n");
        process_lines(demo_strings().into_iter(), form);
        return;
    }

    // Read errors (e.g. input that is not valid UTF-8) are an expected runtime
    // condition for a filter, not a bug: report them cleanly rather than panic.
    let lines = stdin.lock().lines().map(|line| {
        line.unwrap_or_else(|err| {
            eprintln!("normalize_file: failed to read stdin: {err}");
            std::process::exit(1)
        })
    });
    process_lines(lines, form);
}
/// Unicode normalization form selectable on the command line.
///
/// `Debug`, `PartialEq` and `Eq` are derived so values can be compared and
/// shown in diagnostics and assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Form {
    /// Canonical Decomposition, then Canonical Composition (the default).
    Nfc,
    /// Canonical Decomposition.
    Nfd,
    /// Compatibility Decomposition, then Canonical Composition.
    Nfkc,
    /// Compatibility Decomposition.
    Nfkd,
}
fn form_name(f: Form) -> &'static str {
match f {
Form::Nfc => "NFC",
Form::Nfd => "NFD",
Form::Nfkc => "NFKC",
Form::Nfkd => "NFKD",
}
}
/// Parses the command line into the selected normalization form and the
/// "force demo" switch.
///
/// The first element of `args` (the program name) is skipped. The form
/// defaults to NFC and the last form flag given wins. `--help` / `-h` prints
/// the help text to stderr and exits with status 0; any unrecognized argument
/// prints a short usage line to stderr and exits with status 1.
fn parse_args(args: &[String]) -> (Form, bool) {
    const USAGE: &str = "Usage: normalize_file [--nfc|--nfd|--nfkc|--nfkd] [--demo]";
    // Lines printed after the usage line for `--help`, one per entry.
    const HELP_BODY: &[&str] = &[
        " Reads lines from stdin (or runs a built-in demo) and",
        " normalizes each line to the chosen Unicode form (default: NFC).",
        "",
        "Options:",
        " --nfc Canonical Decomposition, then Canonical Composition (default)",
        " --nfd Canonical Decomposition",
        " --nfkc Compatibility Decomposition, then Canonical Composition",
        " --nfkd Compatibility Decomposition",
        " --demo Run with built-in sample strings (ignores stdin)",
    ];

    let mut selected = Form::Nfc;
    let mut run_demo = false;

    for flag in args.iter().skip(1).map(String::as_str) {
        // Every arm either yields a form, moves on to the next flag, or exits.
        selected = match flag {
            "--nfc" => Form::Nfc,
            "--nfd" => Form::Nfd,
            "--nfkc" => Form::Nfkc,
            "--nfkd" => Form::Nfkd,
            "--demo" => {
                run_demo = true;
                continue;
            }
            "-h" | "--help" => {
                eprintln!("{}", USAGE);
                for text in HELP_BODY {
                    eprintln!("{}", text);
                }
                std::process::exit(0)
            }
            other => {
                eprintln!("Unknown argument: {other}");
                eprintln!("{}", USAGE);
                std::process::exit(1)
            }
        };
    }

    (selected, run_demo)
}
/// Normalizes every line from `lines` to `form` and prints a per-line report
/// followed by a summary to stdout.
///
/// For each line this prints its 1-based number, whether `normalize_to_form`
/// reported it as already normalized, and the `Debug` renderings of the input
/// and the normalized output. The summary gives the total line count and how
/// many lines were already normalized versus changed.
fn process_lines(lines: impl Iterator<Item = String>, form: Form) {
    // One output buffer reused for every line.
    let mut scratch = String::with_capacity(256);
    let mut total = 0u64;
    let mut unchanged = 0u64;

    for (number, line) in (1u64..).zip(lines) {
        scratch.clear();
        let status = if normalize_to_form(&line, &mut scratch, form) {
            unchanged += 1;
            "(already normalized)"
        } else {
            "(changed)"
        };
        total = number;

        println!("--- line {} {}", number, status);
        println!(" input: {:?}", line);
        println!(" output: {:?}", scratch);
    }

    println!();
    println!(
        "Processed {} line(s): {} already in {}, {} changed.",
        total,
        unchanged,
        form_name(form),
        total - unchanged,
    );
}
fn normalize_to_form(input: &str, out: &mut String, form: Form) -> bool {
match form {
Form::Nfc => simd_normalizer::nfc().normalize_to(input, out),
Form::Nfd => simd_normalizer::nfd().normalize_to(input, out),
Form::Nfkc => simd_normalizer::nfkc().normalize_to(input, out),
Form::Nfkd => simd_normalizer::nfkd().normalize_to(input, out),
}
}
/// Builds the sample inputs used by the built-in demo, in display order.
fn demo_strings() -> Vec<String> {
    const SAMPLES: &[&str] = &[
        // Plain ASCII.
        "Hello, world!",
        // Precomposed e-acute.
        "caf\u{00E9}",
        // "e" followed by a combining acute accent.
        "caf\u{0065}\u{0301}",
        // "o" with combining tilde and combining acute.
        "\u{006F}\u{0303}\u{0301}",
        // Precomposed Hangul syllable.
        "\u{AC00}",
        // Conjoining Hangul jamo sequence.
        "\u{1100}\u{1161}\u{11A8}",
        // Contains the "fi" ligature.
        "of\u{FB01}ce",
        // Fullwidth digits.
        "\u{FF11}\u{FF12}\u{FF13}",
        // Superscript two.
        "x\u{00B2}",
        // Greek omicron followed by a combining acute accent.
        "\u{03BF}\u{0301}",
        // Zero-width joiner between letters.
        "a\u{200D}b",
        // Sentence with several precomposed accented letters.
        "R\u{00E9}sum\u{00E9} for Na\u{00EF}ve Caf\u{00E9}",
    ];

    SAMPLES.iter().map(|&sample| String::from(sample)).collect()
}