relon-unicode 0.1.0-rc2

Leaf Unicode tables, case-folding / normalization algorithms, and the glob matcher shared across Relon crates.
Documentation
//! Compile-time generator for the simple Unicode case-folding tables
//! the wasm-AOT `upper` / `lower` stdlib bodies look up at runtime.
//!
//! The tables are derived from Rust's bundled Unicode data via
//! `char::to_uppercase` / `char::to_lowercase`. We keep only the
//! **simple** mappings (input codepoint maps to exactly one different
//! codepoint) — multi-codepoint cases such as `ß` -> `SS` are
//! deferred to a future "full case folding" pass. The wasm body emits
//! the input codepoint unchanged when no entry hits, so multi-codepoint
//! cases simply pass through verbatim instead of being mangled into
//! the first replacement character.
//!
//! The generated file lives at `$OUT_DIR/case_folding_table.rs` and is
//! included by `src/case_folding.rs`. It defines:
//!
//! ```ignore
//! pub(crate) const SIMPLE_UPPER_FOLDING: &[(u32, u32)] = &[..];
//! pub(crate) const SIMPLE_LOWER_FOLDING: &[(u32, u32)] = &[..];
//! ```
//!
//! Both arrays are sorted ascending by the input codepoint so the
//! runtime binary search in the wasm body has a stable contract.

use std::env;
use std::fs;
use std::io::Write;
use std::path::PathBuf;

/// Builds a `(source, target)` table of simple one-to-one case mappings.
///
/// Every Unicode scalar value in `0..=0x10FFFF` is passed through `map`
/// (surrogate code points are skipped because they are not valid `char`s).
/// A pair is recorded only when `map` yields exactly one character and
/// that character differs from the input; empty, identity, and
/// multi-character results are dropped.
///
/// The returned vector is ordered ascending by the source code point.
fn collect_mapping<F, I>(map: F) -> Vec<(u32, u32)>
where
    F: Fn(char) -> I,
    I: Iterator<Item = char>,
{
    let mut table: Vec<(u32, u32)> = (0u32..=0x10_FFFFu32)
        .filter_map(|code| {
            // `from_u32` rejects the surrogate range, which has no `char`.
            let source = char::from_u32(code)?;
            let mut mapped = map(source);
            let target = mapped.next()?;
            // A second character means a multi-codepoint mapping, which is
            // out of scope for the simple table; identity mappings carry
            // no information and are omitted as well.
            if mapped.next().is_some() || target == source {
                return None;
            }
            Some((code, target as u32))
        })
        .collect();
    // The scan already runs in ascending order; the sort keeps the
    // ordering guarantee explicit for the consumers of the table.
    table.sort_by_key(|&(source, _)| source);
    table
}

/// Emits `entries` to `out` as a Rust constant slice declaration.
///
/// The output has the shape
/// `pub(crate) const <name>: &[(u32, u32)] = &[ ... ];` with one
/// `(key, value)` tuple per line, each number printed as `0x`-prefixed
/// hexadecimal padded to at least four digits.
///
/// # Errors
///
/// Returns the first I/O error reported by `out`.
fn write_table(out: &mut impl Write, name: &str, entries: &[(u32, u32)]) -> std::io::Result<()> {
    writeln!(out, "pub(crate) const {name}: &[(u32, u32)] = &[")?;
    // Stop at the first failing row and hand the error back to the caller.
    entries
        .iter()
        .try_for_each(|&(k, v)| writeln!(out, "    ({k:#06x}, {v:#06x}),"))?;
    writeln!(out, "];")
}

/// Build-script entry point: generates `$OUT_DIR/case_folding_table.rs`.
///
/// The generated file contains a header comment followed by the
/// `SIMPLE_UPPER_FOLDING` and `SIMPLE_LOWER_FOLDING` tables, derived from
/// `char::to_uppercase` and `char::to_lowercase` respectively.
///
/// # Panics
///
/// Panics (failing the build) if `OUT_DIR` is not set, or if the output
/// file cannot be created, written, or flushed.
fn main() {
    // The tables depend only on this script and the toolchain's Unicode
    // data, so there is no other input file to watch.
    println!("cargo:rerun-if-changed=build.rs");

    let upper = collect_mapping(char::to_uppercase);
    let lower = collect_mapping(char::to_lowercase);

    let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR not set"));
    let path = out_dir.join("case_folding_table.rs");
    let file = fs::File::create(&path).expect("create case_folding_table.rs");
    // Each table row is a separate small write; buffering avoids issuing
    // a syscall per formatted fragment.
    let mut f = std::io::BufWriter::new(file);

    writeln!(f, "// Auto-generated by build.rs. Do not edit by hand.").expect("write header");
    writeln!(f).expect("write blank line after header");
    write_table(&mut f, "SIMPLE_UPPER_FOLDING", &upper).expect("write upper table");
    writeln!(f).expect("write blank line between tables");
    write_table(&mut f, "SIMPLE_LOWER_FOLDING", &lower).expect("write lower table");

    // Dropping a `BufWriter` discards flush errors; flush explicitly so a
    // truncated table fails the build instead of going unnoticed.
    f.flush().expect("flush case_folding_table.rs");
}