disarm 0.10.0

Unicode canonicalization and TR39 confusable analysis: building blocks for text-security pipelines (homoglyph/bidi/zalgo handling) plus standards-based transliteration
Documentation
//! Unicode TR39 confusable character mappings (multi-target).
//!
//! The mapping data is auto-generated from confusables.txt by scripts/gen_confusables.py.
//! Version: 17.0.0
//!
//! Contains ~2,063 non-Latin → Latin mappings and ~1,369 non-Cyrillic →
//! Cyrillic mappings. Uses compile-time perfect hash maps (`phf`) for O(1)
//! lookups. Covers Cyrillic, Greek, Armenian, Georgian, CJK compatibility,
//! mathematical symbols, fullwidth forms, and other confusable characters.
//!
//! PHF maps generated by build.rs from src/tables/data/confusables_to_*.tsv.
//! To update the data, regenerate with: python scripts/gen_confusables.py

// Non-Latin → Latin confusable mappings.
// Splices in the table source emitted under `$OUT_DIR` at build time
// (presumably the definition of `TO_LATIN` used below; verify against build.rs).
include!(concat!(env!("OUT_DIR"), "/confusables_phf.rs"));

// Non-Cyrillic → Cyrillic confusable mappings.
// Same mechanism as above, for the Cyrillic-target table
// (presumably the definition of `TO_CYRILLIC`; verify against build.rs).
include!(concat!(env!("OUT_DIR"), "/confusables_to_cyrillic_phf.rs"));

/// Map a single character to its confusable equivalent in `target_script`.
///
/// The script name is first resolved to a table via [`resolve_map`]; the
/// character is then probed in that table.
///
/// Returns `Some(replacement)` when `ch` has an entry for the requested
/// script. Returns `None` when `ch` has no entry, or when `target_script`
/// is not one of the supported names.
///
/// Supported target scripts: `"latin"`, `"cyrillic"`.
#[inline]
pub fn lookup(ch: char, target_script: &str) -> Option<&'static str> {
    // Bail out with `None` right away if the script name is unknown.
    let table = resolve_map(target_script)?;
    table.get(&ch).copied()
}

/// Translate a `target_script` name into the matching confusables PHF map.
///
/// Intended to be called once up front so that a per-character loop can probe
/// the returned map directly instead of re-matching the script name for every
/// character (#236 / #233 review item).
///
/// Recognized names are `"latin"` and `"cyrillic"` (exact match); any other
/// value yields `None`.
///
/// Note: there is intentionally **no** ASCII fast path built on top of this —
/// the latin table maps ASCII source code points (e.g. U+007C `|`→`l`,
/// U+0022 `"`→`''`, U+0060 `` ` ``→`'`), so ASCII input is *not* identity even
/// for `target="latin"`.
#[inline]
pub fn resolve_map(target_script: &str) -> Option<&'static phf::Map<char, &'static str>> {
    let table: &'static phf::Map<char, &'static str> = match target_script {
        "latin" => &TO_LATIN,
        "cyrillic" => &TO_CYRILLIC,
        // Unknown script name: no table to hand back.
        _ => return None,
    };
    Some(table)
}