disarm 0.10.0

Unicode canonicalization and TR39 confusable analysis: building blocks for text-security pipelines (homoglyph/bidi/zalgo handling) plus standards-based transliteration
Documentation
//! Named constants for Unicode codepoint ranges used throughout the library.
//!
//! Centralising these ranges prevents magic numbers from being duplicated
//! across modules and makes future Unicode version updates easier to apply.

// ── CJK ──────────────────────────────────────────────────────────────────────

/// CJK Unified Ideographs (U+4E00–U+9FFF)
pub const CJK_UNIFIED: std::ops::RangeInclusive<u32> = 0x4E00..=0x9FFF;

/// CJK Extension A (U+3400–U+4DBF)
pub const CJK_EXT_A: std::ops::RangeInclusive<u32> = 0x3400..=0x4DBF;

/// CJK Extension A + Unified combined range used for capacity estimation
/// (U+3000–U+9FFF, covers CJK Symbols, kana, and Unified Ideographs)
pub const CJK_CAPACITY_RANGE: std::ops::RangeInclusive<u32> = 0x3000..=0x9FFF;

/// CJK Compatibility Ideographs (U+F900–U+FAFF)
pub const CJK_COMPAT: std::ops::RangeInclusive<u32> = 0xF900..=0xFAFF;

// ── Hangul ────────────────────────────────────────────────────────────────────

/// Hangul Syllables (U+AC00–U+D7AF)
pub const HANGUL_SYLLABLES: std::ops::RangeInclusive<u32> = 0xAC00..=0xD7AF;

/// Hangul Compatibility Jamo (U+3131–U+3163)
pub const HANGUL_COMPAT_JAMO: std::ops::RangeInclusive<u32> = 0x3131..=0x3163;

// ── Kana ──────────────────────────────────────────────────────────────────────

/// Hiragana block (U+3040–U+309F)
pub const HIRAGANA: std::ops::RangeInclusive<u32> = 0x3040..=0x309F;

/// Katakana block (U+30A0–U+30FF)
pub const KATAKANA: std::ops::RangeInclusive<u32> = 0x30A0..=0x30FF;

/// Half-width Katakana (U+FF65–U+FF9F)
pub const KATAKANA_HALFWIDTH: std::ops::RangeInclusive<u32> = 0xFF65..=0xFF9F;

// ── Indic (Brahmic) ─────────────────────────────────────────────────────────

/// Indic Brahmic scripts: Devanagari through Sinhala (U+0900–U+0DFF)
pub const INDIC: std::ops::RangeInclusive<u32> = 0x0900..=0x0DFF;

// ── Tibetan ─────────────────────────────────────────────────────────────────

/// Tibetan block (U+0F00–U+0FFF)
pub const TIBETAN: std::ops::RangeInclusive<u32> = 0x0F00..=0x0FFF;

// ── Myanmar ─────────────────────────────────────────────────────────────────

/// Myanmar block (U+1000–U+109F)
pub const MYANMAR: std::ops::RangeInclusive<u32> = 0x1000..=0x109F;

// ── Khmer ───────────────────────────────────────────────────────────────────

/// Khmer block (U+1780–U+17FF)
pub const KHMER: std::ops::RangeInclusive<u32> = 0x1780..=0x17FF;

// ── Balinese ──────────────────────────────────────────────────────────────

/// Balinese block (U+1B00–U+1B7F)
pub const BALINESE: std::ops::RangeInclusive<u32> = 0x1B00..=0x1B7F;

// ── Javanese ──────────────────────────────────────────────────────────────

/// Javanese block (U+A980–U+A9DF)
pub const JAVANESE: std::ops::RangeInclusive<u32> = 0xA980..=0xA9DF;

// ── Sundanese ─────────────────────────────────────────────────────────────

/// Sundanese block (U+1B80–U+1BBF)
pub const SUNDANESE: std::ops::RangeInclusive<u32> = 0x1B80..=0x1BBF;

// ── Tai Tham ──────────────────────────────────────────────────────────────

/// Tai Tham (Lanna) block (U+1A20–U+1AAF)
pub const TAI_THAM: std::ops::RangeInclusive<u32> = 0x1A20..=0x1AAF;

// ── Cham ──────────────────────────────────────────────────────────────────

/// Cham block (U+AA00–U+AA5F)
pub const CHAM: std::ops::RangeInclusive<u32> = 0xAA00..=0xAA5F;

// ── Batak ─────────────────────────────────────────────────────────────────

/// Batak block (U+1BC0–U+1BFF)
pub const BATAK: std::ops::RangeInclusive<u32> = 0x1BC0..=0x1BFF;

// ── Buginese ──────────────────────────────────────────────────────────────

/// Buginese (Lontara) block (U+1A00–U+1A1F)
pub const BUGINESE: std::ops::RangeInclusive<u32> = 0x1A00..=0x1A1F;

// ── Philippine scripts ───────────────────────────────────────────────────

/// Tagalog block (U+1700–U+171F)
pub const TAGALOG: std::ops::RangeInclusive<u32> = 0x1700..=0x171F;

/// Hanunoo block (U+1720–U+173F)
pub const HANUNOO: std::ops::RangeInclusive<u32> = 0x1720..=0x173F;

/// Buhid block (U+1740–U+175F)
pub const BUHID: std::ops::RangeInclusive<u32> = 0x1740..=0x175F;

/// Tagbanwa block (U+1760–U+177F)
pub const TAGBANWA: std::ops::RangeInclusive<u32> = 0x1760..=0x177F;

// ── Meetei Mayek ──────────────────────────────────────────────────────────

/// Meetei Mayek block (U+ABC0–U+ABFF)
pub const MEETEI_MAYEK: std::ops::RangeInclusive<u32> = 0xABC0..=0xABFF;

/// Meetei Mayek Extensions block (U+AAE0–U+AAFF)
pub const MEETEI_MAYEK_EXT: std::ops::RangeInclusive<u32> = 0xAAE0..=0xAAFF;