1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//! Pre-compiled lookup-table constants for text-transformation pipelines.
//!
//! All constants are artifacts embedded at build time by `build.rs` (byte tables via
//! `include_bytes!`, string tables via `include_str!`) and decoded lazily when the
//! corresponding matcher is first requested.
// ── Normalize page tables ──────────────────────────────────────────────────
/// L1 index for the Normalize 2-stage page table (`u16[4352]`, little-endian).
///
/// Embedded as raw bytes via `include_bytes!`; the `u16` entries must be decoded
/// from little-endian before use. 4352 entries × 256 slots per page spans
/// 0x110000 codepoints — presumably one entry per 256-codepoint block, selecting
/// a page in [`NORMALIZE_L2_BYTES`] (confirm against the decoder).
pub const NORMALIZE_L1_BYTES: & =
include_bytes!;
/// L2 data for the Normalize 2-stage page table (`u32[num_pages * 256]`, little-endian).
///
/// Each entry packs `(offset << 8) | length` into a `u32`, pointing into [`NORMALIZE_STR_BYTES`]:
/// the low 8 bits carry `length` and the upper 24 bits carry `offset`.
///
/// Embedded as raw bytes via `include_bytes!`; entries must be decoded from
/// little-endian before use.
/// NOTE(review): `offset`/`length` are presumably byte counts — confirm against the decoder.
pub const NORMALIZE_L2_BYTES: & =
include_bytes!;
/// Concatenated replacement strings referenced by [`NORMALIZE_L2_BYTES`].
///
/// Embedded with `include_str!`, so the artifact must be valid UTF-8 for the
/// crate to compile. Individual replacements are located through the
/// `(offset << 8) | length` entries of the L2 table.
pub const NORMALIZE_STR_BYTES: &str =
include_str!;
// ── VariantNorm page tables ──────────────────────────────────────────────
/// L1 index for the VariantNorm 2-stage page table (`u16[4352]`, little-endian).
///
/// Embedded as raw bytes via `include_bytes!`; the `u16` entries must be decoded
/// from little-endian before use. 4352 entries × 256 slots per page spans
/// 0x110000 codepoints — presumably one entry per 256-codepoint block, selecting
/// a page in [`VARIANT_NORM_L2_BYTES`] (confirm against the decoder).
pub const VARIANT_NORM_L1_BYTES: & =
include_bytes!;
/// L2 data for the VariantNorm 2-stage page table (`u32[num_pages * 256]`, little-endian).
///
/// Embedded as raw bytes via `include_bytes!`; entries must be decoded from
/// little-endian before use. No companion string table is declared for
/// VariantNorm in this section.
/// NOTE(review): the meaning of each `u32` entry is not documented here —
/// presumably it holds the replacement codepoint directly; confirm against the decoder.
pub const VARIANT_NORM_L2_BYTES: & =
include_bytes!;
// ── Romanize page tables ─────────────────────────────────────────────────
/// L1 index for the Romanize 2-stage page table (`u16[4352]`, little-endian).
///
/// Embedded as raw bytes via `include_bytes!`; the `u16` entries must be decoded
/// from little-endian before use. 4352 entries × 256 slots per page spans
/// 0x110000 codepoints — presumably one entry per 256-codepoint block, selecting
/// a page in [`ROMANIZE_L2_BYTES`] (confirm against the decoder).
pub const ROMANIZE_L1_BYTES: & =
include_bytes!;
/// L2 data for the Romanize 2-stage page table (`u32[num_pages * 256]`, little-endian).
///
/// Each entry packs `(offset << 8) | length` into a `u32`, pointing into [`ROMANIZE_STR_BYTES`]:
/// the low 8 bits carry `length` and the upper 24 bits carry `offset`.
///
/// Embedded as raw bytes via `include_bytes!`; entries must be decoded from
/// little-endian before use.
/// NOTE(review): `offset`/`length` are presumably byte counts — confirm against the decoder.
pub const ROMANIZE_L2_BYTES: & =
include_bytes!;
/// Concatenated romanization strings referenced by [`ROMANIZE_L2_BYTES`].
///
/// Embedded with `include_str!`, so the artifact must be valid UTF-8 for the
/// crate to compile. Individual romanizations are located through the
/// `(offset << 8) | length` entries of the L2 table.
pub const ROMANIZE_STR_BYTES: &str =
include_str!;
// ── EmojiNorm page tables ────────────────────────────────────────────────
/// L1 index for the EmojiNorm 2-stage page table (`u16[4352]`, little-endian).
///
/// Embedded as raw bytes via `include_bytes!`; the `u16` entries must be decoded
/// from little-endian before use. 4352 entries × 256 slots per page spans
/// 0x110000 codepoints — presumably one entry per 256-codepoint block, selecting
/// a page in [`EMOJI_NORM_L2_BYTES`] (confirm against the decoder).
pub const EMOJI_NORM_L1_BYTES: & =
include_bytes!;
/// L2 data for the EmojiNorm 2-stage page table (`u32[num_pages * 256]`, little-endian).
///
/// Each entry packs `(offset << 8) | length` into a `u32`, pointing into [`EMOJI_NORM_STR_BYTES`]:
/// the low 8 bits carry `length` and the upper 24 bits carry `offset`.
/// Length 0 means the codepoint is stripped (emoji modifiers).
///
/// Embedded as raw bytes via `include_bytes!`; entries must be decoded from
/// little-endian before use.
/// NOTE(review): `offset`/`length` are presumably byte counts — confirm against the decoder.
pub const EMOJI_NORM_L2_BYTES: & =
include_bytes!;
/// Concatenated emoji name strings referenced by [`EMOJI_NORM_L2_BYTES`].
///
/// Embedded with `include_str!`, so the artifact must be valid UTF-8 for the
/// crate to compile. Individual names are located through the
/// `(offset << 8) | length` entries of the L2 table.
pub const EMOJI_NORM_STR_BYTES: &str =
include_str!;
// ── Delete BitSet ──────────────────────────────────────────────────────────
/// Flat 139 KB bitset (`u8[139264]`) covering all Unicode codepoints 0x0–0x10FFFF.
///
/// The size follows from the codepoint range: 0x110000 codepoints / 8 bits per
/// byte = 139264 bytes.
///
/// Bit `cp % 8` of byte `cp / 8` is set when codepoint `cp` should be removed by the
/// Delete step. Assuming bit 0 is the least-significant bit (confirm against the
/// generator), membership is `(bytes[cp / 8] >> (cp % 8)) & 1 == 1`.
pub const DELETE_BITSET_BYTES: & =
include_bytes!;