oxitext_shape/script_detect.rs
1//! Script detection helpers for oxitext-shape.
2//!
3//! Provides lightweight Unicode block-range checks to determine whether text
4//! contains characters from a given script family, without requiring a full
5//! ICU4X or Unicode database lookup. These functions are used internally by
6//! [`crate::SwashShaper::shape_request`] to apply smart direction defaults and
7//! may also be called directly by consumers.
8
9// ──────────────────────────────────────────────────────────────────────────────
10// Public helpers
11// ──────────────────────────────────────────────────────────────────────────────
12
/// Returns `true` if `text` contains Arabic characters requiring complex GSUB.
///
/// Covers the Arabic block (U+0600–U+06FF), Arabic Supplement (U+0750–U+077F),
/// Arabic Presentation Forms-A (U+FB50–U+FDFF), and Arabic Presentation
/// Forms-B (U+FE70–U+FEFE).
///
/// U+FEFF (ZERO WIDTH NO-BREAK SPACE, also used as the byte-order mark) is the
/// final code point of the Presentation Forms-B block but is not an Arabic
/// character, so it is deliberately excluded: a BOM-prefixed Latin string must
/// not be classified as Arabic.
///
/// Returns `false` for an empty string.
pub fn requires_arabic_shaping(text: &str) -> bool {
    text.chars().any(|c| {
        matches!(
            c,
            '\u{0600}'..='\u{06FF}'
                | '\u{0750}'..='\u{077F}'
                | '\u{FB50}'..='\u{FDFF}'
                // Stop before U+FEFF: that code point is the BOM, not Arabic.
                | '\u{FE70}'..='\u{FEFE}'
        )
    })
}
26
/// Reports whether any character of `text` falls in one of the Indic blocks
/// that this module treats as needing conjunct handling.
///
/// The recognised blocks are Devanagari (U+0900–U+097F), Bengali
/// (U+0980–U+09FF), Tamil (U+0B80–U+0BFF), Telugu (U+0C00–U+0C7F), and
/// Kannada (U+0C80–U+0CFF). Characters outside these five blocks are not
/// matched, and an empty string yields `false`.
pub fn requires_indic_shaping(text: &str) -> bool {
    for ch in text.chars() {
        let in_indic_block = matches!(
            ch,
            '\u{0900}'..='\u{097F}'     // Devanagari
                | '\u{0980}'..='\u{09FF}' // Bengali
                | '\u{0B80}'..='\u{0BFF}' // Tamil
                | '\u{0C00}'..='\u{0C7F}' // Telugu
                | '\u{0C80}'..='\u{0CFF}' // Kannada
        );
        if in_indic_block {
            // One matching character is enough; skip the rest of the text.
            return true;
        }
    }
    false
}
41
/// Reports whether `text` contains at least one character from a script that
/// this module flags as requiring mark positioning.
///
/// The covered blocks are Thai (U+0E00–U+0E7F), Myanmar (U+1000–U+109F), and
/// Khmer (U+1780–U+17FF). Any other input, including the empty string, yields
/// `false`.
pub fn requires_mark_positioning(text: &str) -> bool {
    // Inclusive (first, last) code point bounds of each covered block.
    const BLOCKS: [(u32, u32); 3] = [
        (0x0E00, 0x0E7F), // Thai
        (0x1000, 0x109F), // Myanmar
        (0x1780, 0x17FF), // Khmer
    ];

    text.chars()
        .map(u32::from)
        .any(|code| BLOCKS.iter().any(|&(first, last)| (first..=last).contains(&code)))
}