Skip to main content

oxitext_shape/
script_detect.rs

//! Script detection helpers for oxitext-shape.
//!
//! Provides lightweight Unicode block-range checks to determine whether text
//! contains characters from a given script family, without requiring a full
//! ICU4X or Unicode database lookup. These functions are used internally by
//! [`crate::SwashShaper::shape_request`] to apply smart direction defaults and
//! may also be called directly by consumers.
8
9// ──────────────────────────────────────────────────────────────────────────────
10// Public helpers
11// ──────────────────────────────────────────────────────────────────────────────
12
/// Returns `true` if `text` contains Arabic/RTL characters requiring complex GSUB.
///
/// A character counts as Arabic when its code point falls in one of:
///
/// - Arabic (U+0600–U+06FF)
/// - Arabic Supplement (U+0750–U+077F)
/// - Arabic Presentation Forms-A (U+FB50–U+FDFF)
/// - Arabic Presentation Forms-B (U+FE70–U+FEFE)
///
/// U+FEFF (byte order mark / zero-width no-break space) is the last code point
/// of the Presentation Forms-B block but is not an Arabic character, so it is
/// deliberately excluded. Without this exclusion any BOM-prefixed text would be
/// reported as Arabic.
///
/// Returns `false` for the empty string.
pub fn requires_arabic_shaping(text: &str) -> bool {
    text.chars().any(|c| {
        matches!(
            u32::from(c),
            0x0600..=0x06FF
                | 0x0750..=0x077F
                | 0xFB50..=0xFDFF
                // Stops at U+FEFE so that U+FEFF (BOM) is not matched.
                | 0xFE70..=0xFEFE
        )
    })
}
26
/// Returns `true` if `text` contains Indic characters needing conjunct handling.
///
/// Only the following Unicode blocks are checked:
///
/// - Devanagari (U+0900–U+097F)
/// - Bengali (U+0980–U+09FF)
/// - Tamil (U+0B80–U+0BFF)
/// - Telugu (U+0C00–U+0C7F)
/// - Kannada (U+0C80–U+0CFF)
///
/// Characters outside these five blocks never cause a `true` result, and the
/// empty string yields `false`.
pub fn requires_indic_shaping(text: &str) -> bool {
    for ch in text.chars() {
        let in_indic_block = matches!(
            ch,
            '\u{0900}'..='\u{097F}'       // Devanagari
                | '\u{0980}'..='\u{09FF}' // Bengali
                | '\u{0B80}'..='\u{0BFF}' // Tamil
                | '\u{0C00}'..='\u{0C7F}' // Telugu
                | '\u{0C80}'..='\u{0CFF}' // Kannada
        );
        if in_indic_block {
            // One matching character is enough; stop scanning.
            return true;
        }
    }
    false
}
41
/// Returns `true` if `text` requires mark positioning (Thai, Khmer, Myanmar).
///
/// A single character from any of these Unicode blocks is sufficient:
///
/// - Thai (U+0E00–U+0E7F)
/// - Myanmar (U+1000–U+109F)
/// - Khmer (U+1780–U+17FF)
///
/// The empty string, and text containing no character from these blocks,
/// yields `false`.
pub fn requires_mark_positioning(text: &str) -> bool {
    // Inclusive (first, last) code-point bounds of each checked block.
    const BLOCKS: [(u32, u32); 3] = [
        (0x0E00, 0x0E7F), // Thai
        (0x1000, 0x109F), // Myanmar
        (0x1780, 0x17FF), // Khmer
    ];

    text.chars().map(u32::from).any(|code_point| {
        BLOCKS
            .iter()
            .any(|&(first, last)| (first..=last).contains(&code_point))
    })
}