Skip to main content

oxiui_text/
fallback.rs

//! Font fallback chain with Unicode-range based script detection.
//!
//! When oxifont does not expose per-glyph availability queries, this module
//! implements a conservative Unicode-range heuristic so that CJK, emoji, and
//! Latin text can be routed to appropriate font families without linking a
//! full font parser.

// ── Unicode range helpers ─────────────────────────────────────────────────────
9
/// Returns `true` when `ch` falls in one of the CJK-related Unicode ranges.
///
/// Covered ranges: Hiragana, Katakana (including Phonetic Extensions and the
/// halfwidth forms), Bopomofo, Hangul, CJK/Kangxi radicals, CJK Symbols and
/// Punctuation, Yi, CJK Compatibility Ideographs/Forms, and the CJK Unified
/// Ideographs block together with Extensions A through I.
///
/// This is a pure code-point range test; it does not consult any font, so a
/// `true` result only means "route this character to a CJK family".
pub fn is_cjk(ch: char) -> bool {
    matches!(ch,
        // Hiragana
        '\u{3040}'..='\u{309F}' |
        // Katakana
        '\u{30A0}'..='\u{30FF}' |
        // Katakana Phonetic Extensions (small kana used e.g. for Ainu)
        '\u{31F0}'..='\u{31FF}' |
        // Halfwidth CJK punctuation and halfwidth Katakana
        '\u{FF61}'..='\u{FF9F}' |
        // Bopomofo
        '\u{02EA}'..='\u{02EB}' |
        '\u{3105}'..='\u{312F}' |
        '\u{31A0}'..='\u{31BF}' |
        // Hangul (Jamo, compatibility Jamo, enclosed forms, syllables,
        // extended Jamo, and halfwidth forms)
        '\u{1100}'..='\u{11FF}' |
        '\u{302E}'..='\u{302F}' |
        '\u{3131}'..='\u{318E}' |
        '\u{3200}'..='\u{321E}' |
        '\u{3260}'..='\u{327E}' |
        '\u{A960}'..='\u{A97C}' |
        '\u{AC00}'..='\u{D7A3}' |
        '\u{D7B0}'..='\u{D7C6}' |
        '\u{D7CB}'..='\u{D7FB}' |
        '\u{FFA0}'..='\u{FFBE}' |
        '\u{FFC2}'..='\u{FFC7}' |
        '\u{FFCA}'..='\u{FFCF}' |
        '\u{FFD2}'..='\u{FFD7}' |
        '\u{FFDA}'..='\u{FFDC}' |
        // CJK Radicals Supplement
        '\u{2E80}'..='\u{2EFF}' |
        // Kangxi Radicals
        '\u{2F00}'..='\u{2FDF}' |
        // Ideographic Description Characters
        '\u{2FF0}'..='\u{2FFF}' |
        // CJK Symbols and Punctuation
        '\u{3000}'..='\u{303F}' |
        // CJK Unified Ideographs Extension A
        '\u{3400}'..='\u{4DBF}' |
        // CJK Unified Ideographs
        '\u{4E00}'..='\u{9FFF}' |
        // Yi Syllables
        '\u{A000}'..='\u{A48F}' |
        // Yi Radicals
        '\u{A490}'..='\u{A4CF}' |
        // CJK Compatibility Ideographs
        '\u{F900}'..='\u{FAFF}' |
        // CJK Compatibility Forms
        '\u{FE30}'..='\u{FE4F}' |
        // Supplementary ideographs: Extensions B-I and the Compatibility
        // Ideographs Supplement. The upper bound is the end of Extension H
        // (U+31350..=U+323AF); stopping at U+3134F would cut H off entirely.
        '\u{20000}'..='\u{323AF}'
    )
}
62
/// Returns `true` when `ch` falls in one of the common emoji code-point ranges.
///
/// This is a range heuristic, not a lookup of the Unicode `Emoji` property:
/// some ranges below also contain non-emoji symbols, and emoji that live in
/// other blocks (keycaps, arrows, …) are not recognised.
pub fn is_emoji(ch: char) -> bool {
    matches!(ch,
        // Regional Indicator Symbols (flag pairs), at the end of the
        // Enclosed Alphanumeric Supplement block
        '\u{1F1E0}'..='\u{1F1FF}' |
        // Miscellaneous Symbols and Pictographs
        '\u{1F300}'..='\u{1F5FF}' |
        // Emoticons
        '\u{1F600}'..='\u{1F64F}' |
        // Transport and Map Symbols
        '\u{1F680}'..='\u{1F6FF}' |
        // Supplemental Symbols and Pictographs
        '\u{1F900}'..='\u{1F9FF}' |
        // Chess Symbols
        '\u{1FA00}'..='\u{1FA6F}' |
        // Symbols and Pictographs Extended-A
        '\u{1FA70}'..='\u{1FAFF}' |
        // Miscellaneous Symbols
        '\u{2600}'..='\u{26FF}' |
        // Dingbats, from U+2702 (scissors) to the end of the block so that
        // U+27BF (double curly loop) is included
        '\u{2702}'..='\u{27BF}' |
        // Miscellaneous Symbols and Arrows: large squares, star, heavy circle
        '\u{2B1B}'..='\u{2B1C}' |
        '\u{2B50}' |
        '\u{2B55}'
    )
}
85
86/// A font family paired with the Unicode-range predicate that covers it.
87pub type FamilyEntry = (String, fn(char) -> bool);
88
89// ── FallbackChain ─────────────────────────────────────────────────────────────
90
91/// An ordered list of font families used for glyph-level fallback.
92///
93/// The resolver walks the chain in order and returns the first family whose
94/// Unicode-range heuristic covers `ch`.  The last entry acts as a universal
95/// fallback ("tofu" / .notdef).
96pub struct FallbackChain {
97    /// Entries in priority order.  Each entry is `(family, predicate)`.
98    families: Vec<FamilyEntry>,
99}
100
101/// Universal accept predicate (tofu / last-resort).
102fn accept_all(_ch: char) -> bool {
103    true
104}
105
106impl FallbackChain {
107    /// Construct the default fallback chain:
108    ///
109    /// 1. CJK fonts  — covers Unified Ideographs, Kana, Hangul …
110    /// 2. Emoji font — covers emoji symbol ranges
111    /// 3. Latin / universal fallback
112    pub fn default_chain() -> Self {
113        Self {
114            families: vec![
115                ("Noto Sans CJK".to_owned(), is_cjk as fn(char) -> bool),
116                ("Noto Emoji".to_owned(), is_emoji as fn(char) -> bool),
117                ("DejaVu Sans".to_owned(), accept_all as fn(char) -> bool),
118            ],
119        }
120    }
121
122    /// Append a new family at the end of the chain (before the universal
123    /// fallback if one is present — this is handled automatically via the
124    /// chain-walk in [`Self::resolve_glyph`]).
125    pub fn add_family(&mut self, family: String) {
126        // Insert before the last (tofu) entry so it takes priority.
127        let len = self.families.len();
128        let insert_pos = if len > 0 { len - 1 } else { 0 };
129        self.families
130            .insert(insert_pos, (family, accept_all as fn(char) -> bool));
131    }
132
133    /// Return the name of the first family in the chain that can render `ch`.
134    ///
135    /// Returns `None` only when the chain is empty (which the default chain
136    /// never produces).
137    pub fn resolve_glyph(&self, ch: char) -> Option<&str> {
138        for (family, predicate) in &self.families {
139            if predicate(ch) {
140                return Some(family.as_str());
141            }
142        }
143        None
144    }
145
146    /// Borrow the family list.
147    pub fn families(&self) -> &[FamilyEntry] {
148        &self.families
149    }
150}
151
152// ── Tests ─────────────────────────────────────────────────────────────────────
153
#[cfg(test)]
mod tests {
    use super::*;

    /// Collect the family names of `chain` in priority order.
    fn family_names(chain: &FallbackChain) -> Vec<&str> {
        chain
            .families()
            .iter()
            .map(|(name, _)| name.as_str())
            .collect()
    }

    #[test]
    fn fallback_cjk_detected() {
        assert!(is_cjk('中'), "'中' must be CJK");
        assert!(is_cjk('あ'), "'あ' (Hiragana) must be CJK");
        assert!(is_cjk('カ'), "'カ' (Katakana) must be CJK");
        assert!(is_cjk('한'), "'한' (Hangul) must be CJK");
        assert!(!is_cjk('a'), "'a' must not be CJK");
        assert!(!is_cjk('!'), "'!' must not be CJK");
    }

    #[test]
    fn fallback_emoji_detected() {
        assert!(is_emoji('😀'), "'😀' must be emoji");
        assert!(is_emoji('🎉'), "'🎉' must be emoji");
        assert!(!is_emoji('a'), "'a' must not be emoji");
        assert!(!is_emoji('中'), "'中' must not be emoji");
    }

    #[test]
    fn fallback_chain_has_entries() {
        let chain = FallbackChain::default_chain();
        assert!(
            !chain.families().is_empty(),
            "default chain must have entries"
        );
    }

    #[test]
    fn fallback_resolves_cjk_to_cjk_family() {
        let chain = FallbackChain::default_chain();
        let family = chain.resolve_glyph('中').unwrap();
        assert!(
            family.contains("CJK"),
            "CJK char should resolve to a CJK family"
        );
    }

    #[test]
    fn fallback_resolves_emoji() {
        let chain = FallbackChain::default_chain();
        let family = chain.resolve_glyph('😀').unwrap();
        assert!(
            family.to_lowercase().contains("emoji"),
            "emoji should resolve to emoji family"
        );
    }

    #[test]
    fn fallback_resolves_latin_to_last_resort() {
        let chain = FallbackChain::default_chain();
        let last_resort = family_names(&chain).last().copied();
        // 'a' is not CJK or emoji → falls through to the last universal entry.
        assert_eq!(
            chain.resolve_glyph('a'),
            last_resort,
            "Latin must resolve to the final (universal) entry"
        );
    }

    #[test]
    fn fallback_add_family_inserts_before_tofu() {
        let mut chain = FallbackChain::default_chain();
        let before: Vec<String> = family_names(&chain)
            .into_iter()
            .map(str::to_owned)
            .collect();

        chain.add_family("My Custom Font".to_owned());

        let after = family_names(&chain);
        assert_eq!(after.len(), before.len() + 1);
        // The new family sits directly in front of the previous last entry…
        assert_eq!(after[after.len() - 2], "My Custom Font");
        assert_eq!(after.last().copied(), before.last().map(String::as_str));
        // …and the entries ahead of it keep their order.
        assert_eq!(after[..after.len() - 2], before[..before.len() - 1]);
        // Script-specific entries still win over the added family.
        assert_eq!(chain.resolve_glyph('中'), Some(before[0].as_str()));
        // Characters nobody else claims now go to the added family.
        assert_eq!(chain.resolve_glyph('a'), Some("My Custom Font"));
    }

    #[test]
    fn fallback_empty_chain() {
        let mut chain = FallbackChain {
            families: Vec::new(),
        };
        assert_eq!(chain.resolve_glyph('a'), None);

        // Adding to an empty chain must not panic and yields a single entry.
        chain.add_family("Only Font".to_owned());
        assert_eq!(family_names(&chain), ["Only Font"]);
        assert_eq!(chain.resolve_glyph('a'), Some("Only Font"));
    }
}
213        let original_len = chain.families().len();
214        chain.add_family("My Custom Font".to_owned());
215        assert_eq!(chain.families().len(), original_len + 1);
216    }
217}