Skip to main content

egui_cjk_font/
lib.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3//! egui-cjk-font
4//!
5//! A lightweight utility crate that loads a suitable system CJK font
6//! for egui / eframe applications on Windows, macOS, and Linux.
7//!
8//! ## Design goals
9//!
10//! - Automatically selects native platform UI fonts
11//! - Activates only for CJK locales (zh / ja / ko)
12//! - Provides two APIs to navigate egui's font initialization constraints
13//!
14//! ## Two usage modes
15//!
16//! This crate provides two APIs with different guarantees:
17//!
18//! - [`load_cjk_font`]: Safe to call at any time (including before `Context::run()`), but **does
19//!   not merge** with existing font configuration.
20//!
21//! - [`merge_cjk_font`]: Merges a CJK font into the existing egui font configuration, but **must be
22//!   called after the first `Context::run()`**.
23//!
24//! ## Important notes
25//!
26//! Due to egui API constraints, it is not possible to safely inspect
27//! existing font configuration before the first frame is rendered.
28//! As a result, merging fonts before `Context::run()` is fundamentally
29//! unsupported by egui.
30//!
31//! Choose the API that best matches your application's initialization flow.
32
33use {
34    egui::{Context, FontData, FontDefinitions, FontFamily},
35    font_kit::{
36        family_name::FamilyName,
37        handle::Handle,
38        properties::Properties,
39        source::SystemSource,
40    },
41    std::{fs::File, io::Read, sync::Arc},
42    sys_locale::get_locale,
43};
44
/// Name under which the CJK font is registered with egui.
///
/// Used both as the key in `FontDefinitions::font_data` and as the entry
/// added to the Proportional and Monospace family lists. `merge_cjk_font`
/// also looks for this key to detect that the font is already installed.
const CJK_FONT_NAME: &str = "egui_cjk_font";
46
/// Components of a locale identifier, as produced by `parse_locale`.
///
/// `parse_locale` lowercases its input before splitting, so every value it
/// stores here is lowercase.
#[derive(Debug, Clone)]
struct LocaleInfo {
    /// Primary language subtag (the first component), e.g. `zh`, `ja`, `ko`.
    lang: String,
    /// Script subtag such as `hans` / `hant`, when the locale carries one.
    script: Option<String>,
    /// Region subtag such as `cn` / `tw` / `419`, when the locale carries one.
    region: Option<String>,
}
53
54fn current_locale() -> LocaleInfo {
55    parse_locale(&get_locale().unwrap_or_else(|| "en-us".to_string()))
56}
57
58/// Parse a locale string into language, script, and region components.
59///
60/// Handles both POSIX (`zh_CN.UTF-8`, `zh_TW@calendar`) and BCP-47 (`zh-Hans-CN`) formats.
61/// All output values are lowercased for case-insensitive comparisons.
62///
63/// Examples:
64/// - `zh_CN.UTF-8` → lang=`zh`, script=`None`, region=`cn`
65/// - `zh-Hans-CN`  → lang=`zh`, script=`hans`, region=`cn`
66/// - `ja_JP`       → lang=`ja`, script=`None`, region=`jp`
67/// - `zh-Hant`     → lang=`zh`, script=`hant`, region=`None`
68/// - `zh-TW`       → lang=`zh`, script=`None`, region=`tw`
69fn parse_locale(raw: &str) -> LocaleInfo {
70    let raw = raw.split(['.', '@']).next().unwrap_or(raw).to_lowercase();
71
72    let parts: Vec<&str> = raw.split(&['-', '_'][..]).collect();
73
74    // BCP-47 script subtags are exactly 4 ASCII letters (e.g. "Hans", "Hant").
75    // Region subtags are 2 ASCII letters or 3 digits (e.g. "CN", "TW", "419").
76    let is_script = |s: &str| s.len() == 4 && s.chars().all(|c| c.is_ascii_alphabetic());
77
78    match parts.len() {
79        1 => LocaleInfo {
80            lang: parts[0].to_string(),
81            script: None,
82            region: None,
83        },
84        2 => {
85            if is_script(parts[1]) {
86                LocaleInfo {
87                    lang: parts[0].to_string(),
88                    script: Some(parts[1].to_string()),
89                    region: None,
90                }
91            } else {
92                LocaleInfo {
93                    lang: parts[0].to_string(),
94                    script: None,
95                    region: Some(parts[1].to_string()),
96                }
97            }
98        }
99        _ => {
100            // For 3+ subtags, apply the same 4-letter heuristic to parts[1].
101            // This handles "zh-Hant-TW" (script first) and "zh-TW-variant" (region first).
102            let (script, region_idx) = if is_script(parts[1]) {
103                (Some(parts[1].to_string()), 2)
104            } else {
105                (None, 1)
106            };
107            LocaleInfo {
108                lang: parts[0].to_string(),
109                script,
110                region: parts.get(region_idx).map(|&s| s.to_string()),
111            }
112        }
113    }
114}
115
116fn is_cjk(locale: &LocaleInfo) -> bool { matches!(locale.lang.as_str(), "zh" | "ja" | "ko") }
117
118/// Load a suitable system CJK font into the egui context.
119///
120/// This function is safe to call before or after `egui::Context::run()`.
121/// It does nothing for non-CJK locales.
122///
123/// The font is installed using a fresh `FontDefinitions` based on egui
124/// defaults, with the CJK font inserted at highest priority in the
125/// Proportional family and appended as a fallback in Monospace.
126/// Existing font configuration is not inspected or merged.
127///
128/// If your application customizes fonts manually, consider using
129/// [`merge_cjk_font`] instead.
130pub fn load_cjk_font(ctx: &Context) {
131    let locale = current_locale();
132    if !is_cjk(&locale) {
133        return;
134    }
135
136    if let Some(font_data) = find_cjk_font(&locale) {
137        apply_font(ctx, font_data);
138    }
139}
140
141/// Merge a suitable system CJK font into the existing egui font configuration.
142///
143/// ## Contract
144///
145/// This function **must be called after the first `egui::Context::run()`**.
146/// Calling it before egui has rendered its first frame may produce incorrect
147/// results or panic, because the font system has not yet been fully initialized.
148///
149/// ## Behavior
150///
151/// - Reads existing font definitions from egui.
152/// - Inserts the CJK font at **position 0** of the Proportional family (highest priority), so CJK
153///   glyphs are preferred over any previously configured font. For the Monospace family, the CJK
154///   font is appended as a low-priority fallback.
155/// - Does not remove any user-defined fonts.
156/// - Safe to call multiple times; subsequent calls after the font is already registered are no-ops
157///   (idempotent).
158///
159/// ## Typical usage
160///
161/// Call this from `App::ui` during the first frame, or any time after
162/// egui has started rendering.
163pub fn merge_cjk_font(ctx: &Context) {
164    let locale = current_locale();
165    if !is_cjk(&locale) {
166        return;
167    }
168
169    let Some(font_data) = find_cjk_font(&locale) else {
170        return;
171    };
172
173    ctx.fonts(|f| {
174        let mut defs = f.definitions().clone();
175
176        // Idempotency guard: skip if the CJK font was already registered.
177        if defs.font_data.contains_key(CJK_FONT_NAME) {
178            return;
179        }
180
181        defs.font_data.insert(
182            CJK_FONT_NAME.to_string(),
183            Arc::new(FontData::from_owned(font_data)),
184        );
185
186        // Proportional: CJK at highest priority so all CJK glyphs render correctly.
187        if let Some(family) = defs.families.get_mut(&FontFamily::Proportional) {
188            family.insert(0, CJK_FONT_NAME.to_string());
189        }
190
191        // Monospace: CJK as a low-priority fallback to preserve the user's code font.
192        if let Some(family) = defs.families.get_mut(&FontFamily::Monospace) {
193            family.push(CJK_FONT_NAME.to_string());
194        }
195
196        ctx.set_fonts(defs);
197    });
198}
199
200/// Find the most suitable system CJK font for the given locale.
201///
202/// Returns the raw font bytes on success, or `None` if no suitable font
203/// could be located or loaded from the system font registry.
204fn find_cjk_font(locale: &LocaleInfo) -> Option<Vec<u8>> {
205    let source = SystemSource::new();
206
207    let priorities: &[&str] = match locale.lang.as_str() {
208        "zh" => match (locale.script.as_deref(), locale.region.as_deref()) {
209            (Some("hant"), _) | (_, Some("tw" | "hk" | "mo")) => &[
210                "Microsoft JhengHei",
211                "PingFang TC",
212                "PingFang HK",
213                "Noto Sans CJK TC",
214                "Source Han Sans TC",
215            ],
216            _ => &[
217                "Microsoft YaHei",
218                "PingFang SC",
219                "Noto Sans CJK SC",
220                "Source Han Sans SC",
221            ],
222        },
223        "ja" => &["Yu Gothic", "Meiryo", "Hiragino Sans", "Noto Sans CJK JP"],
224        "ko" => &["Malgun Gothic", "Apple SD Gothic Neo", "Noto Sans CJK KR"],
225        _ => &[],
226    };
227
228    for name in priorities {
229        if let Some(data) = load_by_family_name(&source, name) {
230            return Some(data);
231        }
232    }
233
234    // Generic CJK fallbacks tried regardless of locale.
235    for name in [
236        "Noto Sans CJK",
237        "Source Han Sans",
238        "WenQuanYi Micro Hei",
239        "Sarasa Gothic SC",
240        "Droid Sans Fallback",
241    ] {
242        if let Some(data) = load_by_family_name(&source, name) {
243            return Some(data);
244        }
245    }
246
247    None
248}
249
250fn load_by_family_name(source: &SystemSource, name: &str) -> Option<Vec<u8>> {
251    source
252        .select_best_match(&[FamilyName::Title(name.to_string())], &Properties::new())
253        .ok()
254        .and_then(load_handle)
255}
256
257fn load_handle(handle: Handle) -> Option<Vec<u8>> {
258    match handle {
259        Handle::Memory { bytes, .. } => Some(bytes.to_vec()),
260        Handle::Path { path, .. } => {
261            let mut buf = Vec::new();
262            File::open(path).ok()?.read_to_end(&mut buf).ok()?;
263            Some(buf)
264        }
265    }
266}
267
268fn apply_font(ctx: &Context, font_data: Vec<u8>) {
269    if font_data.is_empty() {
270        return;
271    }
272
273    let mut fonts = FontDefinitions::default();
274
275    fonts.font_data.insert(
276        CJK_FONT_NAME.to_string(),
277        Arc::new(FontData::from_owned(font_data)),
278    );
279
280    if let Some(family) = fonts.families.get_mut(&FontFamily::Proportional) {
281        family.insert(0, CJK_FONT_NAME.to_string());
282    }
283
284    if let Some(family) = fonts.families.get_mut(&FontFamily::Monospace) {
285        family.push(CJK_FONT_NAME.to_string());
286    }
287
288    ctx.set_fonts(fonts);
289}
290
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `raw` and compare all three components against the expectation.
    #[track_caller]
    fn assert_locale(raw: &str, lang: &str, script: Option<&str>, region: Option<&str>) {
        let parsed = parse_locale(raw);
        assert_eq!(parsed.lang, lang, "language of {raw}");
        assert_eq!(parsed.script.as_deref(), script, "script of {raw}");
        assert_eq!(parsed.region.as_deref(), region, "region of {raw}");
    }

    #[test]
    fn parse_locale_variants() {
        assert_locale("zh-Hans-CN.UTF-8", "zh", Some("hans"), Some("cn"));
        assert_locale("ja_JP", "ja", None, Some("jp"));
        assert_locale("zh_TW", "zh", None, Some("tw"));
    }

    #[test]
    fn parse_locale_hant_script_is_not_region() {
        assert_locale("zh-Hant", "zh", Some("hant"), None);
        assert_locale("zh-Hans", "zh", Some("hans"), None);
    }

    /// Regression: a two-letter second subtag is a region, never a script.
    /// Guards against wrong font selection for locales such as
    /// "zh-TW-variant" (Traditional Chinese must not fall back to Simplified
    /// Chinese fonts).
    #[test]
    fn parse_locale_region_before_variant() {
        assert_locale("zh-TW-variant", "zh", None, Some("tw"));
        assert_locale("zh-CN-variant", "zh", None, Some("cn"));
    }

    #[test]
    fn parse_locale_script_and_region() {
        assert_locale("zh-Hant-TW", "zh", Some("hant"), Some("tw"));
        assert_locale("zh-Hans-CN", "zh", Some("hans"), Some("cn"));
    }

    #[test]
    fn parse_locale_single_part() {
        assert_locale("zh", "zh", None, None);
        assert_locale("ja", "ja", None, None);
    }

    #[test]
    fn parse_locale_numeric_region() {
        assert_locale("es-419", "es", None, Some("419"));
    }

    #[test]
    fn parse_locale_posix_modifier_is_ignored() {
        assert_locale("zh_TW@calendar", "zh", None, Some("tw"));
        assert_locale("zh_Hant@calendar", "zh", Some("hant"), None);
    }

    #[test]
    fn non_cjk_locale_detection() {
        let english = parse_locale("en-US");
        assert!(!is_cjk(&english));
    }

    #[test]
    fn apply_font_no_panic_with_empty_data() {
        let ctx = Context::default();
        apply_font(&ctx, Vec::new());
    }

    #[test]
    fn is_cjk_detection() {
        for tag in ["zh-CN", "ja-JP", "ko-KR"] {
            assert!(is_cjk(&parse_locale(tag)), "{tag} should be CJK");
        }
        for tag in ["en-US", "de-DE"] {
            assert!(!is_cjk(&parse_locale(tag)), "{tag} should not be CJK");
        }
    }
}