Skip to main content

rust_fontconfig/
lib.rs

1//! # rust-fontconfig
2//!
3//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies). Enable the `parsing` feature to parse `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf` with allsorts.
4//!
5//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
6//!
7//! ## Usage
8//!
9//! ### Basic Font Query
10//!
11//! ```rust,no_run
12//! use rust_fontconfig::{FcFontCache, FcPattern};
13//!
14//! fn main() {
15//!     // Build the font cache
16//!     let cache = FcFontCache::build();
17//!
18//!     // Query a font by name
19//!     let results = cache.query(
20//!         &FcPattern {
21//!             name: Some(String::from("Arial")),
22//!             ..Default::default()
23//!         },
24//!         &mut Vec::new() // Trace messages container
25//!     );
26//!
27//!     if let Some(font_match) = results {
28//!         println!("Font match ID: {:?}", font_match.id);
29//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
30//!     } else {
31//!         println!("No matching font found");
32//!     }
33//! }
34//! ```
35//!
36//! ### Resolve Font Chain and Query for Text
37//!
38//! ```rust,no_run
39//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
40//!
41//! fn main() {
42//!     # #[cfg(feature = "std")]
43//!     # {
44//!     let cache = FcFontCache::build();
45//!
46//!     // Build font fallback chain (without text parameter)
47//!     let font_chain = cache.resolve_font_chain(
48//!         &["Arial".to_string(), "sans-serif".to_string()],
49//!         FcWeight::Normal,
50//!         PatternMatch::DontCare,
51//!         PatternMatch::DontCare,
52//!         &mut Vec::new(),
53//!     );
54//!
55//!     // Query which fonts to use for specific text
56//!     let text = "Hello 你好 Здравствуйте";
57//!     let font_runs = font_chain.query_for_text(&cache, text);
58//!
59//!     println!("Text split into {} font runs:", font_runs.len());
60//!     for run in font_runs {
61//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
62//!     }
63//!     # }
64//! }
65//! ```
66
67#![allow(non_snake_case)]
68
// As of v4.1 this crate is std-only. The v4.0 `no_std` path is gone:
// it never supported the registry / multi-thread parsing anyway, and
// the shared-state `FcFontCache` refactor depends on `std::sync::RwLock`,
// which is unavailable without std. The `alloc::` import paths are kept
// so that the existing call sites in this file and its submodules keep
// compiling; in std builds `alloc` is simply the allocation crate that
// `std` itself is built on, so it is always linked and available.
76extern crate alloc;
77
78use alloc::collections::btree_map::BTreeMap;
79use alloc::string::{String, ToString};
80use alloc::vec::Vec;
81#[cfg(all(feature = "std", feature = "parsing"))]
82use allsorts::binary::read::ReadScope;
83#[cfg(all(feature = "std", feature = "parsing"))]
84use allsorts::get_name::fontcode_get_name;
85#[cfg(all(feature = "std", feature = "parsing"))]
86use allsorts::tables::os2::Os2;
87#[cfg(all(feature = "std", feature = "parsing"))]
88use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
89#[cfg(all(feature = "std", feature = "parsing"))]
90use allsorts::tag;
91#[cfg(feature = "std")]
92use std::path::PathBuf;
93
94pub mod utils;
95#[cfg(feature = "std")]
96pub mod config;
97
98#[cfg(feature = "ffi")]
99pub mod ffi;
100
101#[cfg(feature = "async-registry")]
102pub mod scoring;
103#[cfg(feature = "async-registry")]
104pub mod registry;
105#[cfg(feature = "async-registry")]
106pub mod multithread;
107#[cfg(feature = "cache")]
108pub mod disk_cache;
109
110#[cfg(all(target_os = "ios", feature = "std", feature = "parsing"))]
111mod mobile_ios;
112
/// Operating system type for generic font family resolution.
///
/// The variant selects which platform-specific font list the
/// `get_serif_fonts` / `get_sans_serif_fonts` / `get_monospace_fonts`
/// methods return. `OperatingSystem::current()` picks the variant that
/// corresponds to the compilation target.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatingSystem {
    /// Selected by `current()` when `target_os = "windows"`.
    Windows,
    /// Selected by `current()` when `target_os = "linux"`; also the fallback
    /// that `current()` returns for targets it does not recognise.
    Linux,
    /// Selected by `current()` when `target_os = "macos"`.
    MacOS,
    /// Selected by `current()` when `target_os = "ios"`. Shares its font
    /// lists with `MacOS` in the `get_*_fonts` methods.
    IOS,
    /// Selected by `current()` when `target_os = "android"`.
    Android,
    /// Selected by `current()` when `target_family = "wasm"`. The
    /// `get_*_fonts` methods return an empty list for this variant.
    Wasm,
}
123
124impl OperatingSystem {
125    /// Detect the current operating system at compile time
126    pub fn current() -> Self {
127        #[cfg(target_os = "windows")]
128        return OperatingSystem::Windows;
129
130        #[cfg(target_os = "linux")]
131        return OperatingSystem::Linux;
132
133        #[cfg(target_os = "macos")]
134        return OperatingSystem::MacOS;
135
136        #[cfg(target_os = "ios")]
137        return OperatingSystem::IOS;
138
139        #[cfg(target_os = "android")]
140        return OperatingSystem::Android;
141
142        #[cfg(target_family = "wasm")]
143        return OperatingSystem::Wasm;
144
145        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_os = "ios", target_os = "android", target_family = "wasm")))]
146        return OperatingSystem::Linux; // Default fallback
147    }
148    
149    /// Get system-specific fonts for the "serif" generic family
150    /// Prioritizes fonts based on Unicode range coverage
151    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
152        let has_cjk = has_cjk_ranges(unicode_ranges);
153        let has_arabic = has_arabic_ranges(unicode_ranges);
154        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
155        
156        match self {
157            OperatingSystem::Windows => {
158                let mut fonts = Vec::new();
159                if has_cjk {
160                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
161                }
162                if has_arabic {
163                    fonts.push("Traditional Arabic");
164                }
165                fonts.push("Times New Roman");
166                fonts.iter().map(|s| s.to_string()).collect()
167            }
168            OperatingSystem::Linux => {
169                let mut fonts = Vec::new();
170                if has_cjk {
171                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
172                }
173                if has_arabic {
174                    fonts.push("Noto Serif Arabic");
175                }
176                fonts.extend_from_slice(&[
177                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
178                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
179                ]);
180                fonts.iter().map(|s| s.to_string()).collect()
181            }
182            OperatingSystem::MacOS | OperatingSystem::IOS => {
183                let mut fonts = Vec::new();
184                if has_cjk {
185                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
186                }
187                if has_arabic {
188                    fonts.push("Geeza Pro");
189                }
190                fonts.extend_from_slice(&["Times New Roman", "Times", "New York", "Palatino"]);
191                fonts.iter().map(|s| s.to_string()).collect()
192            }
193            OperatingSystem::Android => {
194                let mut fonts = Vec::new();
195                if has_cjk {
196                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
197                }
198                if has_arabic {
199                    fonts.push("Noto Naskh Arabic");
200                }
201                fonts.extend_from_slice(&["Noto Serif", "Roboto Serif", "Droid Serif"]);
202                fonts.iter().map(|s| s.to_string()).collect()
203            }
204            OperatingSystem::Wasm => Vec::new(),
205        }
206    }
207
208    /// Get system-specific fonts for the "sans-serif" generic family
209    /// Prioritizes fonts based on Unicode range coverage
210    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
211        let has_cjk = has_cjk_ranges(unicode_ranges);
212        let has_arabic = has_arabic_ranges(unicode_ranges);
213        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
214        let has_hebrew = has_hebrew_ranges(unicode_ranges);
215        let has_thai = has_thai_ranges(unicode_ranges);
216        
217        match self {
218            OperatingSystem::Windows => {
219                let mut fonts = Vec::new();
220                if has_cjk {
221                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
222                }
223                if has_arabic {
224                    fonts.push("Segoe UI Arabic");
225                }
226                if has_hebrew {
227                    fonts.push("Segoe UI Hebrew");
228                }
229                if has_thai {
230                    fonts.push("Leelawadee UI");
231                }
232                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
233                fonts.iter().map(|s| s.to_string()).collect()
234            }
235            OperatingSystem::Linux => {
236                let mut fonts = Vec::new();
237                if has_cjk {
238                    fonts.extend_from_slice(&[
239                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
240                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
241                    ]);
242                }
243                if has_arabic {
244                    fonts.push("Noto Sans Arabic");
245                }
246                if has_hebrew {
247                    fonts.push("Noto Sans Hebrew");
248                }
249                if has_thai {
250                    fonts.push("Noto Sans Thai");
251                }
252                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
253                fonts.iter().map(|s| s.to_string()).collect()
254            }
255            OperatingSystem::MacOS | OperatingSystem::IOS => {
256                let mut fonts = Vec::new();
257                if has_cjk {
258                    fonts.extend_from_slice(&[
259                        "Hiragino Sans", "Hiragino Kaku Gothic ProN",
260                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
261                    ]);
262                }
263                if has_arabic {
264                    fonts.push("Geeza Pro");
265                }
266                if has_hebrew {
267                    fonts.push("Arial Hebrew");
268                }
269                if has_thai {
270                    fonts.push("Thonburi");
271                }
272                fonts.extend_from_slice(&[
273                    "San Francisco", ".AppleSystemUIFont", ".SFUIText", ".SFUI-Regular",
274                    "Helvetica Neue", "Helvetica", "Lucida Grande",
275                ]);
276                fonts.iter().map(|s| s.to_string()).collect()
277            }
278            OperatingSystem::Android => {
279                let mut fonts = Vec::new();
280                if has_cjk {
281                    fonts.extend_from_slice(&[
282                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
283                        "Droid Sans Fallback",
284                    ]);
285                }
286                if has_arabic {
287                    fonts.push("Noto Sans Arabic");
288                }
289                if has_hebrew {
290                    fonts.push("Noto Sans Hebrew");
291                }
292                if has_thai {
293                    fonts.push("Noto Sans Thai");
294                }
295                fonts.extend_from_slice(&[
296                    "Roboto", "Roboto-Regular", "Noto Sans", "Droid Sans",
297                ]);
298                fonts.iter().map(|s| s.to_string()).collect()
299            }
300            OperatingSystem::Wasm => Vec::new(),
301        }
302    }
303
304    /// Get system-specific fonts for the "monospace" generic family
305    /// Prioritizes fonts based on Unicode range coverage
306    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
307        let has_cjk = has_cjk_ranges(unicode_ranges);
308        
309        match self {
310            OperatingSystem::Windows => {
311                let mut fonts = Vec::new();
312                if has_cjk {
313                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
314                }
315                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
316                fonts.iter().map(|s| s.to_string()).collect()
317            }
318            OperatingSystem::Linux => {
319                let mut fonts = Vec::new();
320                if has_cjk {
321                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
322                }
323                fonts.extend_from_slice(&[
324                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
325                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
326                ]);
327                fonts.iter().map(|s| s.to_string()).collect()
328            }
329            OperatingSystem::MacOS | OperatingSystem::IOS => {
330                let mut fonts = Vec::new();
331                if has_cjk {
332                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
333                }
334                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
335                fonts.iter().map(|s| s.to_string()).collect()
336            }
337            OperatingSystem::Android => {
338                let mut fonts = Vec::new();
339                if has_cjk {
340                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP"]);
341                }
342                fonts.extend_from_slice(&["Roboto Mono", "Droid Sans Mono", "Noto Sans Mono", "DejaVu Sans Mono"]);
343                fonts.iter().map(|s| s.to_string()).collect()
344            }
345            OperatingSystem::Wasm => Vec::new(),
346        }
347    }
348    
349    /// Expand a generic CSS font family to system-specific font names
350    /// Returns the original name if not a generic family
351    /// Prioritizes fonts based on Unicode range coverage
352    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
353        match family.to_ascii_lowercase().as_str() {
354            "serif" => self.get_serif_fonts(unicode_ranges),
355            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
356            "monospace" => self.get_monospace_fonts(unicode_ranges),
357            "cursive" | "fantasy" | "system-ui" => {
358                // Use sans-serif as fallback for these
359                self.get_sans_serif_fonts(unicode_ranges)
360            }
361            _ => vec![family.to_string()],
362        }
363    }
364}
365
366/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
367/// Prioritizes fonts based on Unicode range coverage
368/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
369pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
370    let mut expanded = Vec::new();
371    
372    for family in families {
373        expanded.extend(os.expand_generic_family(family, unicode_ranges));
374    }
375    
376    expanded
377}
378
/// UUID to identify a font (collections are broken up into separate fonts)
#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FontId(pub u128);

/// `Debug` output is identical to the `Display` output.
impl core::fmt::Debug for FontId {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <FontId as core::fmt::Display>::fmt(self, f)
    }
}

/// Renders the 128-bit value as lowercase hex in 8-4-4-4-12 digit groups.
impl core::fmt::Display for FontId {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let FontId(raw) = *self;

        // Slice the value from the most significant bits downwards; the
        // narrowing casts keep exactly the low bits of each shifted value.
        let group_1 = (raw >> 96) as u32;
        let group_2 = (raw >> 80) as u16;
        let group_3 = (raw >> 64) as u16;
        let group_4 = (raw >> 48) as u16;
        let group_5 = raw & 0xFFFF_FFFF_FFFF;

        write!(
            f,
            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
            group_1, group_2, group_3, group_4, group_5
        )
    }
}

impl FontId {
    /// Generate a new unique FontId using an atomic counter.
    ///
    /// Ids come from a process-wide `AtomicU64` that starts at 1 and is
    /// incremented on every call.
    pub fn new() -> Self {
        static NEXT_ID: core::sync::atomic::AtomicU64 = core::sync::atomic::AtomicU64::new(1);
        let issued = NEXT_ID.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
        FontId(u128::from(issued))
    }
}
414
/// Whether a field is required to match (yes / no / don't care)
#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum PatternMatch {
    /// Default: don't particularly care whether the requirement matches
    #[default]
    DontCare,
    /// Requirement has to be true for the selected font
    True,
    /// Requirement has to be false for the selected font
    False,
}

impl PatternMatch {
    /// `true` for the two definite variants (`True` / `False`), `false`
    /// for `DontCare`.
    fn needs_to_match(&self) -> bool {
        match self {
            PatternMatch::DontCare => false,
            PatternMatch::True | PatternMatch::False => true,
        }
    }

    /// Two requirements are compatible when either side is `DontCare` or
    /// both carry the same definite value.
    fn matches(&self, other: &PatternMatch) -> bool {
        let wildcard = PatternMatch::DontCare;
        *self == wildcard || *other == wildcard || self == other
    }
}
442
/// Font weight values as defined in CSS specification
///
/// Each variant's discriminant is its numeric CSS weight (100..=900), so
/// the derived ordering runs from lightest to heaviest.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcWeight {
    Thin = 100,
    ExtraLight = 200,
    Light = 300,
    Normal = 400,
    Medium = 500,
    SemiBold = 600,
    Bold = 700,
    ExtraBold = 800,
    Black = 900,
}

impl FcWeight {
    /// Map a numeric weight onto the nearest named weight.
    ///
    /// Every named weight owns the 100-wide bucket centred on its value
    /// (e.g. 350..=449 is `Normal`); everything below 150 is `Thin` and
    /// everything from 850 upwards is `Black`.
    pub fn from_u16(weight: u16) -> Self {
        // Inclusive upper bound of each bucket, lightest first.
        const BUCKETS: [(u16, FcWeight); 8] = [
            (149, FcWeight::Thin),
            (249, FcWeight::ExtraLight),
            (349, FcWeight::Light),
            (449, FcWeight::Normal),
            (549, FcWeight::Medium),
            (649, FcWeight::SemiBold),
            (749, FcWeight::Bold),
            (849, FcWeight::ExtraBold),
        ];

        BUCKETS
            .iter()
            .find(|bucket| weight <= bucket.0)
            .map(|bucket| bucket.1)
            .unwrap_or(FcWeight::Black)
    }

    /// Pick the entry of `available` that should stand in for `self`.
    ///
    /// Returns `None` only when `available` is empty. Otherwise:
    ///
    /// * an exact match wins;
    /// * `Normal` prefers `Medium` (and `Medium` prefers `Normal`), then the
    ///   lighter weights from `Light` down to `Thin`, then the heavier
    ///   weights from `SemiBold` up to `Black`;
    /// * weights below `Normal` prefer the closest lighter weight, then the
    ///   closest heavier one;
    /// * weights above `Medium` prefer the closest heavier weight, then the
    ///   closest lighter one.
    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
        if available.is_empty() {
            return None;
        }
        if available.contains(self) {
            return Some(*self);
        }

        // The exact match is ruled out above, so strict comparisons suffice.
        // The derived ordering follows the numeric weight.
        let wanted = *self;
        let closest_lighter = || available.iter().copied().filter(|w| *w < wanted).max();
        let closest_heavier = || available.iter().copied().filter(|w| *w > wanted).min();

        match wanted {
            FcWeight::Normal | FcWeight::Medium => {
                // 400 and 500 try each other first, then share one order.
                let sibling = if wanted == FcWeight::Normal {
                    FcWeight::Medium
                } else {
                    FcWeight::Normal
                };
                let preference = [
                    sibling,
                    FcWeight::Light,
                    FcWeight::ExtraLight,
                    FcWeight::Thin,
                    FcWeight::SemiBold,
                    FcWeight::Bold,
                    FcWeight::ExtraBold,
                    FcWeight::Black,
                ];
                preference
                    .iter()
                    .copied()
                    .find(|candidate| available.contains(candidate))
                    // If nothing matches by now, return the first available weight
                    .or_else(|| available.first().copied())
            }
            FcWeight::Thin | FcWeight::ExtraLight | FcWeight::Light => {
                closest_lighter().or_else(closest_heavier)
            }
            FcWeight::SemiBold | FcWeight::Bold | FcWeight::ExtraBold | FcWeight::Black => {
                closest_heavier().or_else(closest_lighter)
            }
        }
    }
}

/// The default weight is `Normal` (400).
impl Default for FcWeight {
    fn default() -> Self {
        FcWeight::Normal
    }
}
624
/// CSS font-stretch values
///
/// Discriminants run from 1 (narrowest) to 9 (widest), so the derived
/// ordering goes from most condensed to most expanded.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcStretch {
    UltraCondensed = 1,
    ExtraCondensed = 2,
    Condensed = 3,
    SemiCondensed = 4,
    Normal = 5,
    SemiExpanded = 6,
    Expanded = 7,
    ExtraExpanded = 8,
    UltraExpanded = 9,
}

impl FcStretch {
    /// `true` for the four variants narrower than `Normal`.
    pub fn is_condensed(&self) -> bool {
        matches!(
            self,
            FcStretch::UltraCondensed
                | FcStretch::ExtraCondensed
                | FcStretch::Condensed
                | FcStretch::SemiCondensed
        )
    }

    /// Convert a numeric width class (1..=9) into a stretch value.
    ///
    /// Any value outside 1..=9 yields `Normal`.
    pub fn from_u16(width_class: u16) -> Self {
        // Indexed by `width_class - 1`.
        const BY_CLASS: [FcStretch; 9] = [
            FcStretch::UltraCondensed,
            FcStretch::ExtraCondensed,
            FcStretch::Condensed,
            FcStretch::SemiCondensed,
            FcStretch::Normal,
            FcStretch::SemiExpanded,
            FcStretch::Expanded,
            FcStretch::ExtraExpanded,
            FcStretch::UltraExpanded,
        ];

        width_class
            .checked_sub(1)
            .and_then(|index| BY_CLASS.get(usize::from(index)).copied())
            .unwrap_or(FcStretch::Normal)
    }

    /// Follows CSS spec for stretch matching
    ///
    /// Returns `None` when `available` is empty and the exact value when it
    /// is present. Otherwise `Normal` and the condensed values take the
    /// closest narrower entry before the closest wider one, while expanded
    /// values take the closest wider entry before the closest narrower one.
    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
        if available.is_empty() {
            return None;
        }
        if available.contains(self) {
            return Some(*self);
        }

        let wanted = *self;
        // Closest neighbour on each side, if any.
        let narrower = available.iter().copied().filter(|s| *s < wanted).max();
        let wider = available.iter().copied().filter(|s| *s > wanted).min();

        if wanted <= FcStretch::Normal {
            narrower.or(wider)
        } else {
            wider.or(narrower)
        }
    }
}

/// The default stretch is `Normal`.
impl Default for FcStretch {
    fn default() -> Self {
        FcStretch::Normal
    }
}
743
/// Unicode range representation for font matching
///
/// Both bounds are inclusive (see [`UnicodeRange::contains`]).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeRange {
    /// First code point of the range (inclusive).
    pub start: u32,
    /// Last code point of the range (inclusive).
    pub end: u32,
}

/// The default set of Unicode-block fallback scripts that
/// [`FcFontCache::resolve_font_chain`] pulls in when no explicit
/// `scripts_hint` is supplied.
///
/// Keeping this exposed lets callers that *do* want the default
/// behaviour build the set explicitly — typically by union-ing it
/// with a detected-from-document set before calling
/// [`FcFontCache::resolve_font_chain_with_scripts`].
pub const DEFAULT_UNICODE_FALLBACK_SCRIPTS: &[UnicodeRange] = &[
    UnicodeRange { start: 0x0400, end: 0x04FF }, // Cyrillic
    UnicodeRange { start: 0x0600, end: 0x06FF }, // Arabic
    UnicodeRange { start: 0x0900, end: 0x097F }, // Devanagari
    UnicodeRange { start: 0x3040, end: 0x309F }, // Hiragana
    UnicodeRange { start: 0x30A0, end: 0x30FF }, // Katakana
    UnicodeRange { start: 0x4E00, end: 0x9FFF }, // CJK Unified Ideographs
    UnicodeRange { start: 0xAC00, end: 0xD7A3 }, // Hangul Syllables
];

impl UnicodeRange {
    /// `true` if `c` lies within `start..=end`.
    pub fn contains(&self, c: char) -> bool {
        let c = c as u32;
        c >= self.start && c <= self.end
    }

    /// `true` if the two inclusive ranges share at least one code point.
    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
        self.start <= other.end && other.start <= self.end
    }

    /// `true` if `self` lies entirely within `other`.
    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
        self.start >= other.start && self.end <= other.end
    }
}

/// Returns `true` if any entry of `ranges` touches the inclusive block
/// `block_start..=block_end`.
///
/// A range touches the block when the two overlap. A range whose `start`
/// lies inside the block is always accepted as well, so every input that
/// the earlier start-only check recognised (including malformed ranges
/// with `start > end`) is still recognised.
fn any_range_touches_block(ranges: &[UnicodeRange], block_start: u32, block_end: u32) -> bool {
    let block = UnicodeRange { start: block_start, end: block_end };
    ranges.iter().any(|r| {
        let starts_inside = r.start >= block.start && r.start <= block.end;
        starts_inside || r.overlaps(&block)
    })
}

/// Check if any range covers CJK Unified Ideographs, Hiragana, Katakana, or Hangul
///
/// A range counts when it overlaps one of those blocks, not only when it
/// begins inside one — e.g. `0x0000..=0xFFFF` is reported as CJK.
pub fn has_cjk_ranges(ranges: &[UnicodeRange]) -> bool {
    // (start, end) of each block that is treated as CJK.
    const CJK_BLOCKS: [(u32, u32); 4] = [
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0xAC00, 0xD7AF), // Hangul Syllables
    ];
    CJK_BLOCKS
        .iter()
        .any(|&(lo, hi)| any_range_touches_block(ranges, lo, hi))
}

/// Check if any range covers the Arabic block
///
/// True when a range overlaps `0x0600..=0x06FF`.
pub fn has_arabic_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0600, 0x06FF)
}

/// Check if any range covers the Cyrillic block
///
/// True when a range overlaps `0x0400..=0x04FF`.
pub fn has_cyrillic_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0400, 0x04FF)
}

/// Check if any range covers the Hebrew block
///
/// True when a range overlaps `0x0590..=0x05FF`.
pub fn has_hebrew_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0590, 0x05FF)
}

/// Check if any range covers the Thai block
///
/// True when a range overlaps `0x0E00..=0x0E7F`.
pub fn has_thai_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0E00, 0x0E7F)
}
815
/// Log levels for trace messages
///
/// The derived ordering follows declaration order, so
/// `Debug < Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum TraceLevel {
    /// Lowest level in the derived ordering.
    Debug,
    /// Ordered above `Debug` and below `Warning`.
    Info,
    /// Ordered above `Info` and below `Error`.
    Warning,
    /// Highest level in the derived ordering.
    Error,
}
824
/// Reason for font matching failure or success
///
/// Every mismatch variant carries the value that was asked for alongside
/// the value that was encountered; `Success` carries no data.
// NOTE(review): the code that constructs these values is outside this part
// of the file, so the exact meaning of each field should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MatchReason {
    /// Requested vs. found name (either side may be absent).
    NameMismatch {
        requested: Option<String>,
        found: Option<String>,
    },
    /// Requested vs. found family (either side may be absent).
    FamilyMismatch {
        requested: Option<String>,
        found: Option<String>,
    },
    /// A style property differed; `property` names it, and the requested
    /// and found values are given in string form.
    StyleMismatch {
        property: &'static str,
        requested: String,
        found: String,
    },
    /// Requested vs. found weight.
    WeightMismatch {
        requested: FcWeight,
        found: FcWeight,
    },
    /// Requested vs. found stretch.
    StretchMismatch {
        requested: FcStretch,
        found: FcStretch,
    },
    /// A character together with a list of Unicode ranges.
    // NOTE(review): presumably `ranges` are the font's ranges that failed to
    // cover `character` — confirm against the producer.
    UnicodeRangeMismatch {
        character: char,
        ranges: Vec<UnicodeRange>,
    },
    /// The match succeeded.
    Success,
}
855
/// Trace message for debugging font matching.
///
/// Query functions take a `&mut Vec<TraceMsg>` and append to it (see the
/// crate-level usage example, which passes `&mut Vec::new()`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TraceMsg {
    /// Severity of this message.
    pub level: TraceLevel,
    // presumably the path (or identifier) of the font being examined -- TODO confirm
    pub path: String,
    /// Why the font matched or did not match.
    pub reason: MatchReason,
}
863
/// Hinting style for font rendering.
///
/// `repr(C)` with explicit discriminants; the default is [`FcHintStyle::None`].
// NOTE(review): the discriminants look like fontconfig's `FC_HINT_*` constants -- confirm.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcHintStyle {
    /// No hinting (default).
    #[default]
    None = 0,
    /// Slight hinting.
    Slight = 1,
    /// Medium hinting.
    Medium = 2,
    /// Full hinting.
    Full = 3,
}
875
/// Subpixel rendering order.
///
/// `repr(C)` with explicit discriminants; the default is [`FcRgba::Unknown`].
// NOTE(review): the discriminants look like fontconfig's `FC_RGBA_*` constants -- confirm.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcRgba {
    /// Subpixel order not known (default).
    #[default]
    Unknown = 0,
    /// Red, green, blue.
    Rgb = 1,
    /// Blue, green, red.
    Bgr = 2,
    // presumably vertical R-G-B ordering -- TODO confirm
    Vrgb = 3,
    // presumably vertical B-G-R ordering -- TODO confirm
    Vbgr = 4,
    // Distinct from `Unknown`; presumably "no subpixel geometry" -- TODO confirm
    None = 5,
}
889
/// LCD filter mode for subpixel rendering.
///
/// `repr(C)` with explicit discriminants; the default is [`FcLcdFilter::None`].
// NOTE(review): the discriminants look like fontconfig's `FC_LCD_*` constants -- confirm.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcLcdFilter {
    /// No LCD filtering (default).
    #[default]
    None = 0,
    /// The "default" filter.
    Default = 1,
    /// The "light" filter.
    Light = 2,
    /// The "legacy" filter.
    Legacy = 3,
}
901
902/// Per-font rendering configuration from system font config (Linux fonts.conf).
903///
904/// All fields are `Option<T>` -- `None` means "use system default".
905/// On non-Linux platforms, this is always all-None (no per-font overrides).
906#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
907#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
908pub struct FcFontRenderConfig {
909    pub antialias: Option<bool>,
910    pub hinting: Option<bool>,
911    pub hintstyle: Option<FcHintStyle>,
912    pub autohint: Option<bool>,
913    pub rgba: Option<FcRgba>,
914    pub lcdfilter: Option<FcLcdFilter>,
915    pub embeddedbitmap: Option<bool>,
916    pub embolden: Option<bool>,
917    pub dpi: Option<f64>,
918    pub scale: Option<f64>,
919    pub minspace: Option<bool>,
920}
921
922/// Helper newtype to provide Eq/Ord for Option<f64> via total-order bit comparison.
923/// This allows FcFontRenderConfig to be used inside FcPattern which derives Eq + Ord.
924impl Eq for FcFontRenderConfig {}
925
926impl Ord for FcFontRenderConfig {
927    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
928        // Compare all non-f64 fields first
929        let ord = self.antialias.cmp(&other.antialias)
930            .then_with(|| self.hinting.cmp(&other.hinting))
931            .then_with(|| self.hintstyle.cmp(&other.hintstyle))
932            .then_with(|| self.autohint.cmp(&other.autohint))
933            .then_with(|| self.rgba.cmp(&other.rgba))
934            .then_with(|| self.lcdfilter.cmp(&other.lcdfilter))
935            .then_with(|| self.embeddedbitmap.cmp(&other.embeddedbitmap))
936            .then_with(|| self.embolden.cmp(&other.embolden))
937            .then_with(|| self.minspace.cmp(&other.minspace));
938
939        // For f64 fields, use to_bits() for total ordering
940        let ord = ord.then_with(|| {
941            let a = self.dpi.map(|v| v.to_bits());
942            let b = other.dpi.map(|v| v.to_bits());
943            a.cmp(&b)
944        });
945        ord.then_with(|| {
946            let a = self.scale.map(|v| v.to_bits());
947            let b = other.scale.map(|v| v.to_bits());
948            a.cmp(&b)
949        })
950    }
951}
952
/// Font pattern for matching.
///
/// Used both as a query (see the crate-level example, which fills in `name`
/// and leaves the rest at `Default::default()`) and as the stored description
/// of a font inside the cache. The derived `Ord` compares fields in
/// declaration order, so reordering fields changes the sort order of any map
/// keyed by `FcPattern`.
#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcPattern {
    /// Font name.
    pub name: Option<String>,
    /// Family name.
    pub family: Option<String>,
    /// "italic" property.
    pub italic: PatternMatch,
    /// "oblique" property.
    pub oblique: PatternMatch,
    /// "bold" property.
    pub bold: PatternMatch,
    /// "monospace" property.
    pub monospace: PatternMatch,
    /// "condensed" property.
    pub condensed: PatternMatch,
    /// Font weight.
    pub weight: FcWeight,
    /// Font stretch.
    pub stretch: FcStretch,
    /// Unicode ranges to match. An empty list is treated as "matches every
    /// character" by [`FcPattern::contains_char`].
    pub unicode_ranges: Vec<UnicodeRange>,
    /// Extended font metadata.
    pub metadata: FcFontMetadata,
    /// Per-font rendering configuration (from system fonts.conf on Linux).
    pub render_config: FcFontRenderConfig,
}
983
984impl core::fmt::Debug for FcPattern {
985    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
986        let mut d = f.debug_struct("FcPattern");
987
988        if let Some(name) = &self.name {
989            d.field("name", name);
990        }
991
992        if let Some(family) = &self.family {
993            d.field("family", family);
994        }
995
996        if self.italic != PatternMatch::DontCare {
997            d.field("italic", &self.italic);
998        }
999
1000        if self.oblique != PatternMatch::DontCare {
1001            d.field("oblique", &self.oblique);
1002        }
1003
1004        if self.bold != PatternMatch::DontCare {
1005            d.field("bold", &self.bold);
1006        }
1007
1008        if self.monospace != PatternMatch::DontCare {
1009            d.field("monospace", &self.monospace);
1010        }
1011
1012        if self.condensed != PatternMatch::DontCare {
1013            d.field("condensed", &self.condensed);
1014        }
1015
1016        if self.weight != FcWeight::Normal {
1017            d.field("weight", &self.weight);
1018        }
1019
1020        if self.stretch != FcStretch::Normal {
1021            d.field("stretch", &self.stretch);
1022        }
1023
1024        if !self.unicode_ranges.is_empty() {
1025            d.field("unicode_ranges", &self.unicode_ranges);
1026        }
1027
1028        // Only show non-empty metadata fields
1029        let empty_metadata = FcFontMetadata::default();
1030        if self.metadata != empty_metadata {
1031            d.field("metadata", &self.metadata);
1032        }
1033
1034        // Only show render_config when it differs from default
1035        let empty_render_config = FcFontRenderConfig::default();
1036        if self.render_config != empty_render_config {
1037            d.field("render_config", &self.render_config);
1038        }
1039
1040        d.finish()
1041    }
1042}
1043
/// Font metadata from the OS/2 table
///
/// Every field is optional; `FcFontMetadata::default()` (all `None`) is what
/// the `Debug` impl of `FcPattern` treats as "no metadata".
// NOTE(review): these fields (copyright, designer, license, PostScript name,
// preferred family, ...) look like records of the OpenType `name` table
// rather than the OS/2 table -- confirm against the parser and fix the
// summary line above if so.
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontMetadata {
    /// Copyright notice.
    pub copyright: Option<String>,
    /// Designer name.
    pub designer: Option<String>,
    /// Designer URL.
    pub designer_url: Option<String>,
    /// Font family name.
    pub font_family: Option<String>,
    /// Font subfamily name.
    pub font_subfamily: Option<String>,
    /// Full font name.
    pub full_name: Option<String>,
    /// Description.
    pub id_description: Option<String>,
    /// License text.
    pub license: Option<String>,
    /// License URL.
    pub license_url: Option<String>,
    /// Manufacturer name.
    pub manufacturer: Option<String>,
    /// Manufacturer URL.
    pub manufacturer_url: Option<String>,
    /// PostScript name.
    pub postscript_name: Option<String>,
    /// Preferred family name.
    pub preferred_family: Option<String>,
    /// Preferred subfamily name.
    pub preferred_subfamily: Option<String>,
    /// Trademark notice.
    pub trademark: Option<String>,
    /// Unique identifier string.
    pub unique_id: Option<String>,
    /// Version string.
    pub version: Option<String>,
}
1066
1067impl FcPattern {
1068    /// Check if this pattern would match the given character
1069    pub fn contains_char(&self, c: char) -> bool {
1070        if self.unicode_ranges.is_empty() {
1071            return true; // No ranges specified means match all characters
1072        }
1073
1074        for range in &self.unicode_ranges {
1075            if range.contains(c) {
1076                return true;
1077            }
1078        }
1079
1080        false
1081    }
1082}
1083
/// Font match result with UUID
///
/// Returned by font queries (the crate-level example reads `id` and
/// `unicode_ranges` from the result of `FcFontCache::query`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontMatch {
    /// Identifier of the matched font inside the cache.
    pub id: FontId,
    /// Unicode ranges associated with the matched font.
    pub unicode_ranges: Vec<UnicodeRange>,
    /// Additional fonts recorded as fallbacks for this match.
    pub fallbacks: Vec<FontMatchNoFallback>,
}
1091
/// Font match result with UUID (without fallback)
///
/// Same as [`FontMatch`] minus the `fallbacks` list; this is the element
/// type of `FontMatch::fallbacks`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontMatchNoFallback {
    /// Identifier of the font inside the cache.
    pub id: FontId,
    /// Unicode ranges associated with the font.
    pub unicode_ranges: Vec<UnicodeRange>,
}
1098
/// A run of text that uses the same font
/// Returned by FontFallbackChain::query_for_text()
///
/// `text` is always `original[start_byte..end_byte]`; the byte offsets fall
/// on `char` boundaries because they come from `str::char_indices`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFontRun {
    /// The text content of this run
    pub text: String,
    /// Start byte index in the original text
    pub start_byte: usize,
    /// End byte index in the original text (exclusive)
    pub end_byte: usize,
    /// The font to use for this run (None if no font found)
    pub font_id: Option<FontId>,
    /// Which CSS font-family this came from.
    /// Empty when no font was found for the run.
    pub css_source: String,
}
1114
/// Resolved font fallback chain for a CSS font-family stack
/// This represents the complete chain of fonts to use for rendering text
///
/// Lookup order (see `resolve_char`): `css_fallbacks` groups in order, then
/// `unicode_fallbacks`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontFallbackChain {
    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
    pub css_fallbacks: Vec<CssFallbackGroup>,

    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
    /// Only populated if css_fallbacks don't cover all requested characters
    pub unicode_fallbacks: Vec<FontMatch>,

    /// The original CSS font-family stack that was requested
    pub original_stack: Vec<String>,
}
1130
1131impl FontFallbackChain {
1132    /// Resolve which font should be used for a specific character
1133    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
1134    /// Returns None if no font in the chain can render this character
1135    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
1136        let codepoint = ch as u32;
1137
1138        // Check CSS fallbacks in order
1139        for group in &self.css_fallbacks {
1140            for font in &group.fonts {
1141                let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1142                if meta.unicode_ranges.is_empty() {
1143                    continue; // No range info — don't assume it covers everything
1144                }
1145                if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1146                    return Some((font.id, group.css_name.clone()));
1147                }
1148            }
1149        }
1150
1151        // Check Unicode fallbacks
1152        for font in &self.unicode_fallbacks {
1153            let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1154            if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1155                return Some((font.id, "(unicode-fallback)".to_string()));
1156            }
1157        }
1158
1159        // WEB-LIFT LAST-RESORT (re-added 2026-06-03; the `with_memory_fonts` trap that
1160        // previously made touching this file fatal is now fixed by the byte-atomic remill
1161        // fork support). The lifted web path fails coverage-based resolution above for TWO
1162        // reasons that both mis-lift: the chain mis-builds to empty AND/OR `get_metadata_by_id`
1163        // (a HashMap<FontId,_> lookup) returns None in the lift. So instead of gating on the
1164        // chain being empty, fire whenever NOTHING matched above AND the cache holds exactly
1165        // the single registered fallback font — the headless/web case. This bypasses BOTH the
1166        // chain and the metadata HashMap, returning the only font's id directly. Native caches
1167        // hold many system fonts, so `len()==1` is false there → native is unaffected.
1168        let registered = cache.list();
1169        if registered.len() == 1 {
1170            return Some((registered[0].1, "(web-last-resort)".to_string()));
1171        }
1172
1173        None
1174    }
1175    
1176    /// Resolve all characters in a text string to their fonts
1177    /// Returns a vector of (character, FontId, css_source) tuples
1178    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
1179        text.chars()
1180            .map(|ch| (ch, self.resolve_char(cache, ch)))
1181            .collect()
1182    }
1183    
1184    /// Query which fonts should be used for a text string, grouped by font
1185    /// Returns runs of consecutive characters that use the same font
1186    /// This is the main API for text shaping - call this to get font runs, then shape each run
1187    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
1188        if text.is_empty() {
1189            return Vec::new();
1190        }
1191        
1192        let mut runs: Vec<ResolvedFontRun> = Vec::new();
1193        let mut current_font: Option<FontId> = None;
1194        let mut current_css_source: Option<String> = None;
1195        let mut current_start_byte: usize = 0;
1196        
1197        for (byte_idx, ch) in text.char_indices() {
1198            let resolved = self.resolve_char(cache, ch);
1199            let (font_id, css_source) = match &resolved {
1200                Some((id, source)) => (Some(*id), Some(source.clone())),
1201                None => (None, None),
1202            };
1203            
1204            // Check if we need to start a new run
1205            let font_changed = font_id != current_font;
1206            
1207            if font_changed && byte_idx > 0 {
1208                // Finalize the current run
1209                let run_text = &text[current_start_byte..byte_idx];
1210                runs.push(ResolvedFontRun {
1211                    text: run_text.to_string(),
1212                    start_byte: current_start_byte,
1213                    end_byte: byte_idx,
1214                    font_id: current_font,
1215                    css_source: current_css_source.clone().unwrap_or_default(),
1216                });
1217                current_start_byte = byte_idx;
1218            }
1219            
1220            current_font = font_id;
1221            current_css_source = css_source;
1222        }
1223        
1224        // Finalize the last run
1225        if current_start_byte < text.len() {
1226            let run_text = &text[current_start_byte..];
1227            runs.push(ResolvedFontRun {
1228                text: run_text.to_string(),
1229                start_byte: current_start_byte,
1230                end_byte: text.len(),
1231                font_id: current_font,
1232                css_source: current_css_source.unwrap_or_default(),
1233            });
1234        }
1235        
1236        runs
1237    }
1238}
1239
/// A group of fonts that are fallbacks for a single CSS font-family name
///
/// Element type of `FontFallbackChain::css_fallbacks`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssFallbackGroup {
    /// The CSS font name (e.g., "NotoSansJP", "sans-serif").
    /// `FontFallbackChain::resolve_char` returns a clone of this string as
    /// the source name of a match found in this group.
    pub css_name: String,

    /// System fonts that match this CSS name
    /// First font in list is the best match
    pub fonts: Vec<FontMatch>,
}
1250
/// Cache key for font fallback chain queries
///
/// IMPORTANT: This key intentionally does NOT include per-text unicode
/// ranges — fallback chains are cached by CSS properties only. Different
/// texts with the same CSS font-stack share the same chain.
///
/// `scripts_hint_hash` distinguishes *which set of Unicode-fallback
/// scripts* the caller asked for. `None` means "the default set of 7
/// major scripts" (Cyrillic/Arabic/Devanagari/Hiragana/Katakana/CJK/Hangul,
/// back-compat behaviour of `resolve_font_chain`). `Some(h)` is a
/// stable hash of a caller-supplied script list so an ASCII-only
/// query doesn't collide with a CJK-aware one.
///
/// Only compiled with the `std` feature.
#[cfg(feature = "std")]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct FontChainCacheKey {
    /// CSS font stack (expanded to OS-specific fonts)
    pub(crate) font_families: Vec<String>,
    /// Font weight
    pub(crate) weight: FcWeight,
    /// Font style flag: italic
    pub(crate) italic: PatternMatch,
    /// Font style flag: oblique
    pub(crate) oblique: PatternMatch,
    /// Hash of the caller-supplied script hint (or `None` for the default set).
    pub(crate) scripts_hint_hash: Option<u64>,
}
1276
/// Hash a `scripts_hint` slice into a stable u64 for use as a
/// [`FontChainCacheKey`] component.
///
/// Order-insensitive: a local copy is sorted before hashing, so
/// `[CJK, Arabic]` and `[Arabic, CJK]` key into the same cache slot. The
/// hashed byte string is the little-endian `start` followed by the
/// little-endian `end` of every range, in sorted order.
#[cfg(feature = "std")]
fn hash_scripts_hint(ranges: &[UnicodeRange]) -> u64 {
    let mut ordered = ranges.to_vec();
    ordered.sort();

    // Two little-endian words per range.
    let mut bytes = Vec::with_capacity(ordered.len() * 8);
    for range in &ordered {
        for bound in [range.start, range.end] {
            bytes.extend_from_slice(&bound.to_le_bytes());
        }
    }

    crate::utils::content_hash_u64(&bytes)
}
1292
/// Path to a font file
///
/// `bytes_hash` is a deterministic 64-bit hash of the file's full
/// byte contents (see [`crate::utils::content_hash_u64`]). All faces
/// of a given `.ttc` file share the same `bytes_hash`, and two
/// different paths pointing at the same file contents also do —
/// so the cache can share a single `Arc<[u8]>` across them via
/// [`FcFontCache::get_font_bytes`]. A value of `0` means "hash
/// not computed" (e.g. built from a filename-only scan, or loaded
/// from a legacy v1 disk cache); callers must treat `0` as opaque
/// and fall back to unshared reads.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcFontPath {
    /// Location of the font file.
    pub path: String,
    // presumably the index of the face inside a collection file (`.ttc`) -- TODO confirm
    pub font_index: usize,
    /// 64-bit content hash of the file's bytes. 0 = not computed.
    /// Defaults to `0` when missing from serialized data (`serde(default)`).
    #[cfg_attr(feature = "cache", serde(default))]
    pub bytes_hash: u64,
}
1314
/// In-memory font data
///
/// Carried by [`OwnedFontSource::Memory`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct FcFont {
    /// Raw font file bytes.
    pub bytes: Vec<u8>,
    // presumably the index of the face inside `bytes` when it is a collection -- TODO confirm
    pub font_index: usize,
    /// Identifier string. For identification in tests.
    pub id: String,
}
1323
/// Owned font-source descriptor, returned by
/// [`FcFontCache::get_font_by_id`].
///
/// In v4.0 this was a borrowed enum (`FontSource<'a>` with refs into
/// the pattern map). With v4.1's shared-state cache, the map lives
/// behind an `RwLock`, so returning a reference would require the
/// caller to hold a read guard for the full lifetime of the result —
/// which bleeds the locking strategy into every call site. The owned
/// variant clones the small `FcFont` / `FcFontPath` struct and
/// releases the lock immediately. Bytes/mmap are not cloned — those
/// go through `get_font_bytes` which hands out `Arc<FontBytes>`.
#[derive(Debug, Clone)]
pub enum OwnedFontSource {
    /// Font loaded from memory (small metadata + owned `Vec<u8>`).
    /// Note that cloning this variant clones the `Vec<u8>` inside `FcFont`.
    Memory(FcFont),
    /// Font loaded from disk (path, face index and content hash only).
    Disk(FcFontPath),
}
1342
/// A handle to font bytes returned by [`FcFontCache::get_font_bytes`].
///
/// On disk, an `Mmap` is used so untouched pages don't count toward
/// process RSS. In-memory fonts (`FcFont`) come back as `Owned` since
/// they're already on the heap.
///
/// `FontBytes` derefs to `[u8]` and implements `AsRef<[u8]>`, so any
/// existing API that wants `&[u8]` (allsorts, ttf-parser, …) can
/// accept it without code changes.
///
/// Both variants are `Send + Sync` (mmaps and `Arc<[u8]>` are both
/// safe to share across threads).
///
/// Only compiled with the `std` feature.
#[cfg(feature = "std")]
pub enum FontBytes {
    /// Heap-owned bytes. Used for `FontSource::Memory` and as a
    /// fallback when mmap is unavailable.
    Owned(std::sync::Arc<[u8]>),
    /// File-backed mmap. Read-only; pages are demand-loaded by the
    /// kernel. Absent on wasm targets, where `mmapio` is unavailable
    /// (the optional dep is gated to `cfg(not(target_family="wasm"))`).
    #[cfg(not(target_family = "wasm"))]
    Mmapped(mmapio::Mmap),
}
1366
#[cfg(feature = "std")]
impl FontBytes {
    /// Borrow the underlying byte slice, whichever variant holds it.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        match *self {
            FontBytes::Owned(ref heap) => &heap[..],
            #[cfg(not(target_family = "wasm"))]
            FontBytes::Mmapped(ref map) => &map[..],
        }
    }
}
1379
/// Lets a `FontBytes` be used wherever a `[u8]` is expected.
#[cfg(feature = "std")]
impl core::ops::Deref for FontBytes {
    type Target = [u8];

    /// Same slice as [`FontBytes::as_slice`].
    #[inline]
    fn deref(&self) -> &[u8] {
        FontBytes::as_slice(self)
    }
}
1388
/// Lets a `FontBytes` be passed to APIs that take `impl AsRef<[u8]>`.
#[cfg(feature = "std")]
impl AsRef<[u8]> for FontBytes {
    /// Same slice as [`FontBytes::as_slice`].
    #[inline]
    fn as_ref(&self) -> &[u8] {
        FontBytes::as_slice(self)
    }
}
1396
/// Prints the variant name and the byte count only, never the bytes.
#[cfg(feature = "std")]
impl core::fmt::Debug for FontBytes {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let byte_count = self.as_slice().len();
        let variant = match self {
            FontBytes::Owned(_) => "Owned",
            #[cfg(not(target_family = "wasm"))]
            FontBytes::Mmapped(_) => "Mmapped",
        };
        f.write_fmt(format_args!("FontBytes::{}({} bytes)", variant, byte_count))
    }
}
1408
/// Open a font file as an mmap-backed [`FontBytes`].
///
/// Falls back to reading the whole file onto the heap when the file cannot
/// be opened for mapping, when the mapping itself fails (e.g. the file is on
/// a network share that doesn't support mmap), or on wasm targets where the
/// mmap branch is compiled out. Returns `None` only when the heap read
/// fails too.
#[cfg(feature = "std")]
fn open_font_bytes_mmap(path: &str) -> Option<std::sync::Arc<FontBytes>> {
    use std::sync::Arc;

    #[cfg(not(target_family = "wasm"))]
    {
        let mapped = std::fs::File::open(path).ok().and_then(|file| {
            // SAFETY: `map` requires that the file is not mutated while it
            // is mapped. For system fonts that is the overwhelmingly common
            // case; if a user replaces the file we accept reading the
            // snapshot that was mapped earlier.
            let mapping = unsafe { mmapio::MmapOptions::new().map(&file) };
            mapping.ok()
        });
        if let Some(mmap) = mapped {
            return Some(Arc::new(FontBytes::Mmapped(mmap)));
        }
    }

    // Heap fallback.
    std::fs::read(path)
        .ok()
        .map(|bytes| Arc::new(FontBytes::Owned(Arc::from(bytes))))
}
1432
/// A named font to be added to the font cache from memory.
/// This is the primary way to supply custom fonts to the application.
#[derive(Debug, Clone)]
pub struct NamedFont {
    /// Human-readable name for this font (e.g., "My Custom Font")
    pub name: String,
    /// The raw font file bytes (TTF, OTF, WOFF, WOFF2, TTC)
    pub bytes: Vec<u8>,
}

impl NamedFont {
    /// Create a new named font from a name and the raw font file bytes.
    ///
    /// `name` accepts anything convertible into a `String` (`&str`,
    /// `String`, ...); `bytes` is stored as-is without being parsed.
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        let name: String = name.into();
        NamedFont { name, bytes }
    }
}
1452
/// Font cache, initialized at startup.
///
/// Thread-safe, shared font cache.
///
/// As of v4.1 the cache internally owns its state via
/// `Arc<RwLock<FcFontCacheInner>>`: cloning an `FcFontCache` returns
/// a handle that shares the same underlying data. Writes by one holder
/// (typically the background builder inside `FcFontRegistry`) become
/// immediately visible to every other holder (layout engines,
/// shape-time resolvers, etc.).
///
/// Concretely, the handle is an `Arc<FcFontCacheShared>`, and the
/// `FcFontCacheInner` state sits behind an `StLock` inside that shared
/// struct.
///
/// Before 4.1 the clone deep-copied every map, so external holders
/// were frozen at the moment they took the snapshot — the mismatch
/// between "live registry cache" and "frozen font manager cache"
/// was the root of the silent-text regression when lazy scout mode
/// was enabled. The shared-state design eliminates that entire class
/// of staleness bugs by construction.
pub struct FcFontCache {
    /// Shared interior; every clone of the cache points at the same value.
    pub(crate) shared: std::sync::Arc<FcFontCacheShared>,
}
1473
// Internal lock wrapper for the cache state. Two implementations selected by feature:
//
// DEFAULT (general builds): backed by std `RwLock`. `read`/`write`/`lock` return
// `Result<_, Infallible>` for a uniform call site (a poisoned lock is recovered via
// `into_inner` — a memoisation cache is still valid to read after a panic).
//
// `single-thread-unsafe-locks` feature: a bare `UnsafeCell` with NO atomics; `read`/`write`/
// `lock` hand out a guard immediately. UNSOUND in a multi-threaded program — enable ONLY for a
// known single-threaded environment. Exists for the azul remill-lifted web backend
// (single-threaded wasm), where std's queue-based RwLock `lock_contended` path spins forever
// (no other thread ever unparks it) and hangs the layout solver.

/// Lock wrapper used by `FcFontCacheShared` (the shared interior of
/// `FcFontCache`) to guard its state. Default, thread-safe flavour backed
/// by `std::sync::RwLock`.
#[cfg(not(feature = "single-thread-unsafe-locks"))]
pub struct StLock<T> {
    lock: std::sync::RwLock<T>,
}

/// Opaque `Debug`: never locks and never prints the protected value.
#[cfg(not(feature = "single-thread-unsafe-locks"))]
impl<T> core::fmt::Debug for StLock<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "StLock(..)")
    }
}

#[cfg(not(feature = "single-thread-unsafe-locks"))]
impl<T> StLock<T> {
    /// Wrap `v` in a new, unlocked lock.
    pub fn new(v: T) -> Self {
        let lock = std::sync::RwLock::new(v);
        Self { lock }
    }

    /// Acquire shared (read) access. Never fails: a poisoned lock is
    /// recovered and its guard handed out anyway.
    pub fn read(&self) -> Result<StReadGuard<'_, T>, core::convert::Infallible> {
        let g = match self.lock.read() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        };
        Ok(StReadGuard { g })
    }

    /// Acquire exclusive (write) access. Never fails: a poisoned lock is
    /// recovered and its guard handed out anyway.
    pub fn write(&self) -> Result<StWriteGuard<'_, T>, core::convert::Infallible> {
        let g = match self.lock.write() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        };
        Ok(StWriteGuard { g })
    }

    /// Mutex-style alias for [`StLock::write`].
    pub fn lock(&self) -> Result<StWriteGuard<'_, T>, core::convert::Infallible> {
        self.write()
    }
}

/// Shared-access guard returned by [`StLock::read`].
#[cfg(not(feature = "single-thread-unsafe-locks"))]
pub struct StReadGuard<'a, T> {
    g: std::sync::RwLockReadGuard<'a, T>,
}

#[cfg(not(feature = "single-thread-unsafe-locks"))]
impl<'a, T> core::ops::Deref for StReadGuard<'a, T> {
    type Target = T;

    fn deref(&self) -> &T {
        &*self.g
    }
}

/// Exclusive-access guard returned by [`StLock::write`] and [`StLock::lock`].
#[cfg(not(feature = "single-thread-unsafe-locks"))]
pub struct StWriteGuard<'a, T> {
    g: std::sync::RwLockWriteGuard<'a, T>,
}

#[cfg(not(feature = "single-thread-unsafe-locks"))]
impl<'a, T> core::ops::Deref for StWriteGuard<'a, T> {
    type Target = T;

    fn deref(&self) -> &T {
        &*self.g
    }
}

#[cfg(not(feature = "single-thread-unsafe-locks"))]
impl<'a, T> core::ops::DerefMut for StWriteGuard<'a, T> {
    fn deref_mut(&mut self) -> &mut T {
        &mut *self.g
    }
}
1535
/// Lock-free stand-in for the `RwLock`-backed `StLock`, selected by the
/// `single-thread-unsafe-locks` feature.
///
/// There is no locking at all: `read`/`write`/`lock` hand out a reference
/// into the `UnsafeCell` immediately. This is only acceptable in a program
/// that is known to be single-threaded.
// NOTE(review): nothing here prevents a read guard and a write guard (or two
// write guards) from being alive at the same time, which would alias a
// `&mut T` even on a single thread -- callers must not overlap guards.
#[cfg(feature = "single-thread-unsafe-locks")]
pub struct StLock<T> {
    cell: std::cell::UnsafeCell<T>,
}
// SAFETY (by contract of the feature flag, not checked by the compiler):
// the feature may only be enabled for single-threaded programs, so the value
// is never actually accessed from two threads. Note there is no `T: Send` /
// `T: Sync` bound.
#[cfg(feature = "single-thread-unsafe-locks")]
unsafe impl<T> Sync for StLock<T> {}
#[cfg(feature = "single-thread-unsafe-locks")]
unsafe impl<T> Send for StLock<T> {}
/// Opaque `Debug`: never prints the protected value.
#[cfg(feature = "single-thread-unsafe-locks")]
impl<T> core::fmt::Debug for StLock<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("StLock(..)")
    }
}
#[cfg(feature = "single-thread-unsafe-locks")]
impl<T> StLock<T> {
    /// Wrap `v`.
    pub fn new(v: T) -> Self {
        Self { cell: std::cell::UnsafeCell::new(v) }
    }
    /// Hand out shared access immediately. Always `Ok`.
    pub fn read(&self) -> Result<StReadGuard<'_, T>, core::convert::Infallible> {
        // SAFETY: relies on the single-threaded contract of the feature and
        // on callers not holding a write guard at the same time.
        Ok(StReadGuard { r: unsafe { &*self.cell.get() } })
    }
    /// Hand out exclusive access immediately. Always `Ok`.
    pub fn write(&self) -> Result<StWriteGuard<'_, T>, core::convert::Infallible> {
        // SAFETY: relies on the single-threaded contract of the feature and
        // on callers not holding any other guard at the same time.
        Ok(StWriteGuard { r: unsafe { &mut *self.cell.get() } })
    }
    /// Same as [`StLock::write`].
    pub fn lock(&self) -> Result<StWriteGuard<'_, T>, core::convert::Infallible> {
        // SAFETY: same requirements as `write`.
        Ok(StWriteGuard { r: unsafe { &mut *self.cell.get() } })
    }
}
/// Shared-access "guard": a plain reference, nothing is released on drop.
#[cfg(feature = "single-thread-unsafe-locks")]
pub struct StReadGuard<'a, T> {
    r: &'a T,
}
#[cfg(feature = "single-thread-unsafe-locks")]
impl<'a, T> core::ops::Deref for StReadGuard<'a, T> {
    type Target = T;
    fn deref(&self) -> &T { self.r }
}
/// Exclusive-access "guard": a plain mutable reference, nothing is released on drop.
#[cfg(feature = "single-thread-unsafe-locks")]
pub struct StWriteGuard<'a, T> {
    r: &'a mut T,
}
#[cfg(feature = "single-thread-unsafe-locks")]
impl<'a, T> core::ops::Deref for StWriteGuard<'a, T> {
    type Target = T;
    fn deref(&self) -> &T { self.r }
}
#[cfg(feature = "single-thread-unsafe-locks")]
impl<'a, T> core::ops::DerefMut for StWriteGuard<'a, T> {
    fn deref_mut(&mut self) -> &mut T { self.r }
}
1587
/// Shared interior of `FcFontCache`. Always accessed through an
/// `Arc` — never referenced directly by external callers.
///
/// Holds three independently locked pieces: the main pattern/metadata
/// state, the fallback-chain memoisation cache, and the shared file-bytes
/// cache.
pub(crate) struct FcFontCacheShared {
    /// Main pattern/metadata state, guarded by a reader-writer lock.
    /// Builder threads take the write lock to insert a parsed font;
    /// all query paths take the read lock.
    pub(crate) state: StLock<FcFontCacheInner>,
    /// Font fallback chain cache. Not part of the RwLock-guarded
    /// state because cache insertions happen under `&self` on read
    /// paths (they're a memoisation, not observable state).
    pub(crate) chain_cache: StLock<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
    /// Shared file-bytes cache: content-hash → weak [`FontBytes`].
    ///
    /// [`FcFontCache::get_font_bytes`] populates this so that multiple
    /// FontIds backed by the same file (e.g. every face of a `.ttc`)
    /// return the same `Arc<FontBytes>` — and therefore the same mmap
    /// — instead of each allocating their own buffer. We hold `Weak`
    /// references so the mmap is unmapped as soon as no parsed font
    /// keeps it alive.
    pub(crate) shared_bytes: StLock<std::collections::HashMap<u64, std::sync::Weak<FontBytes>>>,
}
1607
/// The actual font-pattern state, held behind the RwLock in
/// `FcFontCacheShared`. Private — all access goes through
/// `FcFontCache` methods which lock transparently.
#[derive(Default, Debug)]
pub(crate) struct FcFontCacheInner {
    /// Pattern to FontId mapping (query index)
    pub(crate) patterns: BTreeMap<FcPattern, FontId>,
    /// On-disk font paths
    pub(crate) disk_fonts: BTreeMap<FontId, FcFontPath>,
    /// In-memory fonts
    pub(crate) memory_fonts: BTreeMap<FontId, FcFont>,
    /// Metadata cache (patterns stored by ID for quick lookup)
    pub(crate) metadata: BTreeMap<FontId, FcPattern>,
    /// Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds.
    /// Enables fast fuzzy search by intersecting token sets.
    ///
    /// NOTE: `index_pattern_tokens` is currently a no-op, so unless another
    /// code path fills this map it stays empty.
    pub(crate) token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
    /// Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>.
    /// Avoids re-tokenization during fuzzy search.
    ///
    /// NOTE: same caveat as `token_index`.
    pub(crate) font_tokens: BTreeMap<FontId, Vec<String>>,
}
1628
impl FcFontCacheInner {
    /// Hook for adding a font pattern to the token index. Called under
    /// the write lock by insertion paths.
    ///
    /// In this build the method is a deliberate no-op: both arguments
    /// are ignored and neither `token_index` nor `font_tokens` is
    /// modified. The comment in the body records why.
    pub(crate) fn index_pattern_tokens(&mut self, _pattern: &FcPattern, _id: FontId) {
        // WEB-LIFT (2026-06-02): no-op on the azul web fork. The tokenizer
        // (`extract_font_name_tokens` char-classification + lowercasing) pulls unicode tables
        // whose jump-tables the remill/web lift leaves un-devirt'd → MISSING_BLOCK trap inside
        // `with_memory_fonts`. `token_index`/`font_tokens` feed ONLY the separate token-fuzzy
        // search path (query_fuzzy); the main `query`→`query_internal_locked` scores by
        // unicode-compatibility + style over the registered patterns/metadata (populated before
        // this call), so leaving the token index empty does not affect normal font matching.
    }
}
1642
1643impl Clone for FcFontCache {
1644    /// Shallow clone — the returned handle shares the same underlying
1645    /// state as `self`. Writes through either are visible to both.
1646    /// This is the whole point of the v4.1 redesign; callers that need
1647    /// an isolated frozen copy must explicitly request one (e.g. via
1648    /// `snapshot_state`, which is intentionally not provided because
1649    /// we no longer have a use case for it).
1650    fn clone(&self) -> Self {
1651        Self {
1652            shared: std::sync::Arc::clone(&self.shared),
1653        }
1654    }
1655}
1656
1657impl core::fmt::Debug for FcFontCache {
1658    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1659        let state = self.state_read();
1660        f.debug_struct("FcFontCache")
1661            .field("patterns_len", &state.patterns.len())
1662            .field("metadata_len", &state.metadata.len())
1663            .field("disk_fonts_len", &state.disk_fonts.len())
1664            .field("memory_fonts_len", &state.memory_fonts.len())
1665            .finish()
1666    }
1667}
1668
1669impl Default for FcFontCache {
1670    fn default() -> Self {
1671        Self {
1672            shared: std::sync::Arc::new(FcFontCacheShared {
1673                state: StLock::new(FcFontCacheInner::default()),
1674                chain_cache: StLock::new(std::collections::HashMap::new()),
1675                shared_bytes: StLock::new(std::collections::HashMap::new()),
1676            }),
1677        }
1678    }
1679}
1680
1681impl FcFontCache {
1682    /// Acquire a read guard on the cache's state. Panics if the lock
1683    /// was poisoned by a panic inside the write guard — same
1684    /// contract as `RwLock::read().expect(..)`.
1685    #[inline]
1686    pub(crate) fn state_read(
1687        &self,
1688    ) -> StReadGuard<'_, FcFontCacheInner> {
1689        // [az-web-lift] StLock::read() is Infallible (never poisons/spins).
1690        match self.shared.state.read() {
1691            Ok(g) => g,
1692            Err(e) => match e {},
1693        }
1694    }
1695
1696    /// Acquire a write guard on the cache's state. Panics on
1697    /// poisoning, same as `state_read`.
1698    #[inline]
1699    pub(crate) fn state_write(
1700        &self,
1701    ) -> StWriteGuard<'_, FcFontCacheInner> {
1702        // [az-web-lift] StLock::write() is Infallible (never poisons/spins).
1703        match self.shared.state.write() {
1704            Ok(g) => g,
1705            Err(e) => match e {},
1706        }
1707    }
1708
1709    /// Adds in-memory font files.
1710    ///
1711    /// Note: takes `&self` — the shared cache handles interior
1712    /// mutability via the RwLock.
1713    pub fn with_memory_fonts(&self, fonts: Vec<(FcPattern, FcFont)>) -> &Self {
1714        // Auto-detect Unicode coverage for any naively-registered font
1715        // (empty `unicode_ranges`) BEFORE taking the write lock, so we don't
1716        // hold it across font parsing. See `populate_memory_font_ranges`.
1717        let fonts: Vec<(FcPattern, FcFont)> = fonts
1718            .into_iter()
1719            .map(|(pattern, font)| (Self::populate_memory_font_ranges(pattern, &font), font))
1720            .collect();
1721        let mut state = self.state_write();
1722        for (pattern, font) in fonts {
1723            let id = FontId::new();
1724            state.patterns.insert(pattern.clone(), id);
1725            state.metadata.insert(id, pattern.clone());
1726            state.memory_fonts.insert(id, font);
1727            state.index_pattern_tokens(&pattern, id);
1728        }
1729        self
1730    }
1731
1732    /// Adds a memory font with a specific ID (for testing).
1733    pub fn with_memory_font_with_id(
1734        &self,
1735        id: FontId,
1736        pattern: FcPattern,
1737        font: FcFont,
1738    ) -> &Self {
1739        let pattern = Self::populate_memory_font_ranges(pattern, &font);
1740        let mut state = self.state_write();
1741        state.patterns.insert(pattern.clone(), id);
1742        state.metadata.insert(id, pattern.clone());
1743        state.memory_fonts.insert(id, font);
1744        state.index_pattern_tokens(&pattern, id);
1745        self
1746    }
1747
1748    /// Fill in a memory font's `unicode_ranges` from its raw bytes when the
1749    /// caller left them empty.
1750    ///
1751    /// A normal caller of [`FcFontCache::with_memory_fonts`] just hands over
1752    /// a name and the font bytes — they don't hand-compute the cmap. But
1753    /// [`FontFallbackChain::resolve_char`] deliberately skips any font that
1754    /// reports *no* coverage (it refuses to assume a blank range list means
1755    /// "covers everything"). Without this step a naively-registered bundled
1756    /// font could never be selected for any character — the exact bug that
1757    /// bites headless / wasm / embedder-bundled-font setups.
1758    ///
1759    /// With the `parsing` feature we reuse the *same* OS/2 + cmap detection
1760    /// pipeline the on-disk builder uses (via [`FcParseFontBytes`] →
1761    /// `parse_font_faces`). Without `parsing` the pattern is returned
1762    /// unchanged and the caller must populate `unicode_ranges` themselves.
1763    #[cfg(all(feature = "std", feature = "parsing"))]
1764    fn populate_memory_font_ranges(mut pattern: FcPattern, font: &FcFont) -> FcPattern {
1765        if !pattern.unicode_ranges.is_empty() {
1766            return pattern;
1767        }
1768        if let Some(faces) = FcParseFontBytes(&font.bytes, &font.id) {
1769            // A `.ttc` yields several faces; pick the one matching this
1770            // font's index, else fall back to the first parsed face. All
1771            // patterns of a single face share the same `unicode_ranges`.
1772            let ranges = faces
1773                .iter()
1774                .find(|(_, f)| f.font_index == font.font_index)
1775                .or_else(|| faces.first())
1776                .map(|(p, _)| p.unicode_ranges.clone())
1777                .unwrap_or_default();
1778            if !ranges.is_empty() {
1779                pattern.unicode_ranges = ranges;
1780            }
1781        }
1782        pattern
1783    }
1784
1785    /// Without the `parsing` feature there is no cmap/OS2 parser available,
1786    /// so the caller-provided pattern is stored verbatim.
1787    #[cfg(not(all(feature = "std", feature = "parsing")))]
1788    fn populate_memory_font_ranges(pattern: FcPattern, _font: &FcFont) -> FcPattern {
1789        pattern
1790    }
1791
1792    /// Register a newly-parsed on-disk font. Called by the builder
1793    /// thread inside `FcFontRegistry`. Allocates a fresh `FontId`,
1794    /// inserts the pattern + path + metadata in one write lock, and
1795    /// invalidates the chain cache so subsequent resolutions pick
1796    /// up the new font.
1797    pub fn insert_builder_font(&self, pattern: FcPattern, path: FcFontPath) {
1798        let id = FontId::new();
1799        {
1800            let mut state = self.state_write();
1801            state.index_pattern_tokens(&pattern, id);
1802            state.patterns.insert(pattern.clone(), id);
1803            state.disk_fonts.insert(id, path);
1804            state.metadata.insert(id, pattern);
1805        }
1806        // Invalidate chain cache so callers see the new font on the
1807        // next resolve. Scoped after the state write to keep lock
1808        // nesting shallow.
1809        if let Ok(mut cc) = self.shared.chain_cache.lock() {
1810            cc.clear();
1811        }
1812    }
1813
1814    #[cfg(feature = "std")]
1815    #[doc(hidden)]
1816    pub fn chain_cache_len(&self) -> usize {
1817        self.shared.chain_cache.lock().map(|c| c.len()).unwrap_or(0)
1818    }
1819
1820    /// Insert a *fast-probed* pattern into the cache and return its
1821    /// fresh `FontId`. Used by [`FcFontRegistry::request_fonts_fast`]
1822    /// when a cmap probe discovers a font that covers some subset of
1823    /// the requested codepoints. Unlike [`insert_builder_font`] this
1824    /// does **not** populate the token index (we don't have NAME
1825    /// table data), so fuzzy-name lookups on fast-probed fonts fall
1826    /// through to the filename-guess in `known_paths`.
1827    pub fn insert_fast_pattern(&self, pattern: FcPattern, path: FcFontPath) -> FontId {
1828        let id = FontId::new();
1829        let mut state = self.state_write();
1830        state.patterns.insert(pattern.clone(), id);
1831        state.disk_fonts.insert(id, path);
1832        state.metadata.insert(id, pattern);
1833        id
1834    }
1835
1836    /// Look up all `FontId`s whose `FcFontPath` matches `path`.
1837    /// Cheap way for `request_fonts_fast` to reuse fast-probed
1838    /// entries across layout passes without re-reading the cmap.
1839    ///
1840    /// O(n) over the disk_fonts map; fine for the typical case of
1841    /// <100 parsed fonts, and we skip the scan entirely when a
1842    /// stack's first candidate covers.
1843    pub fn lookup_paths_cached(&self, path: &str) -> Option<Vec<FontId>> {
1844        let state = self.state_read();
1845        let mut out = Vec::new();
1846        for (id, font_path) in &state.disk_fonts {
1847            if font_path.path == path {
1848                out.push(*id);
1849            }
1850        }
1851        if out.is_empty() { None } else { Some(out) }
1852    }
1853
1854    /// Get font data for a given font ID.
1855    ///
1856    /// Returns owned values (not references) because the underlying
1857    /// maps live behind an RwLock — a reference could not outlive
1858    /// the read guard. In-memory fonts come back as cloned `FcFont`
1859    /// instances; disk fonts return their `FcFontPath`.
1860    pub fn get_font_by_id(&self, id: &FontId) -> Option<OwnedFontSource> {
1861        let state = self.state_read();
1862        if let Some(font) = state.memory_fonts.get(id) {
1863            return Some(OwnedFontSource::Memory(font.clone()));
1864        }
1865        if let Some(path) = state.disk_fonts.get(id) {
1866            return Some(OwnedFontSource::Disk(path.clone()));
1867        }
1868        None
1869    }
1870
1871    /// Get metadata for a font ID. Returns an owned `FcPattern`
1872    /// (cloned out of the shared map) because we can't return a
1873    /// reference across the RwLock boundary.
1874    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<FcPattern> {
1875        self.state_read().metadata.get(id).cloned()
1876    }
1877
1878    /// Get the font bytes for `id` as a shared [`FontBytes`].
1879    ///
1880    /// On disk the returned `Arc<FontBytes>` wraps an mmap of the file
1881    /// (`FontBytes::Mmapped`). Untouched pages of the file never count
1882    /// toward the process's RSS — for a font where layout shapes only
1883    /// a handful of glyphs, this is the difference between paying for
1884    /// the whole 4 MiB `.ttc` and paying for the cmap + a few glyf
1885    /// pages.
1886    ///
1887    /// In-memory fonts (`FontSource::Memory`) come back as
1888    /// `FontBytes::Owned`, since the bytes are already on the heap.
1889    ///
1890    /// Multiple `FontId`s backed by the same file content (every face
1891    /// of a `.ttc`, or two paths with identical bytes) return the
1892    /// *same* `Arc<FontBytes>` thanks to a content-hash → `Weak`
1893    /// cache. Bytes get unmapped automatically when the last consumer
1894    /// drops the Arc.
1895    ///
1896    /// `FontBytes` derefs to `[u8]`, so callers that only need
1897    /// `&[u8]` (allsorts, ttf-parser, …) can pass it through without
1898    /// thinking about the backing.
1899    ///
1900    /// Failure modes: returns `None` if the path is unknown, or the
1901    /// file no longer exists / cannot be opened, or the mmap call
1902    /// fails. Callers may retry with a fresh `get_font_bytes` if they
1903    /// suspect the file was replaced underneath them; the next call
1904    /// re-opens cleanly.
1905    #[cfg(feature = "std")]
1906    pub fn get_font_bytes(&self, id: &FontId) -> Option<std::sync::Arc<FontBytes>> {
1907        use std::sync::Arc;
1908        match self.get_font_by_id(id)? {
1909            OwnedFontSource::Memory(font) => Some(Arc::new(FontBytes::Owned(
1910                Arc::from(font.bytes.as_slice()),
1911            ))),
1912            OwnedFontSource::Disk(path) => {
1913                let hash = path.bytes_hash;
1914                if hash != 0 {
1915                    if let Ok(guard) = self.shared.shared_bytes.lock() {
1916                        if let Some(weak) = guard.get(&hash) {
1917                            if let Some(arc) = weak.upgrade() {
1918                                return Some(arc);
1919                            }
1920                        }
1921                    }
1922                }
1923
1924                let arc = open_font_bytes_mmap(&path.path)?;
1925                if hash != 0 {
1926                    if let Ok(mut guard) = self.shared.shared_bytes.lock() {
1927                        // Overwrite any stale weak ref that failed to upgrade.
1928                        guard.insert(hash, Arc::downgrade(&arc));
1929                    }
1930                }
1931                Some(arc)
1932            }
1933        }
1934    }
1935
1936    /// Returns an empty font cache (no_std / no filesystem).
1937    #[cfg(not(feature = "std"))]
1938    pub fn build() -> Self { Self::default() }
1939
1940    /// Scans system font directories using filename heuristics (no allsorts).
1941    #[cfg(all(feature = "std", not(feature = "parsing")))]
1942    pub fn build() -> Self { Self::build_from_filenames() }
1943
1944    /// Scans and parses all system fonts via allsorts for full metadata.
1945    #[cfg(all(feature = "std", feature = "parsing"))]
1946    pub fn build() -> Self { Self::build_inner(None) }
1947
1948    /// Filename-only scan: discovers fonts on disk, guesses metadata from
1949    /// the filename using [`config::tokenize_font_stem`].
1950    #[cfg(all(feature = "std", not(feature = "parsing")))]
1951    fn build_from_filenames() -> Self {
1952        let cache = Self::default();
1953        {
1954            let mut state = cache.state_write();
1955            for dir in crate::config::font_directories(OperatingSystem::current()) {
1956                for path in FcCollectFontFilesRecursive(dir) {
1957                    let pattern = match pattern_from_filename(&path) {
1958                        Some(p) => p,
1959                        None => continue,
1960                    };
1961                    let id = FontId::new();
1962                    state.disk_fonts.insert(id, FcFontPath {
1963                        path: path.to_string_lossy().to_string(),
1964                        font_index: 0,
1965                        // Filename-only scan — we never read the bytes,
1966                        // so there's no dedup key. Leave as 0.
1967                        bytes_hash: 0,
1968                    });
1969                    state.index_pattern_tokens(&pattern, id);
1970                    state.metadata.insert(id, pattern.clone());
1971                    state.patterns.insert(pattern, id);
1972                }
1973            }
1974        }
1975        cache
1976    }
1977    
1978    /// Builds a font cache with only specific font families (and their fallbacks).
1979    /// 
1980    /// This is a performance optimization for applications that know ahead of time
1981    /// which fonts they need. Instead of scanning all system fonts (which can be slow
1982    /// on systems with many fonts), only fonts matching the specified families are loaded.
1983    /// 
1984    /// Generic family names like "sans-serif", "serif", "monospace" are expanded
1985    /// to OS-specific font names (e.g., "sans-serif" on macOS becomes "Helvetica Neue", 
1986    /// "San Francisco", etc.).
1987    /// 
1988    /// **Note**: This will NOT automatically load fallback fonts for scripts not covered
1989    /// by the requested families. If you need Arabic, CJK, or emoji support, either:
1990    /// - Add those families explicitly to the filter
1991    /// - Use `with_memory_fonts()` to add bundled fonts
1992    /// - Use `build()` to load all system fonts
1993    /// 
1994    /// # Arguments
1995    /// * `families` - Font family names to load (e.g., ["Arial", "sans-serif"])
1996    /// 
1997    /// # Example
1998    /// ```ignore
1999    /// // Only load Arial and sans-serif fallback fonts
2000    /// let cache = FcFontCache::build_with_families(&["Arial", "sans-serif"]);
2001    /// ```
2002    #[cfg(all(feature = "std", feature = "parsing"))]
2003    pub fn build_with_families(families: &[impl AsRef<str>]) -> Self {
2004        // Expand generic families to OS-specific names
2005        let os = OperatingSystem::current();
2006        let mut target_families: Vec<String> = Vec::new();
2007        
2008        for family in families {
2009            let family_str = family.as_ref();
2010            let expanded = os.expand_generic_family(family_str, &[]);
2011            if expanded.is_empty() || (expanded.len() == 1 && expanded[0] == family_str) {
2012                target_families.push(family_str.to_string());
2013            } else {
2014                target_families.extend(expanded);
2015            }
2016        }
2017        
2018        Self::build_inner(Some(&target_families))
2019    }
2020    
    /// Inner build function that handles both filtered and unfiltered font loading.
    ///
    /// Creates an empty cache, takes its write guard once, and fills it
    /// from whichever single target-OS arm below is compiled in. On a
    /// target that matches none of the arms nothing is inserted and the
    /// cache is returned empty.
    ///
    /// Filtering is a substring test on normalized names: a pattern is
    /// kept when its normalized `name` or `family` *contains* any of the
    /// normalized filter entries.
    ///
    /// # Arguments
    /// * `family_filter` - If Some, only load fonts matching these family names.
    ///                     If None, load all fonts.
    #[cfg(all(feature = "std", feature = "parsing"))]
    fn build_inner(family_filter: Option<&[String]>) -> Self {
        let cache = FcFontCache::default();

        // Normalize filter families for matching
        let filter_normalized: Option<Vec<String>> = family_filter.map(|families| {
            families
                .iter()
                .map(|f| crate::utils::normalize_family_name(f))
                .collect()
        });

        // Helper closure to check if a pattern matches the filter.
        // `name` is tested first; `family` is only consulted when the
        // name is absent or does not match.
        let matches_filter = |pattern: &FcPattern| -> bool {
            match &filter_normalized {
                None => true, // No filter = accept all
                Some(targets) => {
                    pattern.name.as_ref().map_or(false, |name| {
                        let name_norm = crate::utils::normalize_family_name(name);
                        targets.iter().any(|target| name_norm.contains(target))
                    }) || pattern.family.as_ref().map_or(false, |family| {
                        let family_norm = crate::utils::normalize_family_name(family);
                        targets.iter().any(|target| family_norm.contains(target))
                    })
                }
            }
        };

        // One write guard for the whole scan. The cache is still local to
        // this function at this point.
        let mut state = cache.state_write();

        // Linux: the only arm that also receives per-family render
        // configs from the scan and applies them to matching patterns.
        #[cfg(target_os = "linux")]
        {
            if let Some((font_entries, render_configs)) = FcScanDirectories() {
                for (mut pattern, path) in font_entries {
                    if matches_filter(&pattern) {
                        // Apply per-font render config if a matching family rule exists
                        // (keyed by `name`, falling back to `family`).
                        if let Some(family) = pattern.name.as_ref().or(pattern.family.as_ref()) {
                            if let Some(rc) = render_configs.get(family) {
                                pattern.render_config = rc.clone();
                            }
                        }
                        let id = FontId::new();
                        state.patterns.insert(pattern.clone(), id);
                        state.metadata.insert(id, pattern.clone());
                        state.disk_fonts.insert(id, path);
                        state.index_pattern_tokens(&pattern, id);
                    }
                }
            }
        }

        // Windows: system font directory plus the per-user font directory.
        #[cfg(target_os = "windows")]
        {
            // `SystemRoot`, then `WINDIR`, then a hard-coded default.
            let system_root = std::env::var("SystemRoot")
                .or_else(|_| std::env::var("WINDIR"))
                .unwrap_or_else(|_| "C:\\Windows".to_string());

            let user_profile = std::env::var("USERPROFILE")
                .unwrap_or_else(|_| "C:\\Users\\Default".to_string());

            let font_dirs = vec![
                (None, format!("{}\\Fonts\\", system_root)),
                (None, format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile)),
            ];

            let font_entries = FcScanDirectoriesInner(&font_dirs);
            for (pattern, path) in font_entries {
                if matches_filter(&pattern) {
                    let id = FontId::new();
                    state.patterns.insert(pattern.clone(), id);
                    state.metadata.insert(id, pattern.clone());
                    state.disk_fonts.insert(id, path);
                    state.index_pattern_tokens(&pattern, id);
                }
            }
        }

        // macOS: user, system and shared font directories.
        #[cfg(target_os = "macos")]
        {
            // NOTE(review): "~" is passed through literally here; presumably
            // `FcScanDirectoriesInner` expands it — confirm.
            let font_dirs = vec![
                (None, "~/Library/Fonts".to_owned()),
                (None, "/System/Library/Fonts".to_owned()),
                (None, "/Library/Fonts".to_owned()),
                (None, "/System/Library/AssetsV2".to_owned()),
            ];

            let font_entries = FcScanDirectoriesInner(&font_dirs);
            for (pattern, path) in font_entries {
                if matches_filter(&pattern) {
                    let id = FontId::new();
                    state.patterns.insert(pattern.clone(), id);
                    state.metadata.insert(id, pattern.clone());
                    state.disk_fonts.insert(id, path);
                    state.index_pattern_tokens(&pattern, id);
                }
            }
        }

        // iOS: the app sandbox denies a plain `read_dir` on `/System/Library/...`,
        // but `CTFontManagerCopyAvailableFontURLs` returns sandbox-mediated
        // `CFURL`s that *are* openable. We enumerate via CoreText, then feed
        // each URL into the same `FcParseFont` path the desktop arms use.
        #[cfg(target_os = "ios")]
        {
            let font_files = crate::mobile_ios::copy_available_font_urls();
            let font_entries = FcParseFontFiles(&font_files);
            for (pattern, path) in font_entries {
                if matches_filter(&pattern) {
                    let id = FontId::new();
                    state.patterns.insert(pattern.clone(), id);
                    state.metadata.insert(id, pattern.clone());
                    state.disk_fonts.insert(id, path);
                    state.index_pattern_tokens(&pattern, id);
                }
            }
        }

        // Android: system fonts live at world-readable paths. Vendor partitions
        // (`/product/fonts`, `/system_ext/fonts`) carry OEM-specific families
        // on Samsung One UI / MIUI / EMUI; `/data/fonts` is the per-user font
        // dir on recent ROMs.
        #[cfg(target_os = "android")]
        {
            let font_dirs = vec![
                (None, "/system/fonts".to_owned()),
                (None, "/product/fonts".to_owned()),
                (None, "/system_ext/fonts".to_owned()),
                (None, "/data/fonts".to_owned()),
            ];

            let font_entries = FcScanDirectoriesInner(&font_dirs);
            for (pattern, path) in font_entries {
                if matches_filter(&pattern) {
                    let id = FontId::new();
                    state.patterns.insert(pattern.clone(), id);
                    state.metadata.insert(id, pattern.clone());
                    state.disk_fonts.insert(id, path);
                    state.index_pattern_tokens(&pattern, id);
                }
            }
        }

        // The guard borrows `cache`; release it before returning the cache.
        drop(state);
        cache
    }
2171    
2172    /// Check if a font ID is a memory font (preferred over disk fonts)
2173    pub fn is_memory_font(&self, id: &FontId) -> bool {
2174        self.state_read().memory_fonts.contains_key(id)
2175    }
2176
2177    /// Returns the list of fonts and font patterns.
2178    ///
2179    /// Returns owned `FcPattern` values (cloned out of the shared
2180    /// state) — this is the v4.1 API change described on
2181    /// [`FcFontCache`]. Callers that need to iterate without
2182    /// cloning should use [`FcFontCache::for_each_pattern`].
2183    pub fn list(&self) -> Vec<(FcPattern, FontId)> {
2184        self.state_read()
2185            .patterns
2186            .iter()
2187            .map(|(pattern, id)| (pattern.clone(), *id))
2188            .collect()
2189    }
2190
2191    /// Iterate over every `(pattern, id)` pair under a single read
2192    /// guard. `f` is called once per entry — avoids the per-entry
2193    /// clone that [`list`] incurs.
2194    pub fn for_each_pattern<F: FnMut(&FcPattern, &FontId)>(&self, mut f: F) {
2195        let state = self.state_read();
2196        for (pattern, id) in &state.patterns {
2197            f(pattern, id);
2198        }
2199    }
2200
2201    /// Returns true if the cache contains no font patterns
2202    pub fn is_empty(&self) -> bool {
2203        self.state_read().patterns.is_empty()
2204    }
2205
2206    /// Returns the number of font patterns in the cache
2207    pub fn len(&self) -> usize {
2208        self.state_read().patterns.len()
2209    }
2210
2211    /// Queries a font from the in-memory cache, returns the first found font (early return)
2212    /// Memory fonts are always preferred over disk fonts with the same match quality.
2213    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
2214        let state = self.state_read();
2215        let mut matches = Vec::new();
2216
2217        for (stored_pattern, id) in &state.patterns {
2218            if Self::query_matches_internal(stored_pattern, pattern, trace) {
2219                let metadata = state.metadata.get(id).unwrap_or(stored_pattern);
2220
2221                // Calculate Unicode compatibility score
2222                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
2223                    // No specific Unicode requirements, use general coverage
2224                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
2225                } else {
2226                    // Calculate how well this font covers the requested Unicode ranges
2227                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
2228                };
2229
2230                let style_score = Self::calculate_style_score(pattern, metadata);
2231
2232                // Memory fonts get a bonus to prefer them over disk fonts
2233                let is_memory = state.memory_fonts.contains_key(id);
2234
2235                matches.push((*id, unicode_compatibility, style_score, metadata.clone(), is_memory));
2236            }
2237        }
2238
2239        // Sort by: 1. Memory font (preferred), 2. Unicode compatibility, 3. Style score
2240        matches.sort_by(|a, b| {
2241            // Memory fonts first
2242            b.4.cmp(&a.4)
2243                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
2244                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
2245        });
2246
2247        matches.first().map(|(id, _, _, metadata, _)| {
2248            FontMatch {
2249                id: *id,
2250                unicode_ranges: metadata.unicode_ranges.clone(),
2251                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2252            }
2253        })
2254    }
2255
2256    /// Queries all fonts matching a pattern (internal use only).
2257    ///
2258    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
2259    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
2260    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
2261        let state = self.state_read();
2262        self.query_internal_locked(&state, pattern, trace)
2263    }
2264
2265    /// Internal variant used when the caller already holds a read
2266    /// guard on the state. Avoids re-locking.
2267    fn query_internal_locked(
2268        &self,
2269        state: &FcFontCacheInner,
2270        pattern: &FcPattern,
2271        trace: &mut Vec<TraceMsg>,
2272    ) -> Vec<FontMatch> {
2273        let mut matches = Vec::new();
2274
2275        for (stored_pattern, id) in &state.patterns {
2276            if Self::query_matches_internal(stored_pattern, pattern, trace) {
2277                let metadata = state.metadata.get(id).unwrap_or(stored_pattern);
2278
2279                // Calculate Unicode compatibility score
2280                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
2281                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
2282                } else {
2283                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
2284                };
2285
2286                let style_score = Self::calculate_style_score(pattern, metadata);
2287                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
2288            }
2289        }
2290
2291        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
2292        // Style matching (weight, italic, etc.) is now the primary criterion
2293        // Deterministic tiebreaker: prefer non-italic, then alphabetical by name
2294        matches.sort_by(|a, b| {
2295            a.2.cmp(&b.2) // Style score (lower is better)
2296                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
2297                .then_with(|| a.3.italic.cmp(&b.3.italic)) // Prefer non-italic
2298                .then_with(|| a.3.name.cmp(&b.3.name)) // Alphabetical tiebreaker
2299        });
2300
2301        matches
2302            .into_iter()
2303            .map(|(id, _, _, metadata)| {
2304                FontMatch {
2305                    id,
2306                    unicode_ranges: metadata.unicode_ranges.clone(),
2307                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2308                }
2309            })
2310            .collect()
2311    }
2312
2313    /// Compute fallback fonts for a given font
2314    /// This is a lazy operation that can be expensive - only call when actually needed
2315    /// (e.g., for FFI or debugging, not needed for resolve_char)
2316    pub fn compute_fallbacks(
2317        &self,
2318        font_id: &FontId,
2319        trace: &mut Vec<TraceMsg>,
2320    ) -> Vec<FontMatchNoFallback> {
2321        let state = self.state_read();
2322        let pattern = match state.metadata.get(font_id) {
2323            Some(p) => p.clone(),
2324            None => return Vec::new(),
2325        };
2326        drop(state);
2327
2328        self.compute_fallbacks_for_pattern(&pattern, Some(font_id), trace)
2329    }
2330
2331    fn compute_fallbacks_for_pattern(
2332        &self,
2333        pattern: &FcPattern,
2334        exclude_id: Option<&FontId>,
2335        _trace: &mut Vec<TraceMsg>,
2336    ) -> Vec<FontMatchNoFallback> {
2337        let state = self.state_read();
2338        let mut candidates = Vec::new();
2339
2340        // Collect all potential fallbacks (excluding original pattern)
2341        for (stored_pattern, id) in &state.patterns {
2342            // Skip if this is the original font
2343            if exclude_id.is_some() && exclude_id.unwrap() == id {
2344                continue;
2345            }
2346
2347            // Check if this font supports any of the unicode ranges
2348            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2349                // Calculate Unicode compatibility
2350                let unicode_compatibility = Self::calculate_unicode_compatibility(
2351                    &pattern.unicode_ranges,
2352                    &stored_pattern.unicode_ranges
2353                );
2354
2355                // Only include if there's actual overlap
2356                if unicode_compatibility > 0 {
2357                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
2358                    candidates.push((
2359                        FontMatchNoFallback {
2360                            id: *id,
2361                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
2362                        },
2363                        unicode_compatibility,
2364                        style_score,
2365                        stored_pattern.clone(),
2366                    ));
2367                }
2368            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
2369                // No specific Unicode requirements, use general coverage
2370                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
2371                let style_score = Self::calculate_style_score(pattern, stored_pattern);
2372                candidates.push((
2373                    FontMatchNoFallback {
2374                        id: *id,
2375                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
2376                    },
2377                    coverage,
2378                    style_score,
2379                    stored_pattern.clone(),
2380                ));
2381            }
2382        }
2383
2384        drop(state);
2385
2386        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
2387        candidates.sort_by(|a, b| {
2388            b.1.cmp(&a.1)
2389                .then_with(|| a.2.cmp(&b.2))
2390        });
2391
2392        // Deduplicate by keeping only the best match per unique unicode range
2393        let mut seen_ranges = Vec::new();
2394        let mut deduplicated = Vec::new();
2395
2396        for (id, _, _, pattern) in candidates {
2397            let mut is_new_range = false;
2398
2399            for range in &pattern.unicode_ranges {
2400                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
2401                    seen_ranges.push(*range);
2402                    is_new_range = true;
2403                }
2404            }
2405
2406            if is_new_range {
2407                deduplicated.push(id);
2408            }
2409        }
2410
2411        deduplicated
2412    }
2413
2414    /// Get in-memory font data (cloned out of the shared state).
2415    pub fn get_memory_font(&self, id: &FontId) -> Option<FcFont> {
2416        self.state_read().memory_fonts.get(id).cloned()
2417    }
2418
2419    /// Check if a pattern matches the query, with detailed tracing
2420    fn trace_path(k: &FcPattern) -> String {
2421        k.name.as_ref().cloned().unwrap_or_else(|| "<unknown>".to_string())
2422    }
2423
2424    pub fn query_matches_internal(
2425        k: &FcPattern,
2426        pattern: &FcPattern,
2427        trace: &mut Vec<TraceMsg>,
2428    ) -> bool {
2429        // Check name - substring match
2430        if let Some(ref name) = pattern.name {
2431            if !k.name.as_ref().map_or(false, |kn| kn.contains(name)) {
2432                trace.push(TraceMsg {
2433                    level: TraceLevel::Info,
2434                    path: Self::trace_path(k),
2435                    reason: MatchReason::NameMismatch {
2436                        requested: pattern.name.clone(),
2437                        found: k.name.clone(),
2438                    },
2439                });
2440                return false;
2441            }
2442        }
2443
2444        // Check family - substring match
2445        if let Some(ref family) = pattern.family {
2446            if !k.family.as_ref().map_or(false, |kf| kf.contains(family)) {
2447                trace.push(TraceMsg {
2448                    level: TraceLevel::Info,
2449                    path: Self::trace_path(k),
2450                    reason: MatchReason::FamilyMismatch {
2451                        requested: pattern.family.clone(),
2452                        found: k.family.clone(),
2453                    },
2454                });
2455                return false;
2456            }
2457        }
2458
2459        // Check style properties
2460        let style_properties = [
2461            (
2462                "italic",
2463                pattern.italic.needs_to_match(),
2464                pattern.italic.matches(&k.italic),
2465            ),
2466            (
2467                "oblique",
2468                pattern.oblique.needs_to_match(),
2469                pattern.oblique.matches(&k.oblique),
2470            ),
2471            (
2472                "bold",
2473                pattern.bold.needs_to_match(),
2474                pattern.bold.matches(&k.bold),
2475            ),
2476            (
2477                "monospace",
2478                pattern.monospace.needs_to_match(),
2479                pattern.monospace.matches(&k.monospace),
2480            ),
2481            (
2482                "condensed",
2483                pattern.condensed.needs_to_match(),
2484                pattern.condensed.matches(&k.condensed),
2485            ),
2486        ];
2487
2488        for (property_name, needs_to_match, matches) in style_properties {
2489            if needs_to_match && !matches {
2490                let (requested, found) = match property_name {
2491                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
2492                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
2493                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
2494                    "monospace" => (
2495                        format!("{:?}", pattern.monospace),
2496                        format!("{:?}", k.monospace),
2497                    ),
2498                    "condensed" => (
2499                        format!("{:?}", pattern.condensed),
2500                        format!("{:?}", k.condensed),
2501                    ),
2502                    _ => (String::new(), String::new()),
2503                };
2504
2505                trace.push(TraceMsg {
2506                    level: TraceLevel::Info,
2507                    path: Self::trace_path(k),
2508                    reason: MatchReason::StyleMismatch {
2509                        property: property_name,
2510                        requested,
2511                        found,
2512                    },
2513                });
2514                return false;
2515            }
2516        }
2517
2518        // Check weight - hard filter if non-normal weight is requested
2519        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
2520            trace.push(TraceMsg {
2521                level: TraceLevel::Info,
2522                path: Self::trace_path(k),
2523                reason: MatchReason::WeightMismatch {
2524                    requested: pattern.weight,
2525                    found: k.weight,
2526                },
2527            });
2528            return false;
2529        }
2530
2531        // Check stretch - hard filter if non-normal stretch is requested
2532        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
2533            trace.push(TraceMsg {
2534                level: TraceLevel::Info,
2535                path: Self::trace_path(k),
2536                reason: MatchReason::StretchMismatch {
2537                    requested: pattern.stretch,
2538                    found: k.stretch,
2539                },
2540            });
2541            return false;
2542        }
2543
2544        // Check unicode ranges if specified
2545        if !pattern.unicode_ranges.is_empty() {
2546            let mut has_overlap = false;
2547
2548            for p_range in &pattern.unicode_ranges {
2549                for k_range in &k.unicode_ranges {
2550                    if p_range.overlaps(k_range) {
2551                        has_overlap = true;
2552                        break;
2553                    }
2554                }
2555                if has_overlap {
2556                    break;
2557                }
2558            }
2559
2560            if !has_overlap {
2561                trace.push(TraceMsg {
2562                    level: TraceLevel::Info,
2563                    path: Self::trace_path(k),
2564                    reason: MatchReason::UnicodeRangeMismatch {
2565                        character: '\0', // No specific character to report
2566                        ranges: k.unicode_ranges.clone(),
2567                    },
2568                });
2569                return false;
2570            }
2571        }
2572
2573        true
2574    }
2575    
2576    /// Resolve a complete font fallback chain for a CSS font-family stack
2577    /// This is the main entry point for font resolution with caching
2578    /// Automatically expands generic CSS families (serif, sans-serif, monospace) to OS-specific fonts
2579    /// 
2580    /// # Arguments
2581    /// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
2582    /// * `text` - The text to render (used to extract Unicode ranges)
2583    /// * `weight` - Font weight
2584    /// * `italic` - Italic style requirement
2585    /// * `oblique` - Oblique style requirement
2586    /// * `trace` - Debug trace messages
2587    /// 
2588    /// # Returns
2589    /// A complete font fallback chain with CSS fallbacks and Unicode fallbacks
2590    /// 
2591    /// # Example
2592    /// ```no_run
2593    /// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
2594    /// let cache = FcFontCache::build();
2595    /// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
2596    /// let chain = cache.resolve_font_chain(&families, FcWeight::Normal, 
2597    ///                                       PatternMatch::DontCare, PatternMatch::DontCare, 
2598    ///                                       &mut Vec::new());
2599    /// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
2600    /// ```
2601    #[cfg(feature = "std")]
2602    pub fn resolve_font_chain(
2603        &self,
2604        font_families: &[String],
2605        weight: FcWeight,
2606        italic: PatternMatch,
2607        oblique: PatternMatch,
2608        trace: &mut Vec<TraceMsg>,
2609    ) -> FontFallbackChain {
2610        self.resolve_font_chain_with_os(font_families, weight, italic, oblique, trace, OperatingSystem::current())
2611    }
2612    
2613    /// Resolve font chain with explicit OS specification (useful for testing)
2614    #[cfg(feature = "std")]
2615    pub fn resolve_font_chain_with_os(
2616        &self,
2617        font_families: &[String],
2618        weight: FcWeight,
2619        italic: PatternMatch,
2620        oblique: PatternMatch,
2621        trace: &mut Vec<TraceMsg>,
2622        os: OperatingSystem,
2623    ) -> FontFallbackChain {
2624        self.resolve_font_chain_impl(font_families, weight, italic, oblique, None, trace, os)
2625    }
2626
2627    /// Resolve a font fallback chain, restricting Unicode fallbacks to the
2628    /// caller-supplied set of scripts (usually derived from the actual
2629    /// text content of the document).
2630    ///
2631    /// - `scripts_hint: None` → back-compat behaviour, equivalent to
2632    ///   [`FcFontCache::resolve_font_chain`]: pulls in fallback fonts for
2633    ///   the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`] set.
2634    /// - `scripts_hint: Some(&[])` → no Unicode fallbacks attached. For
2635    ///   an ASCII-only page this avoids pulling Arial Unicode MS,
2636    ///   CJK fonts, etc. into memory when they're not needed.
2637    /// - `scripts_hint: Some(&[CJK])` → only CJK fallback attached.
2638    ///
2639    /// The chain cache is keyed so an ASCII-only resolution cannot be
2640    /// served from a slot populated by a default/all-scripts resolution.
2641    #[cfg(feature = "std")]
2642    pub fn resolve_font_chain_with_scripts(
2643        &self,
2644        font_families: &[String],
2645        weight: FcWeight,
2646        italic: PatternMatch,
2647        oblique: PatternMatch,
2648        scripts_hint: Option<&[UnicodeRange]>,
2649        trace: &mut Vec<TraceMsg>,
2650    ) -> FontFallbackChain {
2651        self.resolve_font_chain_impl(
2652            font_families, weight, italic, oblique, scripts_hint,
2653            trace, OperatingSystem::current(),
2654        )
2655    }
2656
2657    /// Shared entry used by [`resolve_font_chain_with_os`] and
2658    /// [`resolve_font_chain_with_scripts`]. Handles the cache lookup,
2659    /// generic-family expansion, and delegation to the uncached builder.
2660    #[cfg(feature = "std")]
2661    fn resolve_font_chain_impl(
2662        &self,
2663        font_families: &[String],
2664        weight: FcWeight,
2665        italic: PatternMatch,
2666        oblique: PatternMatch,
2667        scripts_hint: Option<&[UnicodeRange]>,
2668        trace: &mut Vec<TraceMsg>,
2669        os: OperatingSystem,
2670    ) -> FontFallbackChain {
2671        // Check cache FIRST - key uses original (unexpanded) families
2672        // plus a hash over the scripts_hint so ASCII-only callers don't
2673        // consume a slot filled by a default-scripts caller.
2674        let scripts_hint_hash = scripts_hint.map(hash_scripts_hint);
2675        let cache_key = FontChainCacheKey {
2676            font_families: font_families.to_vec(),
2677            weight,
2678            italic,
2679            oblique,
2680            scripts_hint_hash,
2681        };
2682
2683        if let Some(cached) = self
2684            .shared
2685            .chain_cache
2686            .lock()
2687            .ok()
2688            .and_then(|c| c.get(&cache_key).cloned())
2689        {
2690            return cached;
2691        }
2692
2693        // Expand generic CSS families to OS-specific fonts
2694        let expanded_families = expand_font_families(font_families, os, &[]);
2695
2696        // Keep the originally-requested generic families ("serif",
2697        // "sans-serif", "monospace", ...) around. The expansion above turns
2698        // them into a hardcoded list of real OS font names and drops the
2699        // generic name itself; the chain builder uses this list to fall back
2700        // to *registered* fonts when none of those OS names exist (wasm,
2701        // headless caches, or an embedder that only registered an in-memory
2702        // bundled font). See `resolve_font_chain_uncached`.
2703        let generic_fallbacks: Vec<String> = font_families
2704            .iter()
2705            .filter(|f| config::is_generic_family(f))
2706            .cloned()
2707            .collect();
2708
2709        // Build the chain
2710        let chain = self.resolve_font_chain_uncached(
2711            &expanded_families,
2712            &generic_fallbacks,
2713            weight,
2714            italic,
2715            oblique,
2716            scripts_hint,
2717            trace,
2718        );
2719
2720        // Cache the result
2721        if let Ok(mut cache) = self.shared.chain_cache.lock() {
2722            cache.insert(cache_key, chain.clone());
2723        }
2724
2725        chain
2726    }
2727    
2728    /// Internal implementation without caching.
2729    ///
2730    /// `scripts_hint`:
2731    /// - `None` pulls in the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`]
2732    ///   set (the original, back-compat behaviour).
2733    /// - `Some(&[])` attaches no Unicode fallbacks.
2734    /// - `Some(ranges)` attaches fallbacks only for those ranges.
2735    #[cfg(feature = "std")]
2736    fn resolve_font_chain_uncached(
2737        &self,
2738        font_families: &[String],
2739        generic_fallbacks: &[String],
2740        weight: FcWeight,
2741        italic: PatternMatch,
2742        oblique: PatternMatch,
2743        scripts_hint: Option<&[UnicodeRange]>,
2744        trace: &mut Vec<TraceMsg>,
2745    ) -> FontFallbackChain {
2746        let mut css_fallbacks = Vec::new();
2747        
2748        // Resolve each CSS font-family to its system fallbacks
2749        for (_i, family) in font_families.iter().enumerate() {
2750            // Check if this is a generic font family
2751            let (pattern, is_generic) = if config::is_generic_family(family) {
2752                let monospace = if family.eq_ignore_ascii_case("monospace") {
2753                    PatternMatch::True
2754                } else {
2755                    PatternMatch::False
2756                };
2757                let pattern = FcPattern {
2758                    name: None,
2759                    weight,
2760                    italic,
2761                    oblique,
2762                    monospace,
2763                    unicode_ranges: Vec::new(),
2764                    ..Default::default()
2765                };
2766                (pattern, true)
2767            } else {
2768                // Specific font family name
2769                let pattern = FcPattern {
2770                    name: Some(family.clone()),
2771                    weight,
2772                    italic,
2773                    oblique,
2774                    unicode_ranges: Vec::new(),
2775                    ..Default::default()
2776                };
2777                (pattern, false)
2778            };
2779            
2780            // Use fuzzy matching for specific fonts (fast token-based lookup)
2781            // For generic families, use query (slower but necessary for property matching)
2782            let mut matches = if is_generic {
2783                // Generic families need full pattern matching
2784                self.query_internal(&pattern, trace)
2785            } else {
2786                // Specific font names: use fast token-based fuzzy matching
2787                self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
2788            };
2789            
2790            // For generic families, limit to top 5 fonts to avoid too many matches
2791            if is_generic && matches.len() > 5 {
2792                matches.truncate(5);
2793            }
2794            
2795            // Always add the CSS fallback group to preserve CSS ordering
2796            // even if no fonts were found for this family
2797            css_fallbacks.push(CssFallbackGroup {
2798                css_name: family.clone(),
2799                fonts: matches,
2800            });
2801        }
2802
2803        // Headless / wasm / memory-only fallback.
2804        //
2805        // Generic CSS families ("serif"/"sans-serif"/"monospace"/...) were
2806        // expanded by the caller to a hardcoded list of real OS font names.
2807        // On a system that actually has those fonts the loop above matched
2808        // them and we're done. But on wasm, a headless cache, or an embedder
2809        // that only registered an in-memory bundled font, NONE of those OS
2810        // names exist — and the original generic name was dropped, so a
2811        // registered font (whatever its family name) would never be reached.
2812        //
2813        // So: if the whole expanded stack matched nothing at all, retry each
2814        // originally-requested generic family as a generic `name: None`
2815        // query, which any registered font can satisfy. This runs ONLY when
2816        // nothing else matched, so on systems with real fonts it adds nothing
2817        // and never reorders real matches (any such fallback must come AFTER
2818        // real matches).
2819        if !generic_fallbacks.is_empty()
2820            && css_fallbacks.iter().all(|g| g.fonts.is_empty())
2821        {
2822            for generic in generic_fallbacks {
2823                let monospace = if generic.eq_ignore_ascii_case("monospace") {
2824                    PatternMatch::True
2825                } else {
2826                    PatternMatch::False
2827                };
2828                let pattern = FcPattern {
2829                    name: None,
2830                    weight,
2831                    italic,
2832                    oblique,
2833                    monospace,
2834                    unicode_ranges: Vec::new(),
2835                    ..Default::default()
2836                };
2837                let mut matches = self.query_internal(&pattern, trace);
2838                if matches.len() > 5 {
2839                    matches.truncate(5);
2840                }
2841                if !matches.is_empty() {
2842                    css_fallbacks.push(CssFallbackGroup {
2843                        css_name: generic.clone(),
2844                        fonts: matches,
2845                    });
2846                }
2847            }
2848        }
2849
2850        // Populate unicode_fallbacks. CSS fallback fonts may falsely claim
2851        // coverage of a script via the OS/2 unicode-range bits without
2852        // actually having glyphs, so we supplement the CSS chain with an
2853        // explicit lookup for each requested script block. resolve_char()
2854        // prefers CSS fallbacks first (earlier in the chain wins).
2855        //
2856        // The set of script blocks to cover is caller-controlled via
2857        // `scripts_hint`: `None` keeps the back-compat DEFAULT_UNICODE_FALLBACK_SCRIPTS
2858        // behaviour (7 scripts) so existing `resolve_font_chain` consumers
2859        // stay unchanged; `Some(&[])` opts into "no unicode fallbacks at all"
2860        // for ASCII-only documents, eliminating the big CJK / Arabic fonts
2861        // from the resolved chain (and therefore from eager downstream parses).
2862        let important_ranges: &[UnicodeRange] =
2863            scripts_hint.unwrap_or(DEFAULT_UNICODE_FALLBACK_SCRIPTS);
2864        let unicode_fallbacks = if important_ranges.is_empty() {
2865            Vec::new()
2866        } else {
2867            let all_uncovered = vec![false; important_ranges.len()];
2868            self.find_unicode_fallbacks(
2869                important_ranges,
2870                &all_uncovered,
2871                &css_fallbacks,
2872                weight,
2873                italic,
2874                oblique,
2875                trace,
2876            )
2877        };
2878
2879        // WEB-LIFT LAST-RESORT (2026-06-03; the `with_memory_fonts` trap that previously made
2880        // editing this file fatal is now fixed by the byte-atomic remill fork support). In the
2881        // lifted web backend `find_unicode_fallbacks` returns 0 fonts even though one IS
2882        // registered (the matching/iteration mis-lifts), so BOTH chain lists come back empty →
2883        // every consumer (resolve_char, query_for_text, prune_chain_to_used_chars) sees no font
2884        // → the layout unwraps a None → OOB. When the chain would be empty, append the first
2885        // registered font so the chain is non-empty. Native chains are never empty here.
2886        let mut unicode_fallbacks = unicode_fallbacks;
2887        if css_fallbacks.is_empty() && unicode_fallbacks.is_empty() {
2888            let st = self.state_read();
2889            if let Some((pat, id)) = st.patterns.iter().next() {
2890                unicode_fallbacks.push(FontMatch {
2891                    id: *id,
2892                    unicode_ranges: pat.unicode_ranges.clone(),
2893                    fallbacks: Vec::new(),
2894                });
2895            }
2896        }
2897
2898        FontFallbackChain {
2899            css_fallbacks,
2900            unicode_fallbacks,
2901            original_stack: font_families.to_vec(),
2902        }
2903    }
2904
2905    /// Extract Unicode ranges from text
2906    #[allow(dead_code)]
2907    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
2908        let mut chars: Vec<char> = text.chars().collect();
2909        chars.sort_unstable();
2910        chars.dedup();
2911        
2912        if chars.is_empty() {
2913            return Vec::new();
2914        }
2915        
2916        let mut ranges = Vec::new();
2917        let mut range_start = chars[0] as u32;
2918        let mut range_end = range_start;
2919        
2920        for &c in &chars[1..] {
2921            let codepoint = c as u32;
2922            if codepoint == range_end + 1 {
2923                range_end = codepoint;
2924            } else {
2925                ranges.push(UnicodeRange { start: range_start, end: range_end });
2926                range_start = codepoint;
2927                range_end = codepoint;
2928            }
2929        }
2930        
2931        ranges.push(UnicodeRange { start: range_start, end: range_end });
2932        ranges
2933    }
2934    
2935    /// Fuzzy query for fonts by name when exact match fails
2936    /// Uses intelligent token-based matching with inverted index for speed:
2937    /// 1. Break name into tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2938    /// 2. Use token_index to find candidate fonts via BTreeSet intersection
2939    /// 3. Score only the candidate fonts (instead of all 800+ patterns)
2940    /// 4. Prioritize fonts matching more tokens + Unicode coverage
2941    #[cfg(feature = "std")]
2942    fn fuzzy_query_by_name(
2943        &self,
2944        requested_name: &str,
2945        weight: FcWeight,
2946        italic: PatternMatch,
2947        oblique: PatternMatch,
2948        unicode_ranges: &[UnicodeRange],
2949        _trace: &mut Vec<TraceMsg>,
2950    ) -> Vec<FontMatch> {
2951        // Extract tokens from the requested name (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2952        let tokens = Self::extract_font_name_tokens(requested_name);
2953        
2954        if tokens.is_empty() {
2955            return Vec::new();
2956        }
2957        
2958        // Convert tokens to lowercase for case-insensitive lookup
2959        let tokens_lower: Vec<String> = tokens.iter().map(|t| t.to_ascii_lowercase()).collect();
2960        
2961        // Progressive token matching strategy:
2962        // Start with first token, then progressively narrow down with each additional token
2963        // If adding a token results in 0 matches, use the previous (broader) set
2964        // Example: ["Noto"] -> 10 fonts, ["Noto","Sans"] -> 2 fonts, ["Noto","Sans","JP"] -> 0 fonts => use 2 fonts
2965        
2966        let state = self.state_read();
2967
2968        // Start with the first token
2969        let first_token = &tokens_lower[0];
2970        let mut candidate_ids = match state.token_index.get(first_token) {
2971            Some(ids) if !ids.is_empty() => ids.clone(),
2972            _ => {
2973                // First token not found - no fonts match, quit immediately
2974                return Vec::new();
2975            }
2976        };
2977
2978        // Progressively narrow down with each additional token
2979        for token in &tokens_lower[1..] {
2980            if let Some(token_ids) = state.token_index.get(token) {
2981                // Calculate intersection
2982                let intersection: alloc::collections::BTreeSet<FontId> =
2983                    candidate_ids.intersection(token_ids).copied().collect();
2984
2985                if intersection.is_empty() {
2986                    // Adding this token results in 0 matches - keep previous set and stop
2987                    break;
2988                } else {
2989                    // Successfully narrowed down - use intersection
2990                    candidate_ids = intersection;
2991                }
2992            } else {
2993                // Token not in index - keep current set and stop
2994                break;
2995            }
2996        }
2997
2998        // Now score only the candidate fonts (HUGE speedup!)
2999        let mut candidates = Vec::new();
3000
3001        for id in candidate_ids {
3002            let pattern = match state.metadata.get(&id) {
3003                Some(p) => p,
3004                None => continue,
3005            };
3006            
3007            // Get pre-tokenized font name (already lowercase)
3008            let font_tokens_lower = match state.font_tokens.get(&id) {
3009                Some(tokens) => tokens,
3010                None => continue,
3011            };
3012            
3013            if font_tokens_lower.is_empty() {
3014                continue;
3015            }
3016            
3017            // Calculate token match score (how many requested tokens appear in font name)
3018            // Both tokens_lower and font_tokens_lower are already lowercase, so direct comparison
3019            let token_matches = tokens_lower.iter()
3020                .filter(|req_token| {
3021                    font_tokens_lower.iter().any(|font_token| {
3022                        // Both already lowercase — exact token match (index guarantees candidates)
3023                        font_token == *req_token
3024                    })
3025                })
3026                .count();
3027            
3028            // Skip if no tokens match (shouldn't happen due to index, but safety check)
3029            if token_matches == 0 {
3030                continue;
3031            }
3032            
3033            // Calculate token similarity score (0-100)
3034            let token_similarity = (token_matches * 100 / tokens.len()) as i32;
3035            
3036            // Calculate Unicode range similarity
3037            let unicode_similarity = if !unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
3038                Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
3039            } else {
3040                0
3041            };
3042            
3043            // CRITICAL: If we have Unicode requirements, ONLY accept fonts that cover them
3044            // A font with great name match but no Unicode coverage is useless
3045            if !unicode_ranges.is_empty() && unicode_similarity == 0 {
3046                continue;
3047            }
3048            
3049            let style_score = Self::calculate_style_score(&FcPattern {
3050                weight,
3051                italic,
3052                oblique,
3053                ..Default::default()
3054            }, pattern);
3055            
3056            candidates.push((
3057                id,
3058                token_similarity,
3059                unicode_similarity,
3060                style_score,
3061                pattern.clone(),
3062            ));
3063        }
3064        
3065        // Sort by:
3066        // 1. Token matches (more matches = better)
3067        // 2. Unicode compatibility (if ranges provided)
3068        // 3. Style score (lower is better)
3069        // 4. Deterministic tiebreaker: prefer non-italic, then by font name
3070        candidates.sort_by(|a, b| {
3071            if !unicode_ranges.is_empty() {
3072                // When we have Unicode requirements, prioritize coverage
3073                b.1.cmp(&a.1) // Token similarity (higher is better) - PRIMARY
3074                    .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better) - SECONDARY
3075                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better) - TERTIARY
3076                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
3077                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
3078            } else {
3079                // No Unicode requirements, token similarity is primary
3080                b.1.cmp(&a.1) // Token similarity (higher is better)
3081                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
3082                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
3083                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
3084            }
3085        });
3086        
3087        // Take top 5 matches
3088        candidates.truncate(5);
3089        
3090        // Convert to FontMatch
3091        candidates
3092            .into_iter()
3093            .map(|(id, _token_sim, _unicode_sim, _style, pattern)| {
3094                FontMatch {
3095                    id,
3096                    unicode_ranges: pattern.unicode_ranges.clone(),
3097                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
3098                }
3099            })
3100            .collect()
3101    }
3102    
3103    /// Extract tokens from a font name
3104    /// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
3105    /// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
3106    pub fn extract_font_name_tokens(name: &str) -> Vec<String> {
3107        let mut tokens = Vec::new();
3108        let mut current_token = String::new();
3109        let mut last_was_lower = false;
3110        
3111        for c in name.chars() {
3112            if c.is_whitespace() || c == '-' || c == '_' {
3113                // Word separator
3114                if !current_token.is_empty() {
3115                    tokens.push(current_token.clone());
3116                    current_token.clear();
3117                }
3118                last_was_lower = false;
3119            } else if c.is_uppercase() && last_was_lower && !current_token.is_empty() {
3120                // CamelCase boundary (e.g., "Noto" | "Sans")
3121                tokens.push(current_token.clone());
3122                current_token.clear();
3123                current_token.push(c);
3124                last_was_lower = false;
3125            } else {
3126                current_token.push(c);
3127                last_was_lower = c.is_lowercase();
3128            }
3129        }
3130        
3131        if !current_token.is_empty() {
3132            tokens.push(current_token);
3133        }
3134        
3135        tokens
3136    }
3137    
3138    /// Find fonts to cover missing Unicode ranges
3139    /// Uses intelligent matching: prefers fonts with similar names to existing ones
3140    /// Early quits once all Unicode ranges are covered for performance
3141    fn find_unicode_fallbacks(
3142        &self,
3143        unicode_ranges: &[UnicodeRange],
3144        covered_chars: &[bool],
3145        existing_groups: &[CssFallbackGroup],
3146        _weight: FcWeight,
3147        _italic: PatternMatch,
3148        _oblique: PatternMatch,
3149        trace: &mut Vec<TraceMsg>,
3150    ) -> Vec<FontMatch> {
3151        // Extract uncovered ranges
3152        let mut uncovered_ranges = Vec::new();
3153        for (i, &covered) in covered_chars.iter().enumerate() {
3154            if !covered && i < unicode_ranges.len() {
3155                uncovered_ranges.push(unicode_ranges[i].clone());
3156            }
3157        }
3158        
3159        if uncovered_ranges.is_empty() {
3160            return Vec::new();
3161        }
3162
3163        // Query for fonts that cover these ranges.
3164        // Use DontCare for weight/italic/oblique — we want ANY font that covers
3165        // the missing characters, regardless of style. The similarity sort below
3166        // will prefer fonts matching the existing chain's style anyway.
3167        let pattern = FcPattern {
3168            name: None,
3169            weight: FcWeight::Normal, // Normal weight is not filtered by query_matches_internal (line 1836)
3170            italic: PatternMatch::DontCare,
3171            oblique: PatternMatch::DontCare,
3172            unicode_ranges: uncovered_ranges.clone(),
3173            ..Default::default()
3174        };
3175        
3176        let mut candidates = self.query_internal(&pattern, trace);
3177
3178        // Intelligent sorting: prefer fonts with similar names to existing ones
3179        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
3180        let existing_prefixes: Vec<String> = existing_groups
3181            .iter()
3182            .flat_map(|group| {
3183                group.fonts.iter().filter_map(|font| {
3184                    self.get_metadata_by_id(&font.id)
3185                        .and_then(|meta| meta.family.clone())
3186                        .and_then(|family| {
3187                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
3188                            family.split_whitespace()
3189                                .take(2)
3190                                .collect::<Vec<_>>()
3191                                .join(" ")
3192                                .into()
3193                        })
3194                })
3195            })
3196            .collect();
3197        
3198        // Sort candidates by:
3199        // 1. Name similarity to existing fonts (highest priority)
3200        // 2. Unicode coverage (secondary)
3201        candidates.sort_by(|a, b| {
3202            let a_meta = self.get_metadata_by_id(&a.id);
3203            let b_meta = self.get_metadata_by_id(&b.id);
3204
3205            let a_score = Self::calculate_font_similarity_score(a_meta.as_ref(), &existing_prefixes);
3206            let b_score = Self::calculate_font_similarity_score(b_meta.as_ref(), &existing_prefixes);
3207            
3208            b_score.cmp(&a_score) // Higher score = better match
3209                .then_with(|| {
3210                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
3211                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
3212                    b_coverage.cmp(&a_coverage)
3213                })
3214        });
3215        
3216        // Early quit optimization: only take fonts until all ranges are covered
3217        let mut result = Vec::new();
3218        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
3219        
3220        for candidate in candidates {
3221            // Check which ranges this font covers
3222            let mut covers_new_range = false;
3223            
3224            for (i, range) in uncovered_ranges.iter().enumerate() {
3225                if remaining_uncovered[i] {
3226                    // Check if this font covers this range
3227                    for font_range in &candidate.unicode_ranges {
3228                        if font_range.overlaps(range) {
3229                            remaining_uncovered[i] = false;
3230                            covers_new_range = true;
3231                            break;
3232                        }
3233                    }
3234                }
3235            }
3236            
3237            // Only add fonts that cover at least one new range
3238            if covers_new_range {
3239                result.push(candidate);
3240                
3241                // Early quit: if all ranges are covered, stop
3242                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
3243                    break;
3244                }
3245            }
3246        }
3247        
3248        result
3249    }
3250    
3251    /// Calculate similarity score between a font and existing font prefixes
3252    /// Higher score = more similar
3253    fn calculate_font_similarity_score(
3254        font_meta: Option<&FcPattern>,
3255        existing_prefixes: &[String],
3256    ) -> i32 {
3257        let Some(meta) = font_meta else { return 0; };
3258        let Some(family) = &meta.family else { return 0; };
3259        
3260        // Check if this font's family matches any existing prefix
3261        for prefix in existing_prefixes {
3262            if family.starts_with(prefix) {
3263                return 100; // Strong match
3264            }
3265            if family.contains(prefix) {
3266                return 50; // Partial match
3267            }
3268        }
3269        
3270        0 // No match
3271    }
3272    
3273    /// Find fallback fonts for a given pattern
3274    // Helper to calculate total unicode coverage
3275    pub fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
3276        ranges
3277            .iter()
3278            .map(|range| (range.end - range.start + 1) as u64)
3279            .sum()
3280    }
3281
3282    /// Calculate how well a font's Unicode ranges cover the requested ranges
3283    /// Returns a compatibility score (higher is better, 0 means no overlap)
3284    pub fn calculate_unicode_compatibility(
3285        requested: &[UnicodeRange],
3286        available: &[UnicodeRange],
3287    ) -> i32 {
3288        if requested.is_empty() {
3289            // No specific requirements, return total coverage
3290            return Self::calculate_unicode_coverage(available) as i32;
3291        }
3292        
3293        let mut total_coverage = 0u32;
3294        
3295        for req_range in requested {
3296            for avail_range in available {
3297                // Calculate overlap between requested and available ranges
3298                let overlap_start = req_range.start.max(avail_range.start);
3299                let overlap_end = req_range.end.min(avail_range.end);
3300                
3301                if overlap_start <= overlap_end {
3302                    // There is overlap
3303                    let overlap_size = overlap_end - overlap_start + 1;
3304                    total_coverage += overlap_size;
3305                }
3306            }
3307        }
3308        
3309        total_coverage as i32
3310    }
3311
3312    pub fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {
3313
3314        let mut score = 0_i32;
3315
3316        // Weight calculation with special handling for bold property
3317        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
3318            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
3319        {
3320            // No weight penalty when bold is requested and font has Bold weight
3321            // No weight penalty when non-bold is requested and font has non-Bold weight
3322        } else {
3323            // Apply normal weight difference penalty
3324            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
3325            score += weight_diff as i32;
3326        }
3327
3328        // Exact weight match bonus: reward fonts whose weight matches the request exactly,
3329        // with an extra bonus when both are Normal (the most common case for body text)
3330        if original.weight == candidate.weight {
3331            score -= 15;
3332            if original.weight == FcWeight::Normal {
3333                score -= 10; // Extra bonus for Normal-Normal match
3334            }
3335        }
3336
3337        // Stretch calculation with special handling for condensed property
3338        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
3339            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
3340        {
3341            // No stretch penalty when condensed is requested and font has condensed stretch
3342            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
3343        } else {
3344            // Apply normal stretch difference penalty
3345            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
3346            score += (stretch_diff * 100) as i32;
3347        }
3348
3349        // Handle style properties with standard penalties and bonuses
3350        let style_props = [
3351            (original.italic, candidate.italic, 300, 150),
3352            (original.oblique, candidate.oblique, 200, 100),
3353            (original.bold, candidate.bold, 300, 150),
3354            (original.monospace, candidate.monospace, 100, 50),
3355            (original.condensed, candidate.condensed, 100, 50),
3356        ];
3357
3358        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
3359            if orig.needs_to_match() {
3360                if orig == PatternMatch::False && cand == PatternMatch::DontCare {
3361                    // Requesting non-italic but font doesn't declare: small penalty
3362                    // (less than a full mismatch but more than a perfect match)
3363                    score += dontcare_penalty / 2;
3364                } else if !orig.matches(&cand) {
3365                    if cand == PatternMatch::DontCare {
3366                        score += dontcare_penalty;
3367                    } else {
3368                        score += mismatch_penalty;
3369                    }
3370                } else if orig == PatternMatch::True && cand == PatternMatch::True {
3371                    // Give bonus for exact True match
3372                    score -= 20;
3373                } else if orig == PatternMatch::False && cand == PatternMatch::False {
3374                    // Give bonus for exact False match (prefer explicitly non-italic
3375                    // over fonts with unknown/DontCare italic status)
3376                    score -= 20;
3377                }
3378            } else {
3379                // orig == DontCare: prefer "normal" fonts over styled ones.
3380                // When the caller doesn't specify italic/bold/etc., a font
3381                // that IS italic/bold should score slightly worse than one
3382                // that isn't, so Regular is chosen over Italic by default.
3383                if cand == PatternMatch::True {
3384                    score += dontcare_penalty / 3;
3385                }
3386            }
3387        }
3388
3389        // ── Name-based "base font" detection ──
3390        // The shorter the font name relative to its family, the more "basic" the
3391        // variant.  E.g. "System Font" (the base) should score better than
3392        // "System Font Regular Italic" (a variant) when the user hasn't
3393        // explicitly requested italic.
3394        if let (Some(name), Some(family)) = (&candidate.name, &candidate.family) {
3395            let name_lower = name.to_ascii_lowercase();
3396            let family_lower = family.to_ascii_lowercase();
3397
3398            // Strip the family prefix from the name to get the "extra" part
3399            let extra = if name_lower.starts_with(&family_lower) {
3400                name_lower[family_lower.len()..].to_string()
3401            } else {
3402                String::new()
3403            };
3404
3405            // Strip common neutral descriptors that don't indicate a style variant
3406            let stripped = extra
3407                .replace("regular", "")
3408                .replace("normal", "")
3409                .replace("book", "")
3410                .replace("roman", "");
3411            let stripped = stripped.trim();
3412
3413            if stripped.is_empty() {
3414                // This is a "base font" – name is just the family (± "Regular")
3415                score -= 50;
3416            } else {
3417                // Name has extra style descriptors – add a penalty per extra word
3418                let extra_words = stripped.split_whitespace().count();
3419                score += (extra_words as i32) * 25;
3420            }
3421        }
3422
3423        // ── Subfamily "Regular" bonus ──
3424        // Fonts whose OpenType subfamily is exactly "Regular" are the canonical
3425        // base variant and should be strongly preferred.
3426        if let Some(ref subfamily) = candidate.metadata.font_subfamily {
3427            let sf_lower = subfamily.to_ascii_lowercase();
3428            if sf_lower == "regular" {
3429                score -= 30;
3430            }
3431        }
3432
3433        score
3434    }
3435}
3436
/// Walks the system fontconfig configuration starting at
/// `/etc/fonts/fonts.conf` and scans the font directories it declares.
///
/// Configuration files are parsed with `ParseFontsConf` (which queues further
/// `<include>` targets and records `<dir>` entries) and
/// `ParseFontsConfRenderConfig` (per-family render settings). An included
/// directory is not recursed into; only regular files inside it whose name
/// starts with an ASCII digit and ends with `.conf` are queued.
///
/// Every resolved path is processed at most once, so cyclic or repeated
/// `<include>` chains cannot loop forever or register the same directories
/// twice.
///
/// Returns `None` when the base configuration file does not exist or when no
/// font directory was found; otherwise the result of
/// `FcScanDirectoriesInner` together with the collected render configs.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn FcScanDirectories() -> Option<(Vec<(FcPattern, FcFontPath)>, BTreeMap<String, FcFontRenderConfig>)> {
    use std::collections::HashSet;
    use std::fs;
    use std::path::Path;

    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";

    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
        return None;
    }

    let mut font_paths = Vec::with_capacity(32);
    let mut paths_to_visit = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
    let mut render_configs: BTreeMap<String, FcFontRenderConfig> = BTreeMap::new();
    // Resolved paths that were already handled (cycle / duplicate guard).
    let mut visited = HashSet::new();

    while let Some((prefix, path_to_visit)) = paths_to_visit.pop() {
        let Some(path) = process_path(&prefix, path_to_visit, true) else {
            continue;
        };

        // `insert` returns false when the path was seen before.
        if !visited.insert(path.clone()) {
            continue;
        }

        let Ok(metadata) = fs::metadata(&path) else {
            continue;
        };

        if metadata.is_file() {
            let Ok(xml_utf8) = fs::read_to_string(&path) else {
                continue;
            };

            if ParseFontsConf(&xml_utf8, &mut paths_to_visit, &mut font_paths).is_none() {
                continue;
            }

            // Also parse render config blocks from this file
            ParseFontsConfRenderConfig(&xml_utf8, &mut render_configs);
        } else if metadata.is_dir() {
            let Ok(dir_entries) = fs::read_dir(&path) else {
                continue;
            };

            // Unreadable directory entries are skipped.
            for entry in dir_entries.flatten() {
                let entry_path = entry.path();

                // `fs::metadata` traverses symbolic links
                let is_regular_file = fs::metadata(&entry_path)
                    .map(|entry_metadata| entry_metadata.is_file())
                    .unwrap_or(false);
                if !is_regular_file {
                    continue;
                }

                let Some(file_name) = entry_path.file_name() else {
                    continue;
                };

                // Only numbered config snippets such as "10-foo.conf"
                let file_name_str = file_name.to_string_lossy();
                if file_name_str.starts_with(|c: char| c.is_ascii_digit())
                    && file_name_str.ends_with(".conf")
                {
                    paths_to_visit.push((None, entry_path));
                }
            }
        }
    }

    if font_paths.is_empty() {
        return None;
    }

    Some((FcScanDirectoriesInner(&font_paths), render_configs))
}
3520
/// Parses one fonts.conf XML document and collects its `<include>` and
/// `<dir>` entries.
///
/// - The trimmed text of each `<include>` is pushed onto `paths_to_visit`.
/// - The trimmed text of each `<dir>` is pushed onto `font_paths`.
///
/// Both are paired with the value of the element's `prefix` attribute, if
/// present. Elements without text content, including self-closing
/// `<include/>` / `<dir/>`, contribute nothing.
///
/// Returns `None` on a tokenizer error or when another element starts inside
/// an `<include>` / `<dir>`. Entries pushed before the failure are kept in the
/// output vectors.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConf(
    input: &str,
    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
    font_paths: &mut Vec<(Option<String>, String)>,
) -> Option<()> {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    const TAG_INCLUDE: &str = "include";
    const TAG_DIR: &str = "dir";
    const ATTRIBUTE_PREFIX: &str = "prefix";

    // State of the <include>/<dir> element currently being read.
    let mut current_prefix: Option<&str> = None;
    let mut current_path: Option<&str> = None;
    let mut is_in_include = false;
    let mut is_in_dir = false;

    for token_result in Tokenizer::from(input) {
        let token = token_result.ok()?;

        match token {
            ElementStart { local, .. } => {
                if is_in_include || is_in_dir {
                    return None; /* error: nested tags */
                }

                match local.as_str() {
                    TAG_INCLUDE => {
                        is_in_include = true;
                    }
                    TAG_DIR => {
                        is_in_dir = true;
                    }
                    _ => continue,
                }

                current_path = None;
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                if text.is_empty() {
                    continue;
                }
                if is_in_include || is_in_dir {
                    current_path = Some(text);
                }
            }
            Attribute { local, value, .. } => {
                if !is_in_include && !is_in_dir {
                    continue;
                }
                // attribute on <include> or <dir> node
                if local.as_str() == ATTRIBUTE_PREFIX {
                    current_prefix = Some(value.as_str());
                }
            }
            ElementEnd { end, .. } => {
                let end_tag = match end {
                    xmlparser::ElementEnd::Close(_, name) => name,
                    xmlparser::ElementEnd::Empty => {
                        // Self-closing element. If it was an <include/> or
                        // <dir/> it has no path, so just drop its state;
                        // leaving the flags set would make the next element
                        // look like an illegal nested tag.
                        is_in_include = false;
                        is_in_dir = false;
                        current_path = None;
                        current_prefix = None;
                        continue;
                    }
                    // End of the opening tag ("<dir ...>"): content follows.
                    xmlparser::ElementEnd::Open => continue,
                };

                match end_tag.as_str() {
                    TAG_INCLUDE => {
                        if !is_in_include {
                            continue;
                        }

                        if let Some(path) = current_path {
                            paths_to_visit.push((
                                current_prefix.map(ToOwned::to_owned),
                                PathBuf::from(path),
                            ));
                        }
                    }
                    TAG_DIR => {
                        if !is_in_dir {
                            continue;
                        }

                        if let Some(path) = current_path {
                            font_paths.push((
                                current_prefix.map(ToOwned::to_owned),
                                path.to_owned(),
                            ));
                        }
                    }
                    _ => continue,
                }

                // The element is finished: reset everything for the next one.
                is_in_include = false;
                is_in_dir = false;
                current_path = None;
                current_prefix = None;
            }
            _ => {}
        }
    }

    Some(())
}
3627
/// Parses `<match target="font">` blocks from fonts.conf XML and records the
/// per-font rendering configuration of each block in `configs`, keyed by
/// family name.
///
/// For every such block the family is taken from the value inside
/// `<test name="family">` and each `<edit name="...">` value is handed to
/// `apply_edit_value`. At `</match>` the accumulated config is inserted into
/// `configs` only if a family was seen and the config differs from
/// `FcFontRenderConfig::default()`. An existing entry for the same family is
/// overwritten.
///
/// Tokenizer errors are skipped token by token; the function never fails.
///
/// Example fonts.conf snippet that this handles:
/// ```xml
/// <match target="font">
///   <test name="family"><string>Inconsolata</string></test>
///   <edit name="antialias" mode="assign"><bool>true</bool></edit>
///   <edit name="hintstyle" mode="assign"><const>hintslight</const></edit>
/// </match>
/// ```
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConfRenderConfig(
    input: &str,
    configs: &mut BTreeMap<String, FcFontRenderConfig>,
) {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    // Parser state machine
    #[derive(Clone, Copy, PartialEq)]
    enum State {
        /// Outside any relevant block
        Idle,
        /// Inside <match target="font">
        InMatchFont,
        /// Inside <test name="family"> within a match block
        InTestFamily,
        /// Inside <edit name="..."> within a match block
        InEdit,
        /// Inside a value element (<bool>, <double>, <const>, <string>) within <edit> or <test>
        // NOTE(review): this variant is never assigned anywhere in this
        // function; value elements are tracked through `value_tag` instead.
        InValue,
    }

    let mut state = State::Idle;
    // Set when a `target="font"` attribute is seen; consumed when the opening
    // tag that carries it is closed (ElementEnd::Open).
    let mut match_is_font_target = false;
    // Family name collected from <test name="family"> of the current block.
    let mut current_family: Option<String> = None;
    // `name` attribute of the <edit> element currently being read.
    let mut current_edit_name: Option<String> = None;
    // Last non-empty text seen inside the current <test family>/<edit>.
    let mut current_value: Option<String> = None;
    // Tag name of the value element (bool/double/const/string/int) last opened.
    let mut value_tag: Option<String> = None;
    // Render config being accumulated for the current <match> block.
    let mut config = FcFontRenderConfig::default();
    // True between the start of a <test> tag and the end of its opening tag,
    // i.e. while its attributes are being read.
    let mut in_test = false;
    // `name` attribute of the <test> element currently being opened.
    let mut test_name: Option<String> = None;

    for token_result in Tokenizer::from(input) {
        let token = match token_result {
            Ok(token) => token,
            // Best effort: ignore tokens the tokenizer could not produce.
            Err(_) => continue,
        };

        match token {
            ElementStart { local, .. } => {
                let tag = local.as_str();
                match tag {
                    "match" => {
                        // Reset state for a new match block
                        // (`state` itself is only changed once the tag's
                        // attributes have been read, see ElementEnd::Open).
                        match_is_font_target = false;
                        current_family = None;
                        config = FcFontRenderConfig::default();
                    }
                    "test" if state == State::InMatchFont => {
                        in_test = true;
                        test_name = None;
                    }
                    "edit" if state == State::InMatchFont => {
                        current_edit_name = None;
                    }
                    "bool" | "double" | "const" | "string" | "int" => {
                        // Value elements only matter inside a family test or
                        // an edit; remember which kind of value follows.
                        if state == State::InTestFamily || state == State::InEdit {
                            value_tag = Some(tag.to_owned());
                            current_value = None;
                        }
                    }
                    _ => {}
                }
            }
            Attribute { local, value, .. } => {
                let attr_name = local.as_str();
                let attr_value = value.as_str();

                match attr_name {
                    "target" => {
                        // NOTE(review): this is not restricted to <match>; a
                        // `target="font"` attribute on any element sets the
                        // flag. Confirm whether that is intended.
                        if attr_value == "font" {
                            match_is_font_target = true;
                        }
                    }
                    "name" => {
                        // While a <test> tag is being opened the name belongs
                        // to the test; otherwise, inside a font match block,
                        // it is treated as the name of an <edit>.
                        if in_test && state == State::InMatchFont {
                            test_name = Some(attr_value.to_owned());
                        } else if state == State::InMatchFont {
                            current_edit_name = Some(attr_value.to_owned());
                        }
                    }
                    _ => {}
                }
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                // Keep only text that sits inside a family test or an edit;
                // a later non-empty text replaces an earlier one.
                if !text.is_empty() && (state == State::InTestFamily || state == State::InEdit) {
                    current_value = Some(text.to_owned());
                }
            }
            ElementEnd { end, .. } => {
                match end {
                    xmlparser::ElementEnd::Open => {
                        // Tag just opened (after attributes processed)
                        // The branches are checked in this order, so exactly
                        // one state transition happens per opening tag.
                        if match_is_font_target && state == State::Idle {
                            state = State::InMatchFont;
                            match_is_font_target = false;
                        } else if in_test {
                            // Only tests on "family" are tracked; other tests
                            // leave the state at InMatchFont.
                            if test_name.as_deref() == Some("family") {
                                state = State::InTestFamily;
                            }
                            in_test = false;
                        } else if current_edit_name.is_some() && state == State::InMatchFont {
                            state = State::InEdit;
                        }
                    }
                    xmlparser::ElementEnd::Close(_, local) => {
                        let tag = local.as_str();
                        match tag {
                            "match" => {
                                // End of match block: store config if we have a family
                                if let Some(family) = current_family.take() {
                                    // Blocks that changed nothing are not stored.
                                    let empty = FcFontRenderConfig::default();
                                    if config != empty {
                                        configs.insert(family, config.clone());
                                    }
                                }
                                state = State::Idle;
                                config = FcFontRenderConfig::default();
                            }
                            "test" => {
                                if state == State::InTestFamily {
                                    // Extract the family name from the value we collected
                                    if let Some(ref val) = current_value {
                                        current_family = Some(val.clone());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_value = None;
                                value_tag = None;
                            }
                            "edit" => {
                                if state == State::InEdit {
                                    // Apply the collected value to the config
                                    if let (Some(ref name), Some(ref val)) = (&current_edit_name, &current_value) {
                                        apply_edit_value(&mut config, name, val, value_tag.as_deref());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_edit_name = None;
                                current_value = None;
                                value_tag = None;
                            }
                            "bool" | "double" | "const" | "string" | "int" => {
                                // Nothing to do here: `value_tag` was set when
                                // the value element started and `current_value`
                                // by the Text handler; both are consumed at the
                                // enclosing </test> or </edit>.
                            }
                            _ => {}
                        }
                    }
                    xmlparser::ElementEnd::Empty => {
                        // Self-closing tags: nothing to do
                        // NOTE(review): a self-closing <test .../> would leave
                        // `in_test` set until the next opening tag ends —
                        // verify whether that case needs handling.
                    }
                }
            }
            _ => {}
        }
    }
}
3798
/// Writes one parsed `<edit>` value into the matching field of `config`.
///
/// `edit_name` selects the target property and `value` is the text collected
/// for it. Boolean properties and the `hintstyle` / `rgba` / `lcdfilter`
/// constants store whatever their parser returns, so an unrecognised spelling
/// stores `None`. `dpi` and `scale` are written only when `value` parses as
/// an `f64`; otherwise the field keeps its previous contents. Property names
/// not listed here are ignored.
///
/// `value_tag` is part of the signature but is not consulted by this function.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn apply_edit_value(
    config: &mut FcFontRenderConfig,
    edit_name: &str,
    value: &str,
    value_tag: Option<&str>,
) {
    match edit_name {
        // Boolean switches.
        "antialias" => config.antialias = parse_bool_value(value),
        "hinting" => config.hinting = parse_bool_value(value),
        "autohint" => config.autohint = parse_bool_value(value),
        "embeddedbitmap" => config.embeddedbitmap = parse_bool_value(value),
        "embolden" => config.embolden = parse_bool_value(value),
        "minspace" => config.minspace = parse_bool_value(value),

        // Named constants.
        "hintstyle" => config.hintstyle = parse_hintstyle_const(value),
        "rgba" => config.rgba = parse_rgba_const(value),
        "lcdfilter" => config.lcdfilter = parse_lcdfilter_const(value),

        // Floating-point settings: leave the field untouched on a parse error.
        "dpi" | "scale" => {
            if let Ok(number) = value.parse::<f64>() {
                let slot = if edit_name == "dpi" {
                    &mut config.dpi
                } else {
                    &mut config.scale
                };
                *slot = Some(number);
            }
        }

        // Unknown edit property, ignore.
        _ => {}
    }
}
3850
3851#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
/// Parses the text of a boolean value.
///
/// Only the exact strings `"true"` and `"false"` are recognised; any other
/// input (different casing, surrounding whitespace, empty text) yields `None`.
fn parse_bool_value(value: &str) -> Option<bool> {
    // `bool`'s `FromStr` implementation accepts exactly "true" and "false".
    value.parse::<bool>().ok()
}
3859
/// Maps a `hintstyle` constant name to its [`FcHintStyle`] variant.
///
/// Returns `None` for any name other than `hintnone`, `hintslight`,
/// `hintmedium` or `hintfull` (exact, case-sensitive match).
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_hintstyle_const(value: &str) -> Option<FcHintStyle> {
    let style = match value {
        "hintnone" => FcHintStyle::None,
        "hintslight" => FcHintStyle::Slight,
        "hintmedium" => FcHintStyle::Medium,
        "hintfull" => FcHintStyle::Full,
        _ => return None,
    };
    Some(style)
}
3870
/// Maps an `rgba` constant name to its [`FcRgba`] variant.
///
/// Recognised names are `unknown`, `rgb`, `bgr`, `vrgb`, `vbgr` and `none`
/// (exact, case-sensitive match); anything else yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_rgba_const(value: &str) -> Option<FcRgba> {
    let layout = match value {
        "unknown" => FcRgba::Unknown,
        "rgb" => FcRgba::Rgb,
        "bgr" => FcRgba::Bgr,
        "vrgb" => FcRgba::Vrgb,
        "vbgr" => FcRgba::Vbgr,
        "none" => FcRgba::None,
        _ => return None,
    };
    Some(layout)
}
3883
/// Maps an `lcdfilter` constant name to its [`FcLcdFilter`] variant.
///
/// Recognised names are `lcdnone`, `lcddefault`, `lcdlight` and `lcdlegacy`
/// (exact, case-sensitive match); anything else yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_lcdfilter_const(value: &str) -> Option<FcLcdFilter> {
    let filter = match value {
        "lcdnone" => FcLcdFilter::None,
        "lcddefault" => FcLcdFilter::Default,
        "lcdlight" => FcLcdFilter::Light,
        "lcdlegacy" => FcLcdFilter::Legacy,
        _ => return None,
    };
    Some(filter)
}
3894
3895// Unicode range bit positions to actual ranges (full table from OpenType spec).
3896// Based on: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
3897#[cfg(all(feature = "std", feature = "parsing"))]
const UNICODE_RANGE_MAPPINGS: &[(usize, u32, u32)] = &[
    // Each entry is (ulUnicodeRange bit, first code point, last code point),
    // following the bit assignments of the OpenType OS/2 table.
    //
    // A bit that stands for several Unicode blocks appears once per block, so
    // the table must be iterated rather than indexed by bit number. Entries
    // are ordered by bit. Bits 123-127 are reserved by the specification and
    // therefore have no entry.
    //
    // ulUnicodeRange1 (bits 0-31)
    (0, 0x0000, 0x007F), // Basic Latin
    (1, 0x0080, 0x00FF), // Latin-1 Supplement
    (2, 0x0100, 0x017F), // Latin Extended-A
    (3, 0x0180, 0x024F), // Latin Extended-B
    (4, 0x0250, 0x02AF), // IPA Extensions
    (4, 0x1D00, 0x1D7F), // Phonetic Extensions
    (4, 0x1D80, 0x1DBF), // Phonetic Extensions Supplement
    (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
    (5, 0xA700, 0xA71F), // Modifier Tone Letters
    (6, 0x0300, 0x036F), // Combining Diacritical Marks
    (6, 0x1DC0, 0x1DFF), // Combining Diacritical Marks Supplement
    (7, 0x0370, 0x03FF), // Greek and Coptic
    (8, 0x2C80, 0x2CFF), // Coptic
    (9, 0x0400, 0x04FF), // Cyrillic
    (9, 0x0500, 0x052F), // Cyrillic Supplement
    (9, 0x2DE0, 0x2DFF), // Cyrillic Extended-A
    (9, 0xA640, 0xA69F), // Cyrillic Extended-B
    (10, 0x0530, 0x058F), // Armenian
    (11, 0x0590, 0x05FF), // Hebrew
    (12, 0xA500, 0xA63F), // Vai
    (13, 0x0600, 0x06FF), // Arabic
    (13, 0x0750, 0x077F), // Arabic Supplement
    (14, 0x07C0, 0x07FF), // NKo
    (15, 0x0900, 0x097F), // Devanagari
    (16, 0x0980, 0x09FF), // Bengali
    (17, 0x0A00, 0x0A7F), // Gurmukhi
    (18, 0x0A80, 0x0AFF), // Gujarati
    (19, 0x0B00, 0x0B7F), // Oriya
    (20, 0x0B80, 0x0BFF), // Tamil
    (21, 0x0C00, 0x0C7F), // Telugu
    (22, 0x0C80, 0x0CFF), // Kannada
    (23, 0x0D00, 0x0D7F), // Malayalam
    (24, 0x0E00, 0x0E7F), // Thai
    (25, 0x0E80, 0x0EFF), // Lao
    (26, 0x10A0, 0x10FF), // Georgian
    (26, 0x2D00, 0x2D2F), // Georgian Supplement
    (27, 0x1B00, 0x1B7F), // Balinese
    (28, 0x1100, 0x11FF), // Hangul Jamo
    (29, 0x1E00, 0x1EFF), // Latin Extended Additional
    (29, 0x2C60, 0x2C7F), // Latin Extended-C
    (29, 0xA720, 0xA7FF), // Latin Extended-D
    (30, 0x1F00, 0x1FFF), // Greek Extended
    (31, 0x2000, 0x206F), // General Punctuation
    (31, 0x2E00, 0x2E7F), // Supplemental Punctuation
    // ulUnicodeRange2 (bits 32-63)
    (32, 0x2070, 0x209F), // Superscripts And Subscripts
    (33, 0x20A0, 0x20CF), // Currency Symbols
    (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
    (35, 0x2100, 0x214F), // Letterlike Symbols
    (36, 0x2150, 0x218F), // Number Forms
    (37, 0x2190, 0x21FF), // Arrows
    (37, 0x27F0, 0x27FF), // Supplemental Arrows-A
    (37, 0x2900, 0x297F), // Supplemental Arrows-B
    (37, 0x2B00, 0x2BFF), // Miscellaneous Symbols and Arrows
    (38, 0x2200, 0x22FF), // Mathematical Operators
    (38, 0x27C0, 0x27EF), // Miscellaneous Mathematical Symbols-A
    (38, 0x2980, 0x29FF), // Miscellaneous Mathematical Symbols-B
    (38, 0x2A00, 0x2AFF), // Supplemental Mathematical Operators
    (39, 0x2300, 0x23FF), // Miscellaneous Technical
    (40, 0x2400, 0x243F), // Control Pictures
    (41, 0x2440, 0x245F), // Optical Character Recognition
    (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
    (43, 0x2500, 0x257F), // Box Drawing
    (44, 0x2580, 0x259F), // Block Elements
    (45, 0x25A0, 0x25FF), // Geometric Shapes
    (46, 0x2600, 0x26FF), // Miscellaneous Symbols
    (47, 0x2700, 0x27BF), // Dingbats
    (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
    (49, 0x3040, 0x309F), // Hiragana
    (50, 0x30A0, 0x30FF), // Katakana
    (50, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
    (51, 0x3100, 0x312F), // Bopomofo
    (51, 0x31A0, 0x31BF), // Bopomofo Extended
    (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
    (53, 0xA840, 0xA87F), // Phags-pa
    (54, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
    (55, 0x3300, 0x33FF), // CJK Compatibility
    (56, 0xAC00, 0xD7AF), // Hangul Syllables
    (57, 0x10000, 0x10FFFF), // Non-Plane 0 (at least one code point beyond the BMP)
    (58, 0x10900, 0x1091F), // Phoenician
    (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
    (59, 0x2E80, 0x2EFF), // CJK Radicals Supplement
    (59, 0x2F00, 0x2FDF), // Kangxi Radicals
    (59, 0x2FF0, 0x2FFF), // Ideographic Description Characters
    (59, 0x3190, 0x319F), // Kanbun
    (59, 0x3400, 0x4DBF), // CJK Unified Ideographs Extension A
    (59, 0x20000, 0x2A6DF), // CJK Unified Ideographs Extension B
    (60, 0xE000, 0xF8FF), // Private Use Area (plane 0)
    (61, 0x31C0, 0x31EF), // CJK Strokes
    (61, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
    (61, 0x2F800, 0x2FA1F), // CJK Compatibility Ideographs Supplement
    (62, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
    (63, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
    // ulUnicodeRange3 (bits 64-95)
    (64, 0xFE20, 0xFE2F), // Combining Half Marks
    (65, 0xFE10, 0xFE1F), // Vertical Forms
    (65, 0xFE30, 0xFE4F), // CJK Compatibility Forms
    (66, 0xFE50, 0xFE6F), // Small Form Variants
    (67, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
    (68, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
    (69, 0xFFF0, 0xFFFF), // Specials
    (70, 0x0F00, 0x0FFF), // Tibetan
    (71, 0x0700, 0x074F), // Syriac
    (72, 0x0780, 0x07BF), // Thaana
    (73, 0x0D80, 0x0DFF), // Sinhala
    (74, 0x1000, 0x109F), // Myanmar
    (75, 0x1200, 0x137F), // Ethiopic
    (75, 0x1380, 0x139F), // Ethiopic Supplement
    (75, 0x2D80, 0x2DDF), // Ethiopic Extended
    (76, 0x13A0, 0x13FF), // Cherokee
    (77, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
    (78, 0x1680, 0x169F), // Ogham
    (79, 0x16A0, 0x16FF), // Runic
    (80, 0x1780, 0x17FF), // Khmer
    (80, 0x19E0, 0x19FF), // Khmer Symbols
    (81, 0x1800, 0x18AF), // Mongolian
    (82, 0x2800, 0x28FF), // Braille Patterns
    (83, 0xA000, 0xA48F), // Yi Syllables
    (83, 0xA490, 0xA4CF), // Yi Radicals
    (84, 0x1700, 0x171F), // Tagalog
    (84, 0x1720, 0x173F), // Hanunoo
    (84, 0x1740, 0x175F), // Buhid
    (84, 0x1760, 0x177F), // Tagbanwa
    (85, 0x10300, 0x1032F), // Old Italic
    (86, 0x10330, 0x1034F), // Gothic
    (87, 0x10400, 0x1044F), // Deseret
    (88, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
    (88, 0x1D100, 0x1D1FF), // Musical Symbols
    (88, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
    (89, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
    (90, 0xF0000, 0xFFFFD), // Private Use (plane 15)
    (90, 0x100000, 0x10FFFD), // Private Use (plane 16)
    (91, 0xFE00, 0xFE0F), // Variation Selectors
    (91, 0xE0100, 0xE01EF), // Variation Selectors Supplement
    (92, 0xE0000, 0xE007F), // Tags
    (93, 0x1900, 0x194F), // Limbu
    (94, 0x1950, 0x197F), // Tai Le
    (95, 0x1980, 0x19DF), // New Tai Lue
    // ulUnicodeRange4 (bits 96-127; 123-127 reserved)
    (96, 0x1A00, 0x1A1F), // Buginese
    (97, 0x2C00, 0x2C5F), // Glagolitic
    (98, 0x2D30, 0x2D7F), // Tifinagh
    (99, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
    (100, 0xA800, 0xA82F), // Syloti Nagri
    (101, 0x10000, 0x1007F), // Linear B Syllabary
    (101, 0x10080, 0x100FF), // Linear B Ideograms
    (101, 0x10100, 0x1013F), // Aegean Numbers
    (102, 0x10140, 0x1018F), // Ancient Greek Numbers
    (103, 0x10380, 0x1039F), // Ugaritic
    (104, 0x103A0, 0x103DF), // Old Persian
    (105, 0x10450, 0x1047F), // Shavian
    (106, 0x10480, 0x104AF), // Osmanya
    (107, 0x10800, 0x1083F), // Cypriot Syllabary
    (108, 0x10A00, 0x10A5F), // Kharoshthi
    (109, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
    (110, 0x12000, 0x123FF), // Cuneiform
    (110, 0x12400, 0x1247F), // Cuneiform Numbers and Punctuation
    (111, 0x1D360, 0x1D37F), // Counting Rod Numerals
    (112, 0x1B80, 0x1BBF), // Sundanese
    (113, 0x1C00, 0x1C4F), // Lepcha
    (114, 0x1C50, 0x1C7F), // Ol Chiki
    (115, 0xA880, 0xA8DF), // Saurashtra
    (116, 0xA900, 0xA92F), // Kayah Li
    (117, 0xA930, 0xA95F), // Rejang
    (118, 0xAA00, 0xAA5F), // Cham
    (119, 0x10190, 0x101CF), // Ancient Symbols
    (120, 0x101D0, 0x101FF), // Phaistos Disc
    (121, 0x10280, 0x1029F), // Lycian
    (121, 0x102A0, 0x102DF), // Carian
    (121, 0x10920, 0x1093F), // Lydian
    (122, 0x1F000, 0x1F02F), // Mahjong Tiles
    (122, 0x1F030, 0x1F09F), // Domino Tiles
];
4032
/// Intermediate parsed data from a single font face within a font file.
///
/// Used to share parsing logic between `FcParseFont` and `FcParseFontBytesInner`:
/// `parse_font_faces` produces these, and each caller pairs `pattern` with its
/// own source descriptor (`FcFontPath` or `FcFont`) carrying `font_index`.
#[cfg(all(feature = "std", feature = "parsing"))]
struct ParsedFontFace {
    // Pattern built for this face by `parse_font_faces`.
    pattern: FcPattern,
    // Index of the face inside the font file; `parse_font_faces` only ever
    // uses index 0 for a file that is not a `ttcf` collection.
    font_index: usize,
}
4040
/// Parse every face in `font_bytes` and return the patterns extracted from each.
///
/// This is the shared core of `FcParseFont` and `FcParseFontBytesInner`:
/// TTC detection, font table parsing, OS/2/head/post reading, unicode range extraction,
/// CMAP verification, monospace detection, metadata extraction, and pattern creation.
///
/// # Returns
///
/// `Some` with one [`ParsedFontFace`] per distinct pattern found, or `None` when
/// * the bytes cannot be read as font data,
/// * any face in the iterated range fails to provide or parse its `head`,
///   `OS/2` or `name` table, or has no `post` table data, or
/// * no face produced a pattern.
///
/// Note that the per-face failures use `?` inside the face loop, so a single
/// failing face makes the whole call return `None`, including for faces that
/// were already parsed successfully.
#[cfg(all(feature = "std", feature = "parsing"))]
fn parse_font_faces(font_bytes: &[u8]) -> Option<Vec<ParsedFontFace>> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        get_name::fontcode_get_name,
        post::PostTable,
        tables::{
            os2::Os2, HeadTable, NameTable,
        },
        tag,
    };
    use std::collections::BTreeSet;

    // Name IDs that drive pattern creation: 4 = full font name, 1 = family.
    const FONT_SPECIFIER_NAME_ID: u16 = 4;
    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;

    // Number of faces to iterate: taken from the TTC header when the file
    // starts with the `ttcf` magic, otherwise a single face.
    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
        // Read numFonts from TTC header (offset 8, 4 bytes)
        let num_fonts =
            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
        // Cap at a reasonable maximum as a safety measure
        std::cmp::min(num_fonts as usize, 100)
    } else {
        // Not a collection, just one font
        1
    };

    let scope = ReadScope::new(font_bytes);
    let font_file = scope.read::<FontData<'_>>().ok()?;

    // Handle collections properly by iterating through all fonts
    let mut results = Vec::new();

    for font_index in 0..max_fonts {
        // Every `?` below returns `None` from the whole function, not just
        // for this face.
        let provider = font_file.table_provider(font_index).ok()?;
        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;

        let is_bold = head_table.is_bold();
        let is_italic = head_table.is_italic();
        let mut detected_monospace = None;

        // Missing `post` data is fatal, but a `post` table that fails to parse
        // only leaves `detected_monospace` as `None`.
        let post_data = provider.table_data(tag::POST).ok()??;
        if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
            // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
            detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
        }

        // Get font properties from OS/2 table
        let os2_data = provider.table_data(tag::OS_2).ok()??;
        let os2_table = ReadScope::new(&os2_data)
            .read_dep::<Os2>(os2_data.len())
            .ok()?;

        // Extract additional style information
        let is_oblique = os2_table
            .fs_selection
            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
        let weight = FcWeight::from_u16(os2_table.us_weight_class);
        let stretch = FcStretch::from_u16(os2_table.us_width_class);

        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
        // These are hints about what the font *should* support
        // For actual glyph coverage verification, query the font file directly
        let mut unicode_ranges = Vec::new();

        // Process the 4 Unicode range bitfields from OS/2 table
        let os2_ranges = [
            os2_table.ul_unicode_range1,
            os2_table.ul_unicode_range2,
            os2_table.ul_unicode_range3,
            os2_table.ul_unicode_range4,
        ];

        // `bit` is a global bit number (0-127): `bit / 32` selects the
        // bitfield and `bit % 32` the bit inside it. One range is pushed per
        // table entry whose bit is set.
        for &(bit, start, end) in UNICODE_RANGE_MAPPINGS {
            let range_idx = bit / 32;
            let bit_pos = bit % 32;
            if range_idx < 4 && (os2_ranges[range_idx] & (1 << bit_pos)) != 0 {
                unicode_ranges.push(UnicodeRange { start, end });
            }
        }

        // Verify OS/2 reported ranges against actual CMAP support
        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
        // for ranges they don't actually have glyphs for
        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);

        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
        if unicode_ranges.is_empty() {
            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
                unicode_ranges = cmap_ranges;
            }
        }

        // Use the shared detect_monospace helper for PANOSE + hmtx fallback
        // (an undetermined result is treated as "not monospace").
        let is_monospace = detect_monospace(&provider, &os2_table, detected_monospace)
            .unwrap_or(false);

        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;

        // Extract metadata from name table
        let mut metadata = FcFontMetadata::default();

        // Name IDs looked up for the metadata fields below.
        const NAME_ID_COPYRIGHT: u16 = 0;
        const NAME_ID_FAMILY: u16 = 1;
        const NAME_ID_SUBFAMILY: u16 = 2;
        const NAME_ID_UNIQUE_ID: u16 = 3;
        const NAME_ID_FULL_NAME: u16 = 4;
        const NAME_ID_VERSION: u16 = 5;
        const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
        const NAME_ID_TRADEMARK: u16 = 7;
        const NAME_ID_MANUFACTURER: u16 = 8;
        const NAME_ID_DESIGNER: u16 = 9;
        const NAME_ID_DESCRIPTION: u16 = 10;
        const NAME_ID_VENDOR_URL: u16 = 11;
        const NAME_ID_DESIGNER_URL: u16 = 12;
        const NAME_ID_LICENSE: u16 = 13;
        const NAME_ID_LICENSE_URL: u16 = 14;
        const NAME_ID_PREFERRED_FAMILY: u16 = 16;
        const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;

        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
        metadata.font_family = get_name_string(&name_data, NAME_ID_FAMILY);
        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
        metadata.full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);
        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
        metadata.postscript_name = get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
        metadata.preferred_family = get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
        metadata.preferred_subfamily = get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);

        // One font can support multiple patterns
        // `f_family` is state shared across closure invocations: it is filled
        // in when a family record is visited and read when a full-name record
        // is visited, so a full-name record only yields a pattern if a family
        // record came earlier in `name_records`.
        let mut f_family = None;

        let patterns = name_table
            .name_records
            .iter()
            .filter_map(|name_record| {
                let name_id = name_record.name_id;
                if name_id == FONT_SPECIFIER_FAMILY_ID {
                    // The lookup goes through `fontcode_get_name` on the whole
                    // table rather than decoding this particular record.
                    if let Ok(Some(family)) =
                        fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID)
                    {
                        f_family = Some(family);
                    }
                    None
                } else if name_id == FONT_SPECIFIER_NAME_ID {
                    // `?` here only skips this record (returns `None` from the closure).
                    let family = f_family.as_ref()?;
                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
                    if name.to_bytes().is_empty() {
                        None
                    } else {
                        let mut name_str =
                            String::from_utf8_lossy(name.to_bytes()).to_string();
                        let mut family_str =
                            String::from_utf8_lossy(family.as_bytes()).to_string();
                        // Drop a single leading '.' from the full name and the family.
                        if name_str.starts_with('.') {
                            name_str = name_str[1..].to_string();
                        }
                        if family_str.starts_with('.') {
                            family_str = family_str[1..].to_string();
                        }
                        Some((
                            FcPattern {
                                name: Some(name_str),
                                family: Some(family_str),
                                bold: if is_bold {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                italic: if is_italic {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                oblique: if is_oblique {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                monospace: if is_monospace {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                condensed: if stretch <= FcStretch::Condensed {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                weight,
                                stretch,
                                unicode_ranges: unicode_ranges.clone(),
                                metadata: metadata.clone(),
                                render_config: FcFontRenderConfig::default(),
                            },
                            font_index,
                        ))
                    }
                } else {
                    None
                }
            })
            // Collecting into a set removes identical (pattern, index) pairs
            // produced by repeated full-name records.
            .collect::<BTreeSet<_>>();

        results.extend(patterns.into_iter().map(|(pat, idx)| ParsedFontFace {
            pattern: pat,
            font_index: idx,
        }));
    }

    // An empty result is reported as `None` rather than `Some(vec![])`.
    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}
4274
/// Parses the font file at `filepath` and returns one `(FcPattern, FcFontPath)`
/// pair per pattern found by `parse_font_faces`.
///
/// On non-wasm targets with the `std` feature the file is memory-mapped;
/// otherwise it is read fully into memory. Every returned `FcFontPath` carries
/// the same lossily-converted path string and the same content hash of the
/// file, plus the index of the face the pattern came from.
///
/// Returns `None` if the file cannot be opened, mapped or read, or if
/// `parse_font_faces` returns `None`.
#[cfg(all(feature = "std", feature = "parsing"))]
pub(crate) fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    use mmapio::MmapOptions;
    use std::fs::File;

    let handle = File::open(filepath).ok()?;

    // NOTE(review): mapping a file is only sound while nothing truncates or
    // rewrites it during the mapping's lifetime; nothing here enforces that.
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    let contents = unsafe { MmapOptions::new().map(&handle).ok()? };

    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
    let contents = std::fs::read(filepath).ok()?;

    let data: &[u8] = &contents[..];
    let parsed = parse_font_faces(data)?;
    let path = filepath.to_string_lossy().to_string();
    // Hash once per file — every face of a .ttc shares this value,
    // so the shared-bytes cache can return the same Arc<[u8]> for
    // all of them. Use the cheap sampled variant so the scout doesn't
    // page-fault the full file into RSS just to produce a dedup key.
    let bytes_hash = crate::utils::content_dedup_hash_u64(data);

    let mut entries = Vec::with_capacity(parsed.len());
    for ParsedFontFace { pattern, font_index } in parsed {
        let source = FcFontPath {
            path: path.clone(),
            font_index,
            bytes_hash,
        };
        entries.push((pattern, source));
    }
    Some(entries)
}
4315
/// Coverage info returned by a fast-probe parse.
///
/// Produced by [`FcParseFontFaceFast`] / [`FcProbeCoverage`] — the
/// v4.2 "cheap cmap-only" entry point. Unlike `parse_font_faces`,
/// this path does **not** read NAME, OS/2, POST, HHEA, HMTX, or
/// anything else beyond the table directory, the `head` table's
/// bold/italic flags (`head.macStyle`), and the cmap subtable that
/// matches the codepoints we care about. ~1 ms/face on warm FS
/// cache vs ~13 ms for the full parse.
///
/// The `pattern.unicode_ranges` is populated from the *actual* cmap
/// contents (runs of consecutive covered codepoints from the input
/// set, merged into one `UnicodeRange` each) rather than the OS/2
/// `ulUnicodeRange` bitfield. That's more precise (OS/2 bits lie on
/// many fonts — they're hints, not ground truth) and means
/// `FontFallbackChain::resolve_char`'s coverage check matches what
/// the shaper can actually render.
#[cfg(all(feature = "std", feature = "parsing"))]
#[derive(Debug, Clone)]
pub struct FastCoverage {
    /// Metadata pattern with `unicode_ranges` populated from the
    /// codepoints this face covered from the request set. `name` /
    /// `family` fields are left empty — callers already have the
    /// filename-guessed family in `FcFontRegistry.known_paths`;
    /// we avoid the NAME table read entirely. `weight` is `Bold` or
    /// `Normal` and `italic` is `True` or `False`, both taken from
    /// the `head` flags; `oblique` and `monospace` are `DontCare`.
    pub pattern: FcPattern,
    /// Subset of the input codepoints that this face covers (maps
    /// to a non-zero gid via the best cmap subtable). May be empty
    /// if the face covers none, in which case callers should fall
    /// through to the next candidate path.
    pub covered: alloc::collections::BTreeSet<char>,
    /// `head.macStyle.bold` (bit 0).
    pub is_bold: bool,
    /// `head.macStyle.italic` (bit 1).
    pub is_italic: bool,
}
4351
/// Fast per-face coverage probe.
///
/// Reads `font_bytes` as font data, opens face `font_index`, takes the
/// bold/italic flags from its `head` table, selects a cmap subtable via
/// `find_best_cmap_subtable`, and records which of the requested
/// `codepoints` map to a non-zero glyph id.
///
/// The returned pattern has no `name` / `family`; its `weight` is `Bold`
/// or `Normal` and its `italic` is `True` or `False` according to the
/// `head` flags, `oblique` and `monospace` are `DontCare`, and
/// `unicode_ranges` holds the covered codepoints with consecutive ones
/// merged into a single range.
///
/// Returns `None` if the bytes cannot be read as font data, the face
/// cannot be opened, the `head` or `cmap` table is missing or unparsable,
/// or no usable cmap subtable is found. A face that covers none of the
/// requested codepoints still returns `Some`, with an empty `covered`
/// set, so callers can tell "try the next face" apart from "give up on
/// this file".
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontFaceFast(
    font_bytes: &[u8],
    font_index: usize,
    codepoints: &alloc::collections::BTreeSet<char>,
) -> Option<FastCoverage> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        tables::{
            cmap::{Cmap, CmapSubtable},
            FontTableProvider, HeadTable,
        },
        tag,
    };

    let file_scope = ReadScope::new(font_bytes);
    let font_file = file_scope.read::<FontData<'_>>().ok()?;
    let provider = font_file.table_provider(font_index).ok()?;

    // Style flags come straight from the head table.
    let head_bytes = provider.table_data(tag::HEAD).ok()??;
    let head = ReadScope::new(&head_bytes).read::<HeadTable>().ok()?;
    let (is_bold, is_italic) = (head.is_bold(), head.is_italic());

    // Locate the cmap subtable used for all lookups below.
    let cmap_bytes = provider.table_data(tag::CMAP).ok()??;
    let cmap = ReadScope::new(&cmap_bytes).read::<Cmap<'_>>().ok()?;
    let record = find_best_cmap_subtable(&cmap)?;
    let subtable = ReadScope::new(&cmap_bytes)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    let mut covered: alloc::collections::BTreeSet<char> =
        alloc::collections::BTreeSet::new();
    let mut ranges: Vec<UnicodeRange> = Vec::new();
    for &ch in codepoints {
        let cp = u32::from(ch);
        // Glyph id 0 means "not covered"; lookup errors count as not covered too.
        let has_glyph = matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0);
        if !has_glyph {
            continue;
        }
        covered.insert(ch);

        // The set iterates in ascending order, so a codepoint directly after
        // the last range's end extends that range instead of starting a new one.
        if let Some(run) = ranges.last_mut().filter(|run| run.end + 1 == cp) {
            run.end = cp;
        } else {
            ranges.push(UnicodeRange { start: cp, end: cp });
        }
    }

    let pattern = FcPattern {
        name: None,
        family: None,
        weight: if is_bold {
            FcWeight::Bold
        } else {
            FcWeight::Normal
        },
        italic: if is_italic {
            PatternMatch::True
        } else {
            PatternMatch::False
        },
        oblique: PatternMatch::DontCare,
        monospace: PatternMatch::DontCare,
        unicode_ranges: ranges,
        ..Default::default()
    };

    Some(FastCoverage {
        pattern,
        covered,
        is_bold,
        is_italic,
    })
}
4456
4457/// Count the number of faces inside a TTC, or `1` for a single-face
4458/// font file. Used by [`FcFontRegistry::request_fonts_fast`] to
4459/// iterate every face in a `.ttc` without paying the full-parse
4460/// cost (the TTC header is 12 bytes).
4461#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcCountFontFaces(font_bytes: &[u8]) -> usize {
    // Same cap as parse_font_faces, for safety.
    const MAX_FACES: usize = 100;

    // Anything that does not start with the collection magic is one face.
    if !font_bytes.starts_with(b"ttcf") {
        return 1;
    }

    // numFonts is the big-endian u32 at byte offset 8 of the header.
    match font_bytes.get(8..12) {
        Some(&[b0, b1, b2, b3]) => {
            let declared = u32::from_be_bytes([b0, b1, b2, b3]) as usize;
            // A header declaring zero faces is still reported as one.
            declared.min(MAX_FACES).max(1)
        }
        // Too short to contain the face count.
        _ => 1,
    }
}
4474
/// Parse font bytes and extract font patterns for in-memory fonts.
///
/// This is the public API for parsing in-memory font data to create
/// `(FcPattern, FcFont)` tuples that can be added to an `FcFontCache`
/// via `with_memory_fonts()`.
///
/// # Arguments
/// * `font_bytes` - The raw bytes of a TrueType/OpenType font file
///   (a `ttcf` collection yields entries for each of its faces)
/// * `font_id` - An identifier string for this font; it is stored as the
///   `id` of every returned `FcFont`
///
/// # Returns
/// A vector of `(FcPattern, FcFont)` tuples, one for each pattern found in the
/// file. Each `FcFont` owns its own copy of `font_bytes` together with the
/// index of the face the pattern came from.
/// Returns `None` if the font could not be parsed.
///
/// # Example
/// ```ignore
/// use rust_fontconfig::{FcFontCache, FcParseFontBytes};
///
/// let font_bytes = include_bytes!("path/to/font.ttf");
/// let mut cache = FcFontCache::default();
///
/// if let Some(fonts) = FcParseFontBytes(font_bytes, "MyFont") {
///     cache.with_memory_fonts(fonts);
/// }
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontBytes(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    // Thin public wrapper: all work happens in the private implementation.
    FcParseFontBytesInner(font_bytes, font_id)
}
4505
/// Internal implementation for parsing font bytes.
///
/// Delegates to `parse_font_faces` for shared parsing logic and wraps each
/// parsed face as an `FcFont` paired with its `FcPattern`.
///
/// Every returned `FcFont` owns its own copy of `font_bytes` and `font_id`.
/// Returns `None` when `parse_font_faces` yields `None`.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontBytesInner(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    let faces = parse_font_faces(font_bytes)?;

    Some(
        faces
            .into_iter()
            .map(|face| {
                // Copy straight from the borrowed inputs: exactly one copy per
                // face, with no extra "template" buffer that is cloned and then
                // thrown away. Font files can be many megabytes.
                let font = FcFont {
                    bytes: font_bytes.to_vec(),
                    font_index: face.font_index,
                    id: font_id.to_string(),
                };
                (face.pattern, font)
            })
            .collect(),
    )
}
4530
/// Scan every `(prefix, path)` root and return the fonts found beneath them.
///
/// Each root is resolved with `process_path` (as a non-include path); roots
/// that cannot be resolved are skipped. Resolved roots are scanned with
/// `FcScanSingleDirectoryRecursive` and the per-root results are concatenated.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
    /// Resolve one root and scan it, or `None` if the root is unusable.
    fn scan_root(
        (prefix, dir): &(Option<String>, String),
    ) -> Option<Vec<(FcPattern, FcFontPath)>> {
        let resolved = process_path(prefix, PathBuf::from(dir), false)?;
        Some(FcScanSingleDirectoryRecursive(resolved))
    }

    // scan directories in parallel
    #[cfg(all(feature = "multithreading", not(target_family = "wasm")))]
    {
        use rayon::prelude::*;

        paths.par_iter().filter_map(scan_root).flatten().collect()
    }
    // wasm has no rayon (it's target-gated off), so even with `multithreading`
    // enabled wasm falls back to the sequential path.
    #[cfg(not(all(feature = "multithreading", not(target_family = "wasm"))))]
    {
        paths.iter().filter_map(scan_root).flatten().collect()
    }
}
4559
/// Walk `dir` and everything below it, returning every non-directory entry.
///
/// No filtering by extension and no parsing happens here (no allsorts).
/// Directories are visited breadth-first in discovery order; directories
/// that cannot be read and entries that fail to load are silently skipped.
#[cfg(feature = "std")]
fn FcCollectFontFilesRecursive(dir: PathBuf) -> Vec<PathBuf> {
    let mut found = Vec::new();

    // FIFO queue of directories still to be listed.
    let mut pending = std::collections::VecDeque::new();
    pending.push_back(dir);

    while let Some(current) = pending.pop_front() {
        let listing = match std::fs::read_dir(&current) {
            Ok(listing) => listing,
            Err(_) => continue,
        };

        for path in listing.flatten().map(|entry| entry.path()) {
            if path.is_dir() {
                pending.push_back(path);
            } else {
                found.push(path);
            }
        }
    }

    found
}
4590
/// Collect every file below `dir` and parse the collected files as fonts.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
    FcParseFontFiles(&FcCollectFontFilesRecursive(dir))
}
4596
/// Parse each file with `FcParseFont` and concatenate the resulting faces.
///
/// Files for which `FcParseFont` returns `None` are dropped. With the
/// `multithreading` feature on a non-wasm target the files are parsed in
/// parallel via rayon; otherwise they are parsed sequentially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(all(feature = "multithreading", not(target_family = "wasm")))]
    let per_file: Vec<Vec<_>> = {
        use rayon::prelude::*;

        files_to_parse
            .par_iter()
            .filter_map(|file| FcParseFont(file))
            .collect()
    };

    #[cfg(not(all(feature = "multithreading", not(target_family = "wasm"))))]
    let per_file: Vec<Vec<_>> = files_to_parse
        .iter()
        .filter_map(|file| FcParseFont(file))
        .collect();

    per_file.into_iter().flatten().collect()
}
4620
/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
///
/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
///
/// Resolution steps:
/// 1. A leading `~` component is replaced by `$HOME`; if `$HOME` is not set
///    the path is unusable and `None` is returned.
/// 2. The prefix is applied:
///    * no prefix: the path is returned as-is;
///    * `"cwd"` / `"default"`: the path is placed under `.`;
///    * `"xdg"`: the path is placed under `$XDG_CONFIG_HOME` (when
///      `is_include_path` is true) or `$XDG_DATA_HOME` (otherwise). When the
///      variable is unset **or empty**, `$HOME/.config` or
///      `$HOME/.local/share` is used instead, as the XDG Base Directory
///      specification requires;
///    * any other prefix: `None`.
///
/// Environment variables are read with `var_os`, so directories whose names
/// are not valid UTF-8 are still usable.
#[cfg(all(feature = "std", feature = "parsing"))]
fn process_path(
    prefix: &Option<String>,
    mut path: PathBuf,
    is_include_path: bool,
) -> Option<PathBuf> {
    use std::env::var_os;

    const HOME_SHORTCUT: &str = "~";
    const CWD_PATH: &str = ".";

    const HOME_ENV_VAR: &str = "HOME";
    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";

    const PREFIX_CWD: &str = "cwd";
    const PREFIX_DEFAULT: &str = "default";
    const PREFIX_XDG: &str = "xdg";

    // These could, in theory, be cached, but the work required to do so outweighs the minor benefits
    fn get_home_value() -> Option<PathBuf> {
        var_os(HOME_ENV_VAR).map(PathBuf::from)
    }
    /// Value of an XDG base-directory variable, falling back to
    /// `$HOME/<default_suffix>` when the variable is unset or empty.
    fn get_xdg_base_dir(env_var: &str, default_suffix: &str) -> Option<PathBuf> {
        var_os(env_var)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .or_else(|| get_home_value().map(|home_path| home_path.join(default_suffix)))
    }

    // Resolve the tilde character in the path, if present. `strip_prefix`
    // works on whole components, so `~foo` is left untouched.
    if let Ok(relative_to_home) = path.strip_prefix(HOME_SHORTCUT) {
        path = get_home_value()?.join(relative_to_home);
    }

    // Resolve prefix values
    match prefix.as_deref() {
        None => Some(path),
        Some(PREFIX_CWD) | Some(PREFIX_DEFAULT) => Some(PathBuf::from(CWD_PATH).join(path)),
        Some(PREFIX_XDG) => {
            let base_dir = if is_include_path {
                get_xdg_base_dir(XDG_CONFIG_HOME_ENV_VAR, XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX)
            } else {
                get_xdg_base_dir(XDG_DATA_HOME_ENV_VAR, XDG_DATA_HOME_DEFAULT_PATH_SUFFIX)
            };
            base_dir.map(|base_dir| base_dir.join(path))
        }
        Some(_) => None, // Unsupported prefix
    }
}
4702
// Helper: look up `name_id` in the raw name table and return it as a `String`.
// Lookup errors and missing entries both yield `None`; bytes that are not
// valid UTF-8 are replaced lossily.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
    match fontcode_get_name(name_data, name_id) {
        Ok(Some(name)) => Some(String::from_utf8_lossy(name.to_bytes()).into_owned()),
        _ => None,
    }
}
4711
/// Representative test codepoints for each Unicode block.
///
/// For the blocks listed below (identified by their `start` codepoint only),
/// the probes are carefully chosen to be actual script characters (not
/// punctuation/symbols) that a font claiming to support this script should
/// definitely have.
///
/// For any other `start`, the range is sampled: four evenly spaced codepoints
/// when the range spans more than 20 codepoints, otherwise `start` and the
/// midpoint.
///
/// The result is never empty. An inverted range (`end < start`) is treated as
/// having size zero and yields `[start, start]` rather than underflowing.
/// Callers derive their "required hits" threshold from the length of this
/// list, so an empty list would make every range pass trivially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
    match start {
        // Basic Latin - test uppercase, lowercase, and digits
        0x0000 => vec!['A' as u32, 'M' as u32, 'Z' as u32, 'a' as u32, 'm' as u32, 'z' as u32],
        // Latin-1 Supplement - common accented letters
        0x0080 => vec![0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1], // À É Ñ à é ñ
        // Latin Extended-A
        0x0100 => vec![0x0100, 0x0110, 0x0141, 0x0152, 0x0160], // Ā Đ Ł Œ Š
        // Latin Extended-B
        0x0180 => vec![0x0180, 0x01A0, 0x01B0, 0x01CD], // ƀ Ơ ư Ǎ
        // IPA Extensions
        0x0250 => vec![0x0250, 0x0259, 0x026A, 0x0279], // ɐ ə ɪ ɹ
        // Greek and Coptic
        0x0370 => vec![0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9], // Α Β Γ α β ω
        // Cyrillic
        0x0400 => vec![0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F], // А Б В а б Я
        // Armenian
        0x0530 => vec![0x0531, 0x0532, 0x0533, 0x0561, 0x0562], // Ա Բ Գ ա բ
        // Hebrew
        0x0590 => vec![0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA], // א ב ג ש ת
        // Arabic
        0x0600 => vec![0x0627, 0x0628, 0x062A, 0x062C, 0x0645], // ا ب ت ج م
        // Syriac
        0x0700 => vec![0x0710, 0x0712, 0x0713, 0x0715], // ܐ ܒ ܓ ܕ
        // Devanagari
        0x0900 => vec![0x0905, 0x0906, 0x0915, 0x0916, 0x0939], // अ आ क ख ह
        // Bengali
        0x0980 => vec![0x0985, 0x0986, 0x0995, 0x0996], // অ আ ক খ
        // Gurmukhi
        0x0A00 => vec![0x0A05, 0x0A06, 0x0A15, 0x0A16], // ਅ ਆ ਕ ਖ
        // Gujarati
        0x0A80 => vec![0x0A85, 0x0A86, 0x0A95, 0x0A96], // અ આ ક ખ
        // Oriya
        0x0B00 => vec![0x0B05, 0x0B06, 0x0B15, 0x0B16], // ଅ ଆ କ ଖ
        // Tamil
        0x0B80 => vec![0x0B85, 0x0B86, 0x0B95, 0x0BA4], // அ ஆ க த
        // Telugu
        0x0C00 => vec![0x0C05, 0x0C06, 0x0C15, 0x0C16], // అ ఆ క ఖ
        // Kannada
        0x0C80 => vec![0x0C85, 0x0C86, 0x0C95, 0x0C96], // ಅ ಆ ಕ ಖ
        // Malayalam
        0x0D00 => vec![0x0D05, 0x0D06, 0x0D15, 0x0D16], // അ ആ ക ഖ
        // Thai
        0x0E00 => vec![0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40], // ก ข ค ง เ
        // Lao
        0x0E80 => vec![0x0E81, 0x0E82, 0x0E84, 0x0E87], // ກ ຂ ຄ ງ
        // Myanmar
        0x1000 => vec![0x1000, 0x1001, 0x1002, 0x1010, 0x1019], // က ခ ဂ တ မ
        // Georgian
        0x10A0 => vec![0x10D0, 0x10D1, 0x10D2, 0x10D3], // ა ბ გ დ
        // Hangul Jamo
        0x1100 => vec![0x1100, 0x1102, 0x1103, 0x1161, 0x1162], // ᄀ ᄂ ᄃ ᅡ ᅢ
        // Ethiopic
        0x1200 => vec![0x1200, 0x1208, 0x1210, 0x1218], // ሀ ለ ሐ መ
        // Cherokee
        0x13A0 => vec![0x13A0, 0x13A1, 0x13A2, 0x13A3], // Ꭰ Ꭱ Ꭲ Ꭳ
        // Khmer
        0x1780 => vec![0x1780, 0x1781, 0x1782, 0x1783], // ក ខ គ ឃ
        // Mongolian
        0x1800 => vec![0x1820, 0x1821, 0x1822, 0x1823], // ᠠ ᠡ ᠢ ᠣ
        // Hiragana
        0x3040 => vec![0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093], // あ い う か き ん
        // Katakana
        0x30A0 => vec![0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3], // ア イ ウ カ キ ン
        // Bopomofo
        0x3100 => vec![0x3105, 0x3106, 0x3107, 0x3108], // ㄅ ㄆ ㄇ ㄈ
        // CJK Unified Ideographs - common characters
        0x4E00 => vec![0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708], // 一 中 人 大 日 月
        // Hangul Syllables
        0xAC00 => vec![0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4], // 가 각 간 나 다
        // CJK Compatibility Ideographs
        0xF900 => vec![0xF900, 0xF901, 0xF902], // 豈 更 車
        // Arabic Presentation Forms-A
        0xFB50 => vec![0xFB50, 0xFB51, 0xFB52, 0xFB56], // ﭐ ﭑ ﭒ ﭖ
        // Arabic Presentation Forms-B
        0xFE70 => vec![0xFE70, 0xFE72, 0xFE74, 0xFE76], // ﹰ ﹲ ﹴ ﹶ
        // Halfwidth and Fullwidth Forms
        0xFF00 => vec![0xFF01, 0xFF21, 0xFF41, 0xFF61], // ! A a 。
        // Default: sample at regular intervals
        _ => {
            // `saturating_sub` keeps an inverted range from underflowing
            // (which would panic in debug builds and wrap in release builds).
            let range_size = end.saturating_sub(start);
            if range_size > 20 {
                vec![
                    start + range_size / 5,
                    start + 2 * range_size / 5,
                    start + 3 * range_size / 5,
                    start + 4 * range_size / 5,
                ]
            } else {
                vec![start, start + range_size / 2]
            }
        }
    }
}
4810
/// Find the best Unicode CMAP subtable in a font's `cmap` table.
///
/// Tries multiple platform/encoding combinations in priority order and
/// returns the encoding record of the first one present, or `None` if the
/// font has none of them.
///
/// Full-repertoire (32-bit) encodings are tried before BMP-only ones. The
/// OpenType specification says that when a font carries both, the 32-bit
/// subtable should be a superset of the 16-bit one and is the one
/// applications should use. Preferring the BMP-only subtable would make
/// every supplementary-plane codepoint look unsupported.
#[cfg(all(feature = "std", feature = "parsing"))]
fn find_best_cmap_subtable<'a>(
    cmap: &allsorts::tables::cmap::Cmap<'a>,
) -> Option<allsorts::tables::cmap::EncodingRecord> {
    use allsorts::tables::cmap::{EncodingId, PlatformId};

    // Unicode platform, encoding 4: Unicode 2.0+, full repertoire
    cmap.find_subtable(PlatformId::UNICODE, EncodingId(4))
        // Windows platform, encoding 10: Unicode full repertoire (UCS-4)
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
        // Unicode platform, encoding 3: Unicode 2.0+, BMP only
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(3)))
        // Windows platform, encoding 1: Unicode BMP (UCS-2)
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
        // Unicode platform, encodings 0 and 1: legacy Unicode 1.0 / 1.1
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)))
}
4826
/// Filter OS/2-reported Unicode ranges down to those the CMAP table backs up.
///
/// For each range a handful of probe codepoints is looked up in the font's
/// best Unicode CMAP subtable; the range is kept only if at least half of the
/// probes (rounded up) map to a real glyph (non-zero glyph id). Probes that
/// fall outside the range never count as hits.
///
/// If the CMAP table or a suitable subtable cannot be loaded, verification is
/// impossible and `os2_ranges` is returned unchanged (OS/2 is trusted).
#[cfg(all(feature = "std", feature = "parsing"))]
fn verify_unicode_ranges_with_cmap(
    provider: &impl FontTableProvider,
    os2_ranges: Vec<UnicodeRange>
) -> Vec<UnicodeRange> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    if os2_ranges.is_empty() {
        return Vec::new();
    }

    // Raw CMAP bytes; without them there is nothing to verify against.
    let cmap_data = match provider.table_data(tag::CMAP) {
        Ok(Some(data)) => data,
        _ => return os2_ranges,
    };

    // Parse the table, pick the preferred encoding record, then parse the
    // subtable it points at. Any failure along the way means "trust OS/2".
    let loaded_subtable = ReadScope::new(&cmap_data)
        .read::<Cmap<'_>>()
        .ok()
        .and_then(|cmap| find_best_cmap_subtable(&cmap))
        .and_then(|record| {
            ReadScope::new(&cmap_data)
                .offset(record.offset as usize)
                .read::<CmapSubtable<'_>>()
                .ok()
        });
    let cmap_subtable = match loaded_subtable {
        Some(subtable) => subtable,
        None => return os2_ranges,
    };

    os2_ranges
        .into_iter()
        .filter(|range| {
            let probes = get_verification_codepoints(range.start, range.end);

            // Require at least 50% of the probes to have valid glyphs
            let needed = (probes.len() + 1) / 2; // ceil(len/2)

            // `take(needed)` stops probing as soon as the threshold is met.
            let found = probes
                .iter()
                .filter(|&&cp| {
                    range.start <= cp
                        && cp <= range.end
                        && matches!(cmap_subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0)
                })
                .take(needed)
                .count();

            found >= needed
        })
        .collect()
}
4895
/// Discover font coverage directly from the CMAP table.
///
/// This is the fallback when OS/2 ulUnicodeRange bits are all zero. A fixed
/// list of standard Unicode blocks is probed; a block is reported as covered
/// when at least half of its probe codepoints (rounded up) map to a non-zero
/// glyph id in the font's best Unicode CMAP subtable.
///
/// Returns `None` if the CMAP table or subtable cannot be loaded, or if no
/// block passes the check.
#[cfg(all(feature = "std", feature = "parsing"))]
fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    // Standard Unicode blocks to probe
    const BLOCKS_TO_CHECK: &[(u32, u32)] = &[
        (0x0000, 0x007F), // Basic Latin
        (0x0080, 0x00FF), // Latin-1 Supplement
        (0x0100, 0x017F), // Latin Extended-A
        (0x0180, 0x024F), // Latin Extended-B
        (0x0250, 0x02AF), // IPA Extensions
        (0x0300, 0x036F), // Combining Diacritical Marks
        (0x0370, 0x03FF), // Greek and Coptic
        (0x0400, 0x04FF), // Cyrillic
        (0x0500, 0x052F), // Cyrillic Supplement
        (0x0530, 0x058F), // Armenian
        (0x0590, 0x05FF), // Hebrew
        (0x0600, 0x06FF), // Arabic
        (0x0700, 0x074F), // Syriac
        (0x0900, 0x097F), // Devanagari
        (0x0980, 0x09FF), // Bengali
        (0x0A00, 0x0A7F), // Gurmukhi
        (0x0A80, 0x0AFF), // Gujarati
        (0x0B00, 0x0B7F), // Oriya
        (0x0B80, 0x0BFF), // Tamil
        (0x0C00, 0x0C7F), // Telugu
        (0x0C80, 0x0CFF), // Kannada
        (0x0D00, 0x0D7F), // Malayalam
        (0x0E00, 0x0E7F), // Thai
        (0x0E80, 0x0EFF), // Lao
        (0x1000, 0x109F), // Myanmar
        (0x10A0, 0x10FF), // Georgian
        (0x1100, 0x11FF), // Hangul Jamo
        (0x1200, 0x137F), // Ethiopic
        (0x13A0, 0x13FF), // Cherokee
        (0x1780, 0x17FF), // Khmer
        (0x1800, 0x18AF), // Mongolian
        (0x2000, 0x206F), // General Punctuation
        (0x20A0, 0x20CF), // Currency Symbols
        (0x2100, 0x214F), // Letterlike Symbols
        (0x2190, 0x21FF), // Arrows
        (0x2200, 0x22FF), // Mathematical Operators
        (0x2500, 0x257F), // Box Drawing
        (0x25A0, 0x25FF), // Geometric Shapes
        (0x2600, 0x26FF), // Miscellaneous Symbols
        (0x3000, 0x303F), // CJK Symbols and Punctuation
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0x3100, 0x312F), // Bopomofo
        (0x3130, 0x318F), // Hangul Compatibility Jamo
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0xAC00, 0xD7AF), // Hangul Syllables
        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
    ];

    let cmap_data = provider.table_data(tag::CMAP).ok()??;
    let cmap_table = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;
    let best_record = find_best_cmap_subtable(&cmap_table)?;
    let cmap_subtable = ReadScope::new(&cmap_data)
        .offset(best_record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    let covered: Vec<UnicodeRange> = BLOCKS_TO_CHECK
        .iter()
        .filter(|&&(start, end)| {
            let probes = get_verification_codepoints(start, end);
            let needed = (probes.len() + 1) / 2;

            // `take(needed)` stops probing as soon as the threshold is met.
            let found = probes
                .iter()
                .filter(|&&cp| matches!(cmap_subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                .take(needed)
                .count();

            found >= needed
        })
        .map(|&(start, end)| UnicodeRange { start, end })
        .collect();

    // An empty result is reported as "no information", not "covers nothing".
    if covered.is_empty() {
        None
    } else {
        Some(covered)
    }
}
4995
// Helper: translate the OS/2 ulUnicodeRange1..4 bit fields into Unicode
// ranges via UNICODE_RANGE_MAPPINGS (unused, kept for reference).
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(dead_code)]
fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
    // The four 32-bit words form one 128-bit field; bit N lives in word N / 32.
    let words = [
        os2_table.ul_unicode_range1,
        os2_table.ul_unicode_range2,
        os2_table.ul_unicode_range3,
        os2_table.ul_unicode_range4,
    ];

    UNICODE_RANGE_MAPPINGS
        .iter()
        .filter(|&&(bit, _, _)| {
            let (word, shift) = (bit / 32, bit % 32);
            // Bits beyond the fourth word are ignored.
            word < words.len() && (words[word] & (1 << shift)) != 0
        })
        .map(|&(_, start, end)| UnicodeRange { start, end })
        .collect()
}
5019
// Helper: decide whether a font is monospace.
//
// Sources are consulted in order:
//   1. `detected_monospace`, if the caller already knows the answer;
//   2. the PANOSE classification in OS/2, when the family kind is Latin Text;
//   3. the advance widths in hmtx: monospace iff all of them are equal.
// Returns `None` if step 3 is reached and hhea, maxp or hmtx cannot be read.
#[cfg(all(feature = "std", feature = "parsing"))]
fn detect_monospace(
    provider: &impl FontTableProvider,
    os2_table: &Os2,
    detected_monospace: Option<bool>,
) -> Option<bool> {
    if detected_monospace.is_some() {
        return detected_monospace;
    }

    // PANOSE: byte 0 is the family kind (2 = Latin Text), byte 3 the
    // proportion (9 = Monospaced).
    let panose = &os2_table.panose;
    if panose[0] == 2 {
        return Some(panose[3] == 9);
    }

    // Fall back to the glyph metrics; hmtx needs counts from maxp and hhea.
    let hhea_data = provider.table_data(tag::HHEA).ok()??;
    let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
    let maxp_data = provider.table_data(tag::MAXP).ok()??;
    let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
    let hmtx_data = provider.table_data(tag::HMTX).ok()??;

    let glyph_count = usize::from(maxp_table.num_glyphs);
    let metric_count = usize::from(hhea_table.num_h_metrics);
    let hmtx_table = ReadScope::new(&hmtx_data)
        .read_dep::<HmtxTable<'_>>((glyph_count, metric_count))
        .ok()?;

    // Every advance width must equal the first one.
    let mut reference_width = None;
    for index in 0..metric_count {
        let width = hmtx_table.h_metrics.read_item(index).ok()?.advance_width;
        match reference_width {
            Some(expected) if expected != width => return Some(false),
            Some(_) => {}
            None => reference_width = Some(width),
        }
    }

    Some(true)
}
5065
/// Guess font metadata from a filename alone.
///
/// The file must have a `ttf`, `otf`, `ttc`, `woff` or `woff2` extension
/// (ASCII case-insensitive); otherwise `None` is returned. Style hints are
/// detected by looking for keyword tokens in the output of
/// [`config::tokenize_lowercase`], and the family name is the output of
/// [`config::tokenize_font_stem`] joined with spaces. `None` is also returned
/// when the path is not valid UTF-8 or no family tokens remain.
///
/// Only compiled for the filename-only (`not(parsing)`) scan path — its
/// sole caller is [`FcFontCache::build_from_filenames`]. With `parsing`
/// on, allsorts reads real metadata and this fallback is unused.
#[cfg(all(feature = "std", not(feature = "parsing")))]
fn pattern_from_filename(path: &std::path::Path) -> Option<FcPattern> {
    const FONT_EXTENSIONS: [&str; 5] = ["ttf", "otf", "ttc", "woff", "woff2"];

    let extension = path.extension()?.to_str()?;
    if !FONT_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
    {
        return None;
    }

    let stem = path.file_stem()?.to_str()?;

    // Style detection: check if any token matches a known style keyword
    let style_tokens = crate::config::tokenize_lowercase(stem);
    let mentions = |kw: &str| style_tokens.iter().any(|t| t == kw);
    let bold = mentions("bold") || mentions("heavy");
    let italic = mentions("italic");
    let oblique = mentions("oblique");
    let mono = mentions("mono") || mentions("monospace");
    let condensed = mentions("condensed");

    // Family = non-style tokens joined
    let family_tokens = crate::config::tokenize_font_stem(stem);
    if family_tokens.is_empty() {
        return None;
    }

    // `True` when the keyword was seen, otherwise the supplied fallback.
    let flag = |seen: bool, otherwise: PatternMatch| {
        if seen {
            PatternMatch::True
        } else {
            otherwise
        }
    };

    Some(FcPattern {
        name: Some(stem.to_string()),
        family: Some(family_tokens.join(" ")),
        bold: flag(bold, PatternMatch::False),
        italic: flag(italic, PatternMatch::False),
        oblique: flag(oblique, PatternMatch::DontCare),
        monospace: flag(mono, PatternMatch::DontCare),
        condensed: flag(condensed, PatternMatch::DontCare),
        weight: if bold { FcWeight::Bold } else { FcWeight::Normal },
        stretch: if condensed { FcStretch::Condensed } else { FcStretch::Normal },
        unicode_ranges: Vec::new(),
        metadata: FcFontMetadata::default(),
        render_config: FcFontRenderConfig::default(),
    })
}