Skip to main content

rust_fontconfig/
lib.rs

1//! # rust-fontconfig
2//!
3//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies) - using allsorts as a font parser to support `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf`
4//!
5//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
6//!
7//! ## Usage
8//!
9//! ### Basic Font Query
10//!
11//! ```rust,no_run
12//! use rust_fontconfig::{FcFontCache, FcPattern};
13//!
14//! fn main() {
15//!     // Build the font cache
16//!     let cache = FcFontCache::build();
17//!
18//!     // Query a font by name
19//!     let results = cache.query(
20//!         &FcPattern {
21//!             name: Some(String::from("Arial")),
22//!             ..Default::default()
23//!         },
24//!         &mut Vec::new() // Trace messages container
25//!     );
26//!
27//!     if let Some(font_match) = results {
28//!         println!("Font match ID: {:?}", font_match.id);
29//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
30//!     } else {
31//!         println!("No matching font found");
32//!     }
33//! }
34//! ```
35//!
36//! ### Resolve Font Chain and Query for Text
37//!
38//! ```rust,no_run
39//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
40//!
41//! fn main() {
42//!     # #[cfg(feature = "std")]
43//!     # {
44//!     let cache = FcFontCache::build();
45//!
46//!     // Build font fallback chain (without text parameter)
47//!     let font_chain = cache.resolve_font_chain(
48//!         &["Arial".to_string(), "sans-serif".to_string()],
49//!         FcWeight::Normal,
50//!         PatternMatch::DontCare,
51//!         PatternMatch::DontCare,
52//!         &mut Vec::new(),
53//!     );
54//!
55//!     // Query which fonts to use for specific text
56//!     let text = "Hello 你好 Здравствуйте";
57//!     let font_runs = font_chain.query_for_text(&cache, text);
58//!
59//!     println!("Text split into {} font runs:", font_runs.len());
60//!     for run in font_runs {
61//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
62//!     }
63//!     # }
64//! }
65//! ```
66
67#![allow(non_snake_case)]
68
// As of v4.1 this crate is std-only. The v4.0 `no_std` path is gone —
// it never supported the registry / multi-thread parsing anyway, and
// the shared-state `FcFontCache` refactor depends on `std::sync::RwLock`,
// which is unavailable without std. The `alloc::` import paths are kept
// so that the existing call sites in this file and its submodules keep
// compiling — in std builds the `alloc` crate is always available, and
// `std` simply re-exports the same collection and string types from it.
76extern crate alloc;
77
78use alloc::collections::btree_map::BTreeMap;
79use alloc::string::{String, ToString};
80use alloc::vec::Vec;
81#[cfg(all(feature = "std", feature = "parsing"))]
82use allsorts::binary::read::ReadScope;
83#[cfg(all(feature = "std", feature = "parsing"))]
84use allsorts::get_name::fontcode_get_name;
85#[cfg(all(feature = "std", feature = "parsing"))]
86use allsorts::tables::os2::Os2;
87#[cfg(all(feature = "std", feature = "parsing"))]
88use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
89#[cfg(all(feature = "std", feature = "parsing"))]
90use allsorts::tag;
91#[cfg(feature = "std")]
92use std::path::PathBuf;
93
94pub mod utils;
95#[cfg(feature = "std")]
96pub mod config;
97
98#[cfg(feature = "ffi")]
99pub mod ffi;
100
101#[cfg(feature = "async-registry")]
102pub mod scoring;
103#[cfg(feature = "async-registry")]
104pub mod registry;
105#[cfg(feature = "async-registry")]
106pub mod multithread;
107#[cfg(feature = "cache")]
108pub mod disk_cache;
109
110/// Operating system type for generic font family resolution
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
112pub enum OperatingSystem {
113    Windows,
114    Linux,
115    MacOS,
116    Wasm,
117}
118
119impl OperatingSystem {
120    /// Detect the current operating system at compile time
121    pub fn current() -> Self {
122        #[cfg(target_os = "windows")]
123        return OperatingSystem::Windows;
124        
125        #[cfg(target_os = "linux")]
126        return OperatingSystem::Linux;
127        
128        #[cfg(target_os = "macos")]
129        return OperatingSystem::MacOS;
130        
131        #[cfg(target_family = "wasm")]
132        return OperatingSystem::Wasm;
133        
134        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_family = "wasm")))]
135        return OperatingSystem::Linux; // Default fallback
136    }
137    
138    /// Get system-specific fonts for the "serif" generic family
139    /// Prioritizes fonts based on Unicode range coverage
140    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
141        let has_cjk = has_cjk_ranges(unicode_ranges);
142        let has_arabic = has_arabic_ranges(unicode_ranges);
143        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
144        
145        match self {
146            OperatingSystem::Windows => {
147                let mut fonts = Vec::new();
148                if has_cjk {
149                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
150                }
151                if has_arabic {
152                    fonts.push("Traditional Arabic");
153                }
154                fonts.push("Times New Roman");
155                fonts.iter().map(|s| s.to_string()).collect()
156            }
157            OperatingSystem::Linux => {
158                let mut fonts = Vec::new();
159                if has_cjk {
160                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
161                }
162                if has_arabic {
163                    fonts.push("Noto Serif Arabic");
164                }
165                fonts.extend_from_slice(&[
166                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
167                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
168                ]);
169                fonts.iter().map(|s| s.to_string()).collect()
170            }
171            OperatingSystem::MacOS => {
172                let mut fonts = Vec::new();
173                if has_cjk {
174                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
175                }
176                if has_arabic {
177                    fonts.push("Geeza Pro");
178                }
179                fonts.extend_from_slice(&["Times", "New York", "Palatino"]);
180                fonts.iter().map(|s| s.to_string()).collect()
181            }
182            OperatingSystem::Wasm => Vec::new(),
183        }
184    }
185    
186    /// Get system-specific fonts for the "sans-serif" generic family
187    /// Prioritizes fonts based on Unicode range coverage
188    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
189        let has_cjk = has_cjk_ranges(unicode_ranges);
190        let has_arabic = has_arabic_ranges(unicode_ranges);
191        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
192        let has_hebrew = has_hebrew_ranges(unicode_ranges);
193        let has_thai = has_thai_ranges(unicode_ranges);
194        
195        match self {
196            OperatingSystem::Windows => {
197                let mut fonts = Vec::new();
198                if has_cjk {
199                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
200                }
201                if has_arabic {
202                    fonts.push("Segoe UI Arabic");
203                }
204                if has_hebrew {
205                    fonts.push("Segoe UI Hebrew");
206                }
207                if has_thai {
208                    fonts.push("Leelawadee UI");
209                }
210                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
211                fonts.iter().map(|s| s.to_string()).collect()
212            }
213            OperatingSystem::Linux => {
214                let mut fonts = Vec::new();
215                if has_cjk {
216                    fonts.extend_from_slice(&[
217                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
218                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
219                    ]);
220                }
221                if has_arabic {
222                    fonts.push("Noto Sans Arabic");
223                }
224                if has_hebrew {
225                    fonts.push("Noto Sans Hebrew");
226                }
227                if has_thai {
228                    fonts.push("Noto Sans Thai");
229                }
230                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
231                fonts.iter().map(|s| s.to_string()).collect()
232            }
233            OperatingSystem::MacOS => {
234                let mut fonts = Vec::new();
235                if has_cjk {
236                    fonts.extend_from_slice(&[
237                        "Hiragino Sans", "Hiragino Kaku Gothic ProN", 
238                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
239                    ]);
240                }
241                if has_arabic {
242                    fonts.push("Geeza Pro");
243                }
244                if has_hebrew {
245                    fonts.push("Arial Hebrew");
246                }
247                if has_thai {
248                    fonts.push("Thonburi");
249                }
250                fonts.extend_from_slice(&["San Francisco", "Helvetica Neue", "Lucida Grande"]);
251                fonts.iter().map(|s| s.to_string()).collect()
252            }
253            OperatingSystem::Wasm => Vec::new(),
254        }
255    }
256    
257    /// Get system-specific fonts for the "monospace" generic family
258    /// Prioritizes fonts based on Unicode range coverage
259    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
260        let has_cjk = has_cjk_ranges(unicode_ranges);
261        
262        match self {
263            OperatingSystem::Windows => {
264                let mut fonts = Vec::new();
265                if has_cjk {
266                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
267                }
268                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
269                fonts.iter().map(|s| s.to_string()).collect()
270            }
271            OperatingSystem::Linux => {
272                let mut fonts = Vec::new();
273                if has_cjk {
274                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
275                }
276                fonts.extend_from_slice(&[
277                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
278                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
279                ]);
280                fonts.iter().map(|s| s.to_string()).collect()
281            }
282            OperatingSystem::MacOS => {
283                let mut fonts = Vec::new();
284                if has_cjk {
285                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
286                }
287                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
288                fonts.iter().map(|s| s.to_string()).collect()
289            }
290            OperatingSystem::Wasm => Vec::new(),
291        }
292    }
293    
294    /// Expand a generic CSS font family to system-specific font names
295    /// Returns the original name if not a generic family
296    /// Prioritizes fonts based on Unicode range coverage
297    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
298        match family.to_lowercase().as_str() {
299            "serif" => self.get_serif_fonts(unicode_ranges),
300            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
301            "monospace" => self.get_monospace_fonts(unicode_ranges),
302            "cursive" | "fantasy" | "system-ui" => {
303                // Use sans-serif as fallback for these
304                self.get_sans_serif_fonts(unicode_ranges)
305            }
306            _ => vec![family.to_string()],
307        }
308    }
309}
310
311/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
312/// Prioritizes fonts based on Unicode range coverage
313/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
314pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
315    let mut expanded = Vec::new();
316    
317    for family in families {
318        expanded.extend(os.expand_generic_family(family, unicode_ranges));
319    }
320    
321    expanded
322}
323
/// UUID to identify a font (collections are broken up into separate fonts)
#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FontId(pub u128);

/// `Debug` prints exactly the same text as `Display`.
impl core::fmt::Debug for FontId {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <Self as core::fmt::Display>::fmt(self, f)
    }
}

/// Formats the 128-bit value as lowercase hex in the usual
/// `8-4-4-4-12` UUID grouping, most significant bits first.
impl core::fmt::Display for FontId {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let FontId(raw) = *self;
        // The truncating casts keep exactly the low 32 / 16 bits of each
        // shifted value, which is the group we want.
        let group_a = (raw >> 96) as u32;
        let group_b = (raw >> 80) as u16;
        let group_c = (raw >> 64) as u16;
        let group_d = (raw >> 48) as u16;
        let group_e = raw & 0xFFFF_FFFF_FFFF;
        write!(
            f,
            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
            group_a, group_b, group_c, group_d, group_e
        )
    }
}

impl FontId {
    /// Generate a new unique FontId using an atomic counter.
    ///
    /// Ids come from a process-wide 64-bit counter that starts at 1, so they
    /// are unique within the running process (until the counter wraps).
    pub fn new() -> Self {
        use core::sync::atomic::{AtomicU64, Ordering};
        static NEXT_ID: AtomicU64 = AtomicU64::new(1);
        FontId(u128::from(NEXT_ID.fetch_add(1, Ordering::Relaxed)))
    }
}
359
/// Whether a field is required to match (yes / no / don't care)
#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum PatternMatch {
    /// Default: don't particularly care whether the requirement matches
    #[default]
    DontCare,
    /// Requirement has to be true for the selected font
    True,
    /// Requirement has to be false for the selected font
    False,
}

impl PatternMatch {
    /// `true` for every value that expresses an actual requirement, i.e.
    /// anything other than [`PatternMatch::DontCare`].
    fn needs_to_match(&self) -> bool {
        !matches!(self, PatternMatch::DontCare)
    }

    /// Two values are compatible when either side is
    /// [`PatternMatch::DontCare`] or both are the same.
    fn matches(&self, other: &PatternMatch) -> bool {
        let wildcard = PatternMatch::DontCare;
        *self == wildcard || *other == wildcard || self == other
    }
}
387
/// Font weight values as defined in CSS specification.
///
/// The discriminant of each variant is its numeric CSS weight, and the
/// derived ordering runs from lightest to heaviest. The default is
/// [`FcWeight::Normal`].
#[derive(Debug, Default, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcWeight {
    Thin = 100,
    ExtraLight = 200,
    Light = 300,
    #[default]
    Normal = 400,
    Medium = 500,
    SemiBold = 600,
    Bold = 700,
    ExtraBold = 800,
    Black = 900,
}

impl FcWeight {
    /// Map a numeric weight onto the nearest named weight.
    ///
    /// Values are rounded to the closest multiple of 100 (`x50` rounds up);
    /// everything below 150 is `Thin` and everything from 850 upwards is
    /// `Black`.
    pub fn from_u16(weight: u16) -> Self {
        // Saturating add so values near `u16::MAX` cannot overflow.
        let nearest_hundred = weight.saturating_add(50) / 100;
        match nearest_hundred {
            0 | 1 => FcWeight::Thin,
            2 => FcWeight::ExtraLight,
            3 => FcWeight::Light,
            4 => FcWeight::Normal,
            5 => FcWeight::Medium,
            6 => FcWeight::SemiBold,
            7 => FcWeight::Bold,
            8 => FcWeight::ExtraBold,
            _ => FcWeight::Black,
        }
    }

    /// Pick the entry of `available` that best substitutes for `self`.
    ///
    /// Returns `None` only when `available` is empty. Otherwise:
    ///
    /// 1. an exact match wins;
    /// 2. `Normal` prefers `Medium` and `Medium` prefers `Normal`;
    /// 3. weights up to `Medium` then take the closest lighter weight and,
    ///    failing that, the closest heavier one;
    /// 4. weights above `Medium` take the closest heavier weight and,
    ///    failing that, the closest lighter one.
    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
        let wanted = *self;

        if available.is_empty() {
            return None;
        }
        if available.contains(&wanted) {
            return Some(wanted);
        }

        // 400 and 500 are interchangeable before anything else is tried.
        let preferred_neighbour = match wanted {
            FcWeight::Normal => Some(FcWeight::Medium),
            FcWeight::Medium => Some(FcWeight::Normal),
            _ => None,
        };
        if let Some(neighbour) = preferred_neighbour {
            if available.contains(&neighbour) {
                return Some(neighbour);
            }
        }

        // The derived `Ord` follows the numeric weight, so max-of-lighter /
        // min-of-heavier are the numerically closest candidates.
        let nearest_lighter = || available.iter().copied().filter(|w| *w < wanted).max();
        let nearest_heavier = || available.iter().copied().filter(|w| *w > wanted).min();

        let picked = if wanted <= FcWeight::Medium {
            nearest_lighter().or_else(nearest_heavier)
        } else {
            nearest_heavier().or_else(nearest_lighter)
        };

        // If nothing matches by now, return the first available weight
        picked.or_else(|| available.first().copied())
    }
}
569
/// CSS font-stretch values.
///
/// Discriminants run from 1 (`UltraCondensed`) to 9 (`UltraExpanded`) and
/// the derived ordering runs from narrowest to widest. The default is
/// [`FcStretch::Normal`].
#[derive(Debug, Default, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcStretch {
    UltraCondensed = 1,
    ExtraCondensed = 2,
    Condensed = 3,
    SemiCondensed = 4,
    #[default]
    Normal = 5,
    SemiExpanded = 6,
    Expanded = 7,
    ExtraExpanded = 8,
    UltraExpanded = 9,
}

impl FcStretch {
    /// `true` for the four widths narrower than `Normal`.
    pub fn is_condensed(&self) -> bool {
        *self < FcStretch::Normal
    }

    /// Convert a numeric width class (1..=9) into a stretch value.
    ///
    /// Any value outside `1..=9` yields [`FcStretch::Normal`].
    pub fn from_u16(width_class: u16) -> Self {
        // Indexed by `width_class - 1`.
        const BY_WIDTH_CLASS: [FcStretch; 9] = [
            FcStretch::UltraCondensed,
            FcStretch::ExtraCondensed,
            FcStretch::Condensed,
            FcStretch::SemiCondensed,
            FcStretch::Normal,
            FcStretch::SemiExpanded,
            FcStretch::Expanded,
            FcStretch::ExtraExpanded,
            FcStretch::UltraExpanded,
        ];

        width_class
            .checked_sub(1)
            .and_then(|index| BY_WIDTH_CLASS.get(usize::from(index)).copied())
            .unwrap_or(FcStretch::Normal)
    }

    /// Follows CSS spec for stretch matching.
    ///
    /// An exact match wins. Otherwise `Normal` and the condensed widths
    /// prefer the closest narrower width and then the closest wider one;
    /// expanded widths prefer the closest wider width and then the closest
    /// narrower one. Returns `None` only when `available` is empty.
    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
        let wanted = *self;

        if available.contains(&wanted) {
            return Some(wanted);
        }

        let closest_narrower = available.iter().copied().filter(|s| *s < wanted).max();
        let closest_wider = available.iter().copied().filter(|s| *s > wanted).min();

        if wanted <= FcStretch::Normal {
            closest_narrower.or(closest_wider)
        } else {
            closest_wider.or(closest_narrower)
        }
    }
}
688
/// Unicode range representation for font matching.
///
/// Both bounds are inclusive code points.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeRange {
    /// First code point of the range (inclusive).
    pub start: u32,
    /// Last code point of the range (inclusive).
    pub end: u32,
}

/// The default set of Unicode-block fallback scripts that
/// [`FcFontCache::resolve_font_chain`] pulls in when no explicit
/// `scripts_hint` is supplied.
///
/// Keeping this exposed lets callers that *do* want the default
/// behaviour build the set explicitly — typically by union-ing it
/// with a detected-from-document set before calling
/// [`FcFontCache::resolve_font_chain_with_scripts`].
pub const DEFAULT_UNICODE_FALLBACK_SCRIPTS: &[UnicodeRange] = &[
    // Cyrillic
    UnicodeRange { start: 0x0400, end: 0x04FF },
    // Arabic
    UnicodeRange { start: 0x0600, end: 0x06FF },
    // Devanagari
    UnicodeRange { start: 0x0900, end: 0x097F },
    // Hiragana
    UnicodeRange { start: 0x3040, end: 0x309F },
    // Katakana
    UnicodeRange { start: 0x30A0, end: 0x30FF },
    // CJK Unified Ideographs
    UnicodeRange { start: 0x4E00, end: 0x9FFF },
    // Hangul Syllables
    UnicodeRange { start: 0xAC00, end: 0xD7A3 },
];

impl UnicodeRange {
    /// Whether the code point of `c` lies within `start..=end`.
    pub fn contains(&self, c: char) -> bool {
        (self.start..=self.end).contains(&u32::from(c))
    }

    /// Whether the two ranges share at least one code point.
    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
        // Disjoint means one range ends before the other begins.
        !(other.end < self.start || self.end < other.start)
    }

    /// Whether `self` lies entirely inside `other` (equal ranges count).
    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
        other.start <= self.start && self.end <= other.end
    }
}
730
731/// Check if any range covers CJK Unified Ideographs, Hiragana, Katakana, or Hangul
732pub fn has_cjk_ranges(ranges: &[UnicodeRange]) -> bool {
733    ranges.iter().any(|r| {
734        (r.start >= 0x4E00 && r.start <= 0x9FFF) ||
735        (r.start >= 0x3040 && r.start <= 0x309F) ||
736        (r.start >= 0x30A0 && r.start <= 0x30FF) ||
737        (r.start >= 0xAC00 && r.start <= 0xD7AF)
738    })
739}
740
741/// Check if any range covers the Arabic block
742pub fn has_arabic_ranges(ranges: &[UnicodeRange]) -> bool {
743    ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF)
744}
745
746/// Check if any range covers the Cyrillic block
747pub fn has_cyrillic_ranges(ranges: &[UnicodeRange]) -> bool {
748    ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF)
749}
750
751/// Check if any range covers the Hebrew block
752pub fn has_hebrew_ranges(ranges: &[UnicodeRange]) -> bool {
753    ranges.iter().any(|r| r.start >= 0x0590 && r.start <= 0x05FF)
754}
755
756/// Check if any range covers the Thai block
757pub fn has_thai_ranges(ranges: &[UnicodeRange]) -> bool {
758    ranges.iter().any(|r| r.start >= 0x0E00 && r.start <= 0x0E7F)
759}
760
/// Log levels for trace messages.
///
/// The derived ordering follows declaration order:
/// `Debug < Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TraceLevel {
    Debug,
    Info,
    Warning,
    Error,
}
769
770/// Reason for font matching failure or success
771#[derive(Debug, Clone, PartialEq, Eq, Hash)]
772pub enum MatchReason {
773    NameMismatch {
774        requested: Option<String>,
775        found: Option<String>,
776    },
777    FamilyMismatch {
778        requested: Option<String>,
779        found: Option<String>,
780    },
781    StyleMismatch {
782        property: &'static str,
783        requested: String,
784        found: String,
785    },
786    WeightMismatch {
787        requested: FcWeight,
788        found: FcWeight,
789    },
790    StretchMismatch {
791        requested: FcStretch,
792        found: FcStretch,
793    },
794    UnicodeRangeMismatch {
795        character: char,
796        ranges: Vec<UnicodeRange>,
797    },
798    Success,
799}
800
801/// Trace message for debugging font matching
802#[derive(Debug, Clone, PartialEq, Eq)]
803pub struct TraceMsg {
804    pub level: TraceLevel,
805    pub path: String,
806    pub reason: MatchReason,
807}
808
/// Hinting style for font rendering.
///
/// Discriminants are fixed (`None = 0` .. `Full = 3`); the default is
/// [`FcHintStyle::None`].
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcHintStyle {
    #[default]
    None = 0,
    Slight = 1,
    Medium = 2,
    Full = 3,
}
820
/// Subpixel rendering order.
///
/// Discriminants are fixed (`Unknown = 0` .. `None = 5`); the default is
/// [`FcRgba::Unknown`].
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcRgba {
    #[default]
    Unknown = 0,
    Rgb = 1,
    Bgr = 2,
    Vrgb = 3,
    Vbgr = 4,
    None = 5,
}
834
/// LCD filter mode for subpixel rendering.
///
/// Discriminants are fixed (`None = 0` .. `Legacy = 3`); the default is
/// [`FcLcdFilter::None`].
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcLcdFilter {
    #[default]
    None = 0,
    Default = 1,
    Light = 2,
    Legacy = 3,
}
846
847/// Per-font rendering configuration from system font config (Linux fonts.conf).
848///
849/// All fields are `Option<T>` -- `None` means "use system default".
850/// On non-Linux platforms, this is always all-None (no per-font overrides).
851#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
852#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
853pub struct FcFontRenderConfig {
854    pub antialias: Option<bool>,
855    pub hinting: Option<bool>,
856    pub hintstyle: Option<FcHintStyle>,
857    pub autohint: Option<bool>,
858    pub rgba: Option<FcRgba>,
859    pub lcdfilter: Option<FcLcdFilter>,
860    pub embeddedbitmap: Option<bool>,
861    pub embolden: Option<bool>,
862    pub dpi: Option<f64>,
863    pub scale: Option<f64>,
864    pub minspace: Option<bool>,
865}
866
867/// Helper newtype to provide Eq/Ord for Option<f64> via total-order bit comparison.
868/// This allows FcFontRenderConfig to be used inside FcPattern which derives Eq + Ord.
869impl Eq for FcFontRenderConfig {}
870
871impl Ord for FcFontRenderConfig {
872    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
873        // Compare all non-f64 fields first
874        let ord = self.antialias.cmp(&other.antialias)
875            .then_with(|| self.hinting.cmp(&other.hinting))
876            .then_with(|| self.hintstyle.cmp(&other.hintstyle))
877            .then_with(|| self.autohint.cmp(&other.autohint))
878            .then_with(|| self.rgba.cmp(&other.rgba))
879            .then_with(|| self.lcdfilter.cmp(&other.lcdfilter))
880            .then_with(|| self.embeddedbitmap.cmp(&other.embeddedbitmap))
881            .then_with(|| self.embolden.cmp(&other.embolden))
882            .then_with(|| self.minspace.cmp(&other.minspace));
883
884        // For f64 fields, use to_bits() for total ordering
885        let ord = ord.then_with(|| {
886            let a = self.dpi.map(|v| v.to_bits());
887            let b = other.dpi.map(|v| v.to_bits());
888            a.cmp(&b)
889        });
890        ord.then_with(|| {
891            let a = self.scale.map(|v| v.to_bits());
892            let b = other.scale.map(|v| v.to_bits());
893            a.cmp(&b)
894        })
895    }
896}
897
898/// Font pattern for matching
899#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
900#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
901#[repr(C)]
902pub struct FcPattern {
903    // font name
904    pub name: Option<String>,
905    // family name
906    pub family: Option<String>,
907    // "italic" property
908    pub italic: PatternMatch,
909    // "oblique" property
910    pub oblique: PatternMatch,
911    // "bold" property
912    pub bold: PatternMatch,
913    // "monospace" property
914    pub monospace: PatternMatch,
915    // "condensed" property
916    pub condensed: PatternMatch,
917    // font weight
918    pub weight: FcWeight,
919    // font stretch
920    pub stretch: FcStretch,
921    // unicode ranges to match
922    pub unicode_ranges: Vec<UnicodeRange>,
923    // extended font metadata
924    pub metadata: FcFontMetadata,
925    // per-font rendering configuration (from system fonts.conf on Linux)
926    pub render_config: FcFontRenderConfig,
927}
928
929impl core::fmt::Debug for FcPattern {
930    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
931        let mut d = f.debug_struct("FcPattern");
932
933        if let Some(name) = &self.name {
934            d.field("name", name);
935        }
936
937        if let Some(family) = &self.family {
938            d.field("family", family);
939        }
940
941        if self.italic != PatternMatch::DontCare {
942            d.field("italic", &self.italic);
943        }
944
945        if self.oblique != PatternMatch::DontCare {
946            d.field("oblique", &self.oblique);
947        }
948
949        if self.bold != PatternMatch::DontCare {
950            d.field("bold", &self.bold);
951        }
952
953        if self.monospace != PatternMatch::DontCare {
954            d.field("monospace", &self.monospace);
955        }
956
957        if self.condensed != PatternMatch::DontCare {
958            d.field("condensed", &self.condensed);
959        }
960
961        if self.weight != FcWeight::Normal {
962            d.field("weight", &self.weight);
963        }
964
965        if self.stretch != FcStretch::Normal {
966            d.field("stretch", &self.stretch);
967        }
968
969        if !self.unicode_ranges.is_empty() {
970            d.field("unicode_ranges", &self.unicode_ranges);
971        }
972
973        // Only show non-empty metadata fields
974        let empty_metadata = FcFontMetadata::default();
975        if self.metadata != empty_metadata {
976            d.field("metadata", &self.metadata);
977        }
978
979        // Only show render_config when it differs from default
980        let empty_render_config = FcFontRenderConfig::default();
981        if self.render_config != empty_render_config {
982            d.field("render_config", &self.render_config);
983        }
984
985        d.finish()
986    }
987}
988
/// Descriptive font metadata strings (naming, authorship, licensing).
///
/// Every field is optional; `Default` yields a value with all fields
/// `None`, which `FcPattern`'s `Debug` impl treats as "no metadata".
///
/// The field set mirrors the string records of the OpenType `name`
/// table (copyright, family, subfamily, unique ID, full name, version,
/// PostScript name, trademark, manufacturer, designer, description,
/// URLs, license, preferred family/subfamily).
/// NOTE(review): this comment previously said "OS/2 table", which holds
/// numeric metrics rather than these strings — confirm against the parser.
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontMetadata {
    pub copyright: Option<String>,
    pub designer: Option<String>,
    pub designer_url: Option<String>,
    pub font_family: Option<String>,
    pub font_subfamily: Option<String>,
    pub full_name: Option<String>,
    pub id_description: Option<String>,
    pub license: Option<String>,
    pub license_url: Option<String>,
    pub manufacturer: Option<String>,
    pub manufacturer_url: Option<String>,
    pub postscript_name: Option<String>,
    pub preferred_family: Option<String>,
    pub preferred_subfamily: Option<String>,
    pub trademark: Option<String>,
    pub unique_id: Option<String>,
    pub version: Option<String>,
}
1011
1012impl FcPattern {
1013    /// Check if this pattern would match the given character
1014    pub fn contains_char(&self, c: char) -> bool {
1015        if self.unicode_ranges.is_empty() {
1016            return true; // No ranges specified means match all characters
1017        }
1018
1019        for range in &self.unicode_ranges {
1020            if range.contains(c) {
1021                return true;
1022            }
1023        }
1024
1025        false
1026    }
1027}
1028
1029/// Font match result with UUID
1030#[derive(Debug, Clone, PartialEq, Eq)]
1031pub struct FontMatch {
1032    pub id: FontId,
1033    pub unicode_ranges: Vec<UnicodeRange>,
1034    pub fallbacks: Vec<FontMatchNoFallback>,
1035}
1036
1037/// Font match result with UUID (without fallback)
1038#[derive(Debug, Clone, PartialEq, Eq)]
1039pub struct FontMatchNoFallback {
1040    pub id: FontId,
1041    pub unicode_ranges: Vec<UnicodeRange>,
1042}
1043
1044/// A run of text that uses the same font
1045/// Returned by FontFallbackChain::query_for_text()
1046#[derive(Debug, Clone, PartialEq, Eq)]
1047pub struct ResolvedFontRun {
1048    /// The text content of this run
1049    pub text: String,
1050    /// Start byte index in the original text
1051    pub start_byte: usize,
1052    /// End byte index in the original text (exclusive)
1053    pub end_byte: usize,
1054    /// The font to use for this run (None if no font found)
1055    pub font_id: Option<FontId>,
1056    /// Which CSS font-family this came from
1057    pub css_source: String,
1058}
1059
1060/// Resolved font fallback chain for a CSS font-family stack
1061/// This represents the complete chain of fonts to use for rendering text
1062#[derive(Debug, Clone, PartialEq, Eq)]
1063pub struct FontFallbackChain {
1064    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
1065    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
1066    pub css_fallbacks: Vec<CssFallbackGroup>,
1067    
1068    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
1069    /// Only populated if css_fallbacks don't cover all requested characters
1070    pub unicode_fallbacks: Vec<FontMatch>,
1071    
1072    /// The original CSS font-family stack that was requested
1073    pub original_stack: Vec<String>,
1074}
1075
1076impl FontFallbackChain {
1077    /// Resolve which font should be used for a specific character
1078    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
1079    /// Returns None if no font in the chain can render this character
1080    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
1081        let codepoint = ch as u32;
1082
1083        // Check CSS fallbacks in order
1084        for group in &self.css_fallbacks {
1085            for font in &group.fonts {
1086                let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1087                if meta.unicode_ranges.is_empty() {
1088                    continue; // No range info — don't assume it covers everything
1089                }
1090                if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1091                    return Some((font.id, group.css_name.clone()));
1092                }
1093            }
1094        }
1095
1096        // Check Unicode fallbacks
1097        for font in &self.unicode_fallbacks {
1098            let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1099            if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1100                return Some((font.id, "(unicode-fallback)".to_string()));
1101            }
1102        }
1103
1104        None
1105    }
1106    
1107    /// Resolve all characters in a text string to their fonts
1108    /// Returns a vector of (character, FontId, css_source) tuples
1109    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
1110        text.chars()
1111            .map(|ch| (ch, self.resolve_char(cache, ch)))
1112            .collect()
1113    }
1114    
1115    /// Query which fonts should be used for a text string, grouped by font
1116    /// Returns runs of consecutive characters that use the same font
1117    /// This is the main API for text shaping - call this to get font runs, then shape each run
1118    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
1119        if text.is_empty() {
1120            return Vec::new();
1121        }
1122        
1123        let mut runs: Vec<ResolvedFontRun> = Vec::new();
1124        let mut current_font: Option<FontId> = None;
1125        let mut current_css_source: Option<String> = None;
1126        let mut current_start_byte: usize = 0;
1127        
1128        for (byte_idx, ch) in text.char_indices() {
1129            let resolved = self.resolve_char(cache, ch);
1130            let (font_id, css_source) = match &resolved {
1131                Some((id, source)) => (Some(*id), Some(source.clone())),
1132                None => (None, None),
1133            };
1134            
1135            // Check if we need to start a new run
1136            let font_changed = font_id != current_font;
1137            
1138            if font_changed && byte_idx > 0 {
1139                // Finalize the current run
1140                let run_text = &text[current_start_byte..byte_idx];
1141                runs.push(ResolvedFontRun {
1142                    text: run_text.to_string(),
1143                    start_byte: current_start_byte,
1144                    end_byte: byte_idx,
1145                    font_id: current_font,
1146                    css_source: current_css_source.clone().unwrap_or_default(),
1147                });
1148                current_start_byte = byte_idx;
1149            }
1150            
1151            current_font = font_id;
1152            current_css_source = css_source;
1153        }
1154        
1155        // Finalize the last run
1156        if current_start_byte < text.len() {
1157            let run_text = &text[current_start_byte..];
1158            runs.push(ResolvedFontRun {
1159                text: run_text.to_string(),
1160                start_byte: current_start_byte,
1161                end_byte: text.len(),
1162                font_id: current_font,
1163                css_source: current_css_source.unwrap_or_default(),
1164            });
1165        }
1166        
1167        runs
1168    }
1169}
1170
1171/// A group of fonts that are fallbacks for a single CSS font-family name
1172#[derive(Debug, Clone, PartialEq, Eq)]
1173pub struct CssFallbackGroup {
1174    /// The CSS font name (e.g., "NotoSansJP", "sans-serif")
1175    pub css_name: String,
1176    
1177    /// System fonts that match this CSS name
1178    /// First font in list is the best match
1179    pub fonts: Vec<FontMatch>,
1180}
1181
/// Cache key for font fallback chain queries.
///
/// IMPORTANT: per-text unicode ranges are deliberately NOT part of the
/// key. Chains are cached by CSS properties only, so different texts
/// rendered with the same CSS font stack share one chain.
///
/// `scripts_hint_hash` records *which set of Unicode-fallback scripts*
/// the caller asked for:
///
/// * `None` — the default set of 7 major scripts
///   (Cyrillic/Arabic/Devanagari/Hiragana/Katakana/CJK/Hangul), i.e. the
///   back-compat behaviour of `resolve_font_chain`;
/// * `Some(h)` — a stable hash of a caller-supplied script list, so an
///   ASCII-only query does not collide with a CJK-aware one.
#[cfg(feature = "std")]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct FontChainCacheKey {
    /// CSS font stack (expanded to OS-specific fonts).
    pub(crate) font_families: Vec<String>,
    /// Requested font weight.
    pub(crate) weight: FcWeight,
    /// Requested italic style flag.
    pub(crate) italic: PatternMatch,
    /// Requested oblique style flag.
    pub(crate) oblique: PatternMatch,
    /// Hash of the caller-supplied script hint (or `None` for the default set).
    pub(crate) scripts_hint_hash: Option<u64>,
}
1207
/// Reduce a `scripts_hint` slice to a stable `u64` suitable for
/// [`FontChainCacheKey::scripts_hint_hash`].
///
/// The result does not depend on the order of `ranges`: a local copy is
/// sorted before hashing, so `[CJK, Arabic]` and `[Arabic, CJK]` land in
/// the same cache slot. Each range contributes the little-endian bytes
/// of its `start` followed by its `end`.
#[cfg(feature = "std")]
fn hash_scripts_hint(ranges: &[UnicodeRange]) -> u64 {
    let mut ordered: Vec<UnicodeRange> = ranges.to_vec();
    ordered.sort();

    // Serialise the sorted ranges into one contiguous byte buffer.
    let bytes = ordered.iter().fold(
        Vec::with_capacity(ordered.len() * 8),
        |mut acc, range| {
            acc.extend_from_slice(&range.start.to_le_bytes());
            acc.extend_from_slice(&range.end.to_le_bytes());
            acc
        },
    );

    crate::utils::content_hash_u64(&bytes)
}
1223
/// Location of a font face on disk.
///
/// `bytes_hash` is a deterministic 64-bit hash of the file's complete
/// byte contents (see [`crate::utils::content_hash_u64`]). Every face of
/// one `.ttc` file carries the same `bytes_hash`, and so do two distinct
/// paths whose files have identical contents — which lets the cache hand
/// out a single shared buffer for them via
/// [`FcFontCache::get_font_bytes`].
///
/// A value of `0` means "hash not computed" (e.g. the entry came from a
/// filename-only scan, or from a legacy v1 disk cache). Callers must
/// treat `0` as opaque and fall back to unshared reads.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcFontPath {
    /// Path of the font file.
    pub path: String,
    /// Index of the face inside the file.
    pub font_index: usize,
    /// 64-bit content hash of the file's bytes. 0 = not computed.
    #[cfg_attr(feature = "cache", serde(default))]
    pub bytes_hash: u64,
}
1245
/// A font whose file contents are held in memory rather than on disk.
#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct FcFont {
    /// Raw bytes of the font file.
    pub bytes: Vec<u8>,
    /// Index of the face inside `bytes`.
    pub font_index: usize,
    /// Free-form identifier, used to tell fonts apart in tests.
    pub id: String,
}
1254
1255/// Owned font-source descriptor, returned by
1256/// [`FcFontCache::get_font_by_id`].
1257///
1258/// In v4.0 this was a borrowed enum (`FontSource<'a>` with refs into
1259/// the pattern map). With v4.1's shared-state cache, the map lives
1260/// behind an `RwLock`, so returning a reference would require the
1261/// caller to hold a read guard for the full lifetime of the result —
1262/// which bleeds the locking strategy into every call site. The owned
1263/// variant clones the small `FcFont` / `FcFontPath` struct and
1264/// releases the lock immediately. Bytes/mmap are not cloned — those
1265/// go through `get_font_bytes` which hands out `Arc<FontBytes>`.
1266#[derive(Debug, Clone)]
1267pub enum OwnedFontSource {
1268    /// Font loaded from memory (small metadata + owned `Vec<u8>`).
1269    Memory(FcFont),
1270    /// Font loaded from disk.
1271    Disk(FcFontPath),
1272}
1273
/// Handle to a font's bytes, as returned by [`FcFontCache::get_font_bytes`].
///
/// Disk fonts are served through an `Mmap` so that pages which are never
/// touched do not count toward the process RSS. In-memory fonts
/// (`FcFont`) are returned as `Owned`, since their bytes already live on
/// the heap.
///
/// `FontBytes` derefs to `[u8]` and implements `AsRef<[u8]>`, so any API
/// that expects `&[u8]` (allsorts, ttf-parser, …) accepts it unchanged.
///
/// Both variants are `Send + Sync` (mmaps and `Arc<[u8]>` are both safe
/// to share across threads).
#[cfg(feature = "std")]
pub enum FontBytes {
    /// Heap-owned bytes: used for in-memory fonts and as the fallback
    /// when mmap is unavailable.
    Owned(std::sync::Arc<[u8]>),
    /// Read-only, file-backed mmap; the kernel loads pages on demand.
    Mmapped(mmapio::Mmap),
}
1295
#[cfg(feature = "std")]
impl FontBytes {
    /// View the font data as a plain byte slice, whichever variant backs it.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        match *self {
            FontBytes::Owned(ref heap) => &heap[..],
            FontBytes::Mmapped(ref map) => &map[..],
        }
    }
}
1307
/// Lets a `FontBytes` be used wherever a `&[u8]` is expected.
#[cfg(feature = "std")]
impl core::ops::Deref for FontBytes {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &Self::Target {
        FontBytes::as_slice(self)
    }
}
1316
/// Lets a `FontBytes` be passed to APIs bounded on `AsRef<[u8]>`.
#[cfg(feature = "std")]
impl AsRef<[u8]> for FontBytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        FontBytes::as_slice(self)
    }
}
1324
/// Prints the variant name and byte length only — never the bytes themselves.
#[cfg(feature = "std")]
impl core::fmt::Debug for FontBytes {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let len = self.as_slice().len();
        let variant = match *self {
            FontBytes::Mmapped(_) => "Mmapped",
            FontBytes::Owned(_) => "Owned",
        };
        f.write_fmt(format_args!("FontBytes::{}({} bytes)", variant, len))
    }
}
1335
/// Open a font file as an mmap-backed [`FontBytes`].
///
/// On non-wasm targets the file is memory-mapped. If the file cannot be
/// opened or mapped (e.g. it lives on a network share that doesn't
/// support mmap), or on wasm targets where the mmap path is compiled
/// out, the whole file is read onto the heap instead and returned as
/// `FontBytes::Owned`.
///
/// Returns `None` when the heap-read fallback fails as well.
#[cfg(feature = "std")]
fn open_font_bytes_mmap(path: &str) -> Option<std::sync::Arc<FontBytes>> {
    use std::sync::Arc;

    // `File` is referenced by full path inside the cfg-gated block so that
    // wasm builds (where this block disappears) carry no unused import.
    #[cfg(not(target_family = "wasm"))]
    {
        if let Ok(file) = std::fs::File::open(path) {
            // SAFETY: mapping a file requires that it is not mutated while
            // mapped. For system fonts that is the overwhelmingly common
            // case, and this function accepts the residual risk.
            // NOTE(review): a file mapping is not a snapshot — concurrent
            // truncation or rewriting of the file is not defended against.
            if let Ok(mmap) = unsafe { mmapio::MmapOptions::new().map(&file) } {
                return Some(Arc::new(FontBytes::Mmapped(mmap)));
            }
        }
    }

    // Reached on wasm, or when opening / mapping the file failed.
    let bytes = std::fs::read(path).ok()?;
    Some(Arc::new(FontBytes::Owned(Arc::from(bytes))))
}
1359
/// A font supplied from memory under a caller-chosen name.
///
/// This is the primary way to hand custom fonts to the application.
#[derive(Clone, Debug)]
pub struct NamedFont {
    /// Human-readable name for this font (e.g., "My Custom Font").
    pub name: String,
    /// Raw contents of the font file (TTF, OTF, WOFF, WOFF2, TTC).
    pub bytes: Vec<u8>,
}
1369
1370impl NamedFont {
1371    /// Create a new named font from bytes
1372    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
1373        Self {
1374            name: name.into(),
1375            bytes,
1376        }
1377    }
1378}
1379
1380/// Font cache, initialized at startup.
1381///
1382/// Thread-safe, shared font cache.
1383///
1384/// As of v4.1 the cache internally owns its state via
1385/// `Arc<RwLock<FcFontCacheInner>>`: cloning an `FcFontCache` returns
1386/// a handle that shares the same underlying data. Writes by one holder
1387/// (typically the background builder inside `FcFontRegistry`) become
1388/// immediately visible to every other holder (layout engines,
1389/// shape-time resolvers, etc.).
1390///
1391/// Before 4.1 the clone deep-copied every map, so external holders
1392/// were frozen at the moment they took the snapshot — the mismatch
1393/// between "live registry cache" and "frozen font manager cache"
1394/// was the root of the silent-text regression when lazy scout mode
1395/// was enabled. The shared-state design eliminates that entire class
1396/// of staleness bugs by construction.
1397pub struct FcFontCache {
1398    pub(crate) shared: std::sync::Arc<FcFontCacheShared>,
1399}
1400
1401/// Shared interior of `FcFontCache`. Always accessed through an
1402/// `Arc` — never referenced directly by external callers.
1403pub(crate) struct FcFontCacheShared {
1404    /// Main pattern/metadata state, guarded by a reader-writer lock.
1405    /// Builder threads take the write lock to insert a parsed font;
1406    /// all query paths take the read lock.
1407    pub(crate) state: std::sync::RwLock<FcFontCacheInner>,
1408    /// Font fallback chain cache. Not part of the RwLock-guarded
1409    /// state because cache insertions happen under `&self` on read
1410    /// paths (they're a memoisation, not observable state).
1411    pub(crate) chain_cache: std::sync::Mutex<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
1412    /// Shared file-bytes cache: content-hash → weak [`FontBytes`].
1413    ///
1414    /// [`FcFontCache::get_font_bytes`] populates this so that multiple
1415    /// FontIds backed by the same file (e.g. every face of a `.ttc`)
1416    /// return the same `Arc<FontBytes>` — and therefore the same mmap
1417    /// — instead of each allocating their own buffer. We hold `Weak`
1418    /// references so the mmap unmap as soon as no parsed font holds
1419    /// it alive.
1420    pub(crate) shared_bytes: std::sync::Mutex<std::collections::HashMap<u64, std::sync::Weak<FontBytes>>>,
1421}
1422
1423/// The actual font-pattern state, held behind the RwLock in
1424/// `FcFontCacheShared`. Private — all access goes through
1425/// `FcFontCache` methods which lock transparently.
1426#[derive(Default, Debug)]
1427pub(crate) struct FcFontCacheInner {
1428    /// Pattern to FontId mapping (query index)
1429    pub(crate) patterns: BTreeMap<FcPattern, FontId>,
1430    /// On-disk font paths
1431    pub(crate) disk_fonts: BTreeMap<FontId, FcFontPath>,
1432    /// In-memory fonts
1433    pub(crate) memory_fonts: BTreeMap<FontId, FcFont>,
1434    /// Metadata cache (patterns stored by ID for quick lookup)
1435    pub(crate) metadata: BTreeMap<FontId, FcPattern>,
1436    /// Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds.
1437    /// Enables fast fuzzy search by intersecting token sets.
1438    pub(crate) token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
1439    /// Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>.
1440    /// Avoids re-tokenization during fuzzy search.
1441    pub(crate) font_tokens: BTreeMap<FontId, Vec<String>>,
1442}
1443
1444impl FcFontCacheInner {
1445    /// Add a font pattern to the token index. Called under the
1446    /// write lock by insertion paths.
1447    pub(crate) fn index_pattern_tokens(&mut self, pattern: &FcPattern, id: FontId) {
1448        // Extract tokens from both name and family
1449        let mut all_tokens = Vec::new();
1450
1451        if let Some(name) = &pattern.name {
1452            all_tokens.extend(FcFontCache::extract_font_name_tokens(name));
1453        }
1454
1455        if let Some(family) = &pattern.family {
1456            all_tokens.extend(FcFontCache::extract_font_name_tokens(family));
1457        }
1458
1459        // Convert tokens to lowercase and store them
1460        let tokens_lower: Vec<String> =
1461            all_tokens.iter().map(|t| t.to_lowercase()).collect();
1462
1463        // Add each token (lowercase) to the index
1464        for token_lower in &tokens_lower {
1465            self.token_index
1466                .entry(token_lower.clone())
1467                .or_insert_with(alloc::collections::BTreeSet::new)
1468                .insert(id);
1469        }
1470
1471        // Store pre-tokenized font name for fast lookup (no re-tokenization needed)
1472        self.font_tokens.insert(id, tokens_lower);
1473    }
1474}
1475
1476impl Clone for FcFontCache {
1477    /// Shallow clone — the returned handle shares the same underlying
1478    /// state as `self`. Writes through either are visible to both.
1479    /// This is the whole point of the v4.1 redesign; callers that need
1480    /// an isolated frozen copy must explicitly request one (e.g. via
1481    /// `snapshot_state`, which is intentionally not provided because
1482    /// we no longer have a use case for it).
1483    fn clone(&self) -> Self {
1484        Self {
1485            shared: std::sync::Arc::clone(&self.shared),
1486        }
1487    }
1488}
1489
1490impl core::fmt::Debug for FcFontCache {
1491    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1492        let state = self.state_read();
1493        f.debug_struct("FcFontCache")
1494            .field("patterns_len", &state.patterns.len())
1495            .field("metadata_len", &state.metadata.len())
1496            .field("disk_fonts_len", &state.disk_fonts.len())
1497            .field("memory_fonts_len", &state.memory_fonts.len())
1498            .finish()
1499    }
1500}
1501
1502impl Default for FcFontCache {
1503    fn default() -> Self {
1504        Self {
1505            shared: std::sync::Arc::new(FcFontCacheShared {
1506                state: std::sync::RwLock::new(FcFontCacheInner::default()),
1507                chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
1508                shared_bytes: std::sync::Mutex::new(std::collections::HashMap::new()),
1509            }),
1510        }
1511    }
1512}
1513
1514impl FcFontCache {
1515    /// Acquire a read guard on the cache's state. Panics if the lock
1516    /// was poisoned by a panic inside the write guard — same
1517    /// contract as `RwLock::read().expect(..)`.
1518    #[inline]
1519    pub(crate) fn state_read(
1520        &self,
1521    ) -> std::sync::RwLockReadGuard<'_, FcFontCacheInner> {
1522        self.shared
1523            .state
1524            .read()
1525            .unwrap_or_else(|poisoned| poisoned.into_inner())
1526    }
1527
1528    /// Acquire a write guard on the cache's state. Panics on
1529    /// poisoning, same as `state_read`.
1530    #[inline]
1531    pub(crate) fn state_write(
1532        &self,
1533    ) -> std::sync::RwLockWriteGuard<'_, FcFontCacheInner> {
1534        self.shared
1535            .state
1536            .write()
1537            .unwrap_or_else(|poisoned| poisoned.into_inner())
1538    }
1539
1540    /// Adds in-memory font files.
1541    ///
1542    /// Note: takes `&self` — the shared cache handles interior
1543    /// mutability via the RwLock.
1544    pub fn with_memory_fonts(&self, fonts: Vec<(FcPattern, FcFont)>) -> &Self {
1545        let mut state = self.state_write();
1546        for (pattern, font) in fonts {
1547            let id = FontId::new();
1548            state.patterns.insert(pattern.clone(), id);
1549            state.metadata.insert(id, pattern.clone());
1550            state.memory_fonts.insert(id, font);
1551            state.index_pattern_tokens(&pattern, id);
1552        }
1553        self
1554    }
1555
1556    /// Adds a memory font with a specific ID (for testing).
1557    pub fn with_memory_font_with_id(
1558        &self,
1559        id: FontId,
1560        pattern: FcPattern,
1561        font: FcFont,
1562    ) -> &Self {
1563        let mut state = self.state_write();
1564        state.patterns.insert(pattern.clone(), id);
1565        state.metadata.insert(id, pattern.clone());
1566        state.memory_fonts.insert(id, font);
1567        state.index_pattern_tokens(&pattern, id);
1568        self
1569    }
1570
1571    /// Register a newly-parsed on-disk font. Called by the builder
1572    /// thread inside `FcFontRegistry`. Allocates a fresh `FontId`,
1573    /// inserts the pattern + path + metadata in one write lock, and
1574    /// invalidates the chain cache so subsequent resolutions pick
1575    /// up the new font.
1576    pub fn insert_builder_font(&self, pattern: FcPattern, path: FcFontPath) {
1577        let id = FontId::new();
1578        {
1579            let mut state = self.state_write();
1580            state.index_pattern_tokens(&pattern, id);
1581            state.patterns.insert(pattern.clone(), id);
1582            state.disk_fonts.insert(id, path);
1583            state.metadata.insert(id, pattern);
1584        }
1585        // Invalidate chain cache so callers see the new font on the
1586        // next resolve. Scoped after the state write to keep lock
1587        // nesting shallow.
1588        if let Ok(mut cc) = self.shared.chain_cache.lock() {
1589            cc.clear();
1590        }
1591    }
1592
1593    /// Insert a *fast-probed* pattern into the cache and return its
1594    /// fresh `FontId`. Used by [`FcFontRegistry::request_fonts_fast`]
1595    /// when a cmap probe discovers a font that covers some subset of
1596    /// the requested codepoints. Unlike [`insert_builder_font`] this
1597    /// does **not** populate the token index (we don't have NAME
1598    /// table data), so fuzzy-name lookups on fast-probed fonts fall
1599    /// through to the filename-guess in `known_paths`.
1600    pub fn insert_fast_pattern(&self, pattern: FcPattern, path: FcFontPath) -> FontId {
1601        let id = FontId::new();
1602        let mut state = self.state_write();
1603        state.patterns.insert(pattern.clone(), id);
1604        state.disk_fonts.insert(id, path);
1605        state.metadata.insert(id, pattern);
1606        id
1607    }
1608
1609    /// Look up all `FontId`s whose `FcFontPath` matches `path`.
1610    /// Cheap way for `request_fonts_fast` to reuse fast-probed
1611    /// entries across layout passes without re-reading the cmap.
1612    ///
1613    /// O(n) over the disk_fonts map; fine for the typical case of
1614    /// <100 parsed fonts, and we skip the scan entirely when a
1615    /// stack's first candidate covers.
1616    pub fn lookup_paths_cached(&self, path: &str) -> Option<Vec<FontId>> {
1617        let state = self.state_read();
1618        let mut out = Vec::new();
1619        for (id, font_path) in &state.disk_fonts {
1620            if font_path.path == path {
1621                out.push(*id);
1622            }
1623        }
1624        if out.is_empty() { None } else { Some(out) }
1625    }
1626
1627    /// Get font data for a given font ID.
1628    ///
1629    /// Returns owned values (not references) because the underlying
1630    /// maps live behind an RwLock — a reference could not outlive
1631    /// the read guard. In-memory fonts come back as cloned `FcFont`
1632    /// instances; disk fonts return their `FcFontPath`.
1633    pub fn get_font_by_id(&self, id: &FontId) -> Option<OwnedFontSource> {
1634        let state = self.state_read();
1635        if let Some(font) = state.memory_fonts.get(id) {
1636            return Some(OwnedFontSource::Memory(font.clone()));
1637        }
1638        if let Some(path) = state.disk_fonts.get(id) {
1639            return Some(OwnedFontSource::Disk(path.clone()));
1640        }
1641        None
1642    }
1643
1644    /// Get metadata for a font ID. Returns an owned `FcPattern`
1645    /// (cloned out of the shared map) because we can't return a
1646    /// reference across the RwLock boundary.
1647    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<FcPattern> {
1648        self.state_read().metadata.get(id).cloned()
1649    }
1650
1651    /// Get the font bytes for `id` as a shared [`FontBytes`].
1652    ///
1653    /// On disk the returned `Arc<FontBytes>` wraps an mmap of the file
1654    /// (`FontBytes::Mmapped`). Untouched pages of the file never count
1655    /// toward the process's RSS — for a font where layout shapes only
1656    /// a handful of glyphs, this is the difference between paying for
1657    /// the whole 4 MiB `.ttc` and paying for the cmap + a few glyf
1658    /// pages.
1659    ///
1660    /// In-memory fonts (`FontSource::Memory`) come back as
1661    /// `FontBytes::Owned`, since the bytes are already on the heap.
1662    ///
1663    /// Multiple `FontId`s backed by the same file content (every face
1664    /// of a `.ttc`, or two paths with identical bytes) return the
1665    /// *same* `Arc<FontBytes>` thanks to a content-hash → `Weak`
1666    /// cache. Bytes get unmapped automatically when the last consumer
1667    /// drops the Arc.
1668    ///
1669    /// `FontBytes` derefs to `[u8]`, so callers that only need
1670    /// `&[u8]` (allsorts, ttf-parser, …) can pass it through without
1671    /// thinking about the backing.
1672    ///
1673    /// Failure modes: returns `None` if the path is unknown, or the
1674    /// file no longer exists / cannot be opened, or the mmap call
1675    /// fails. Callers may retry with a fresh `get_font_bytes` if they
1676    /// suspect the file was replaced underneath them; the next call
1677    /// re-opens cleanly.
1678    #[cfg(feature = "std")]
1679    pub fn get_font_bytes(&self, id: &FontId) -> Option<std::sync::Arc<FontBytes>> {
1680        use std::sync::Arc;
1681        match self.get_font_by_id(id)? {
1682            OwnedFontSource::Memory(font) => Some(Arc::new(FontBytes::Owned(
1683                Arc::from(font.bytes.as_slice()),
1684            ))),
1685            OwnedFontSource::Disk(path) => {
1686                let hash = path.bytes_hash;
1687                if hash != 0 {
1688                    if let Ok(guard) = self.shared.shared_bytes.lock() {
1689                        if let Some(weak) = guard.get(&hash) {
1690                            if let Some(arc) = weak.upgrade() {
1691                                return Some(arc);
1692                            }
1693                        }
1694                    }
1695                }
1696
1697                let arc = open_font_bytes_mmap(&path.path)?;
1698                if hash != 0 {
1699                    if let Ok(mut guard) = self.shared.shared_bytes.lock() {
1700                        // Overwrite any stale weak ref that failed to upgrade.
1701                        guard.insert(hash, Arc::downgrade(&arc));
1702                    }
1703                }
1704                Some(arc)
1705            }
1706        }
1707    }
1708
1709    /// Returns an empty font cache (no_std / no filesystem).
1710    #[cfg(not(feature = "std"))]
1711    pub fn build() -> Self { Self::default() }
1712
1713    /// Scans system font directories using filename heuristics (no allsorts).
1714    #[cfg(all(feature = "std", not(feature = "parsing")))]
1715    pub fn build() -> Self { Self::build_from_filenames() }
1716
1717    /// Scans and parses all system fonts via allsorts for full metadata.
1718    #[cfg(all(feature = "std", feature = "parsing"))]
1719    pub fn build() -> Self { Self::build_inner(None) }
1720
1721    /// Filename-only scan: discovers fonts on disk, guesses metadata from
1722    /// the filename using [`config::tokenize_font_stem`].
1723    #[cfg(all(feature = "std", not(feature = "parsing")))]
1724    fn build_from_filenames() -> Self {
1725        let cache = Self::default();
1726        {
1727            let mut state = cache.state_write();
1728            for dir in crate::config::font_directories(OperatingSystem::current()) {
1729                for path in FcCollectFontFilesRecursive(dir) {
1730                    let pattern = match pattern_from_filename(&path) {
1731                        Some(p) => p,
1732                        None => continue,
1733                    };
1734                    let id = FontId::new();
1735                    state.disk_fonts.insert(id, FcFontPath {
1736                        path: path.to_string_lossy().to_string(),
1737                        font_index: 0,
1738                        // Filename-only scan — we never read the bytes,
1739                        // so there's no dedup key. Leave as 0.
1740                        bytes_hash: 0,
1741                    });
1742                    state.index_pattern_tokens(&pattern, id);
1743                    state.metadata.insert(id, pattern.clone());
1744                    state.patterns.insert(pattern, id);
1745                }
1746            }
1747        }
1748        cache
1749    }
1750    
1751    /// Builds a font cache with only specific font families (and their fallbacks).
1752    /// 
1753    /// This is a performance optimization for applications that know ahead of time
1754    /// which fonts they need. Instead of scanning all system fonts (which can be slow
1755    /// on systems with many fonts), only fonts matching the specified families are loaded.
1756    /// 
1757    /// Generic family names like "sans-serif", "serif", "monospace" are expanded
1758    /// to OS-specific font names (e.g., "sans-serif" on macOS becomes "Helvetica Neue", 
1759    /// "San Francisco", etc.).
1760    /// 
1761    /// **Note**: This will NOT automatically load fallback fonts for scripts not covered
1762    /// by the requested families. If you need Arabic, CJK, or emoji support, either:
1763    /// - Add those families explicitly to the filter
1764    /// - Use `with_memory_fonts()` to add bundled fonts
1765    /// - Use `build()` to load all system fonts
1766    /// 
1767    /// # Arguments
1768    /// * `families` - Font family names to load (e.g., ["Arial", "sans-serif"])
1769    /// 
1770    /// # Example
1771    /// ```ignore
1772    /// // Only load Arial and sans-serif fallback fonts
1773    /// let cache = FcFontCache::build_with_families(&["Arial", "sans-serif"]);
1774    /// ```
1775    #[cfg(all(feature = "std", feature = "parsing"))]
1776    pub fn build_with_families(families: &[impl AsRef<str>]) -> Self {
1777        // Expand generic families to OS-specific names
1778        let os = OperatingSystem::current();
1779        let mut target_families: Vec<String> = Vec::new();
1780        
1781        for family in families {
1782            let family_str = family.as_ref();
1783            let expanded = os.expand_generic_family(family_str, &[]);
1784            if expanded.is_empty() || (expanded.len() == 1 && expanded[0] == family_str) {
1785                target_families.push(family_str.to_string());
1786            } else {
1787                target_families.extend(expanded);
1788            }
1789        }
1790        
1791        Self::build_inner(Some(&target_families))
1792    }
1793    
1794    /// Inner build function that handles both filtered and unfiltered font loading.
1795    /// 
1796    /// # Arguments
1797    /// * `family_filter` - If Some, only load fonts matching these family names.
1798    ///                     If None, load all fonts.
1799    #[cfg(all(feature = "std", feature = "parsing"))]
1800    fn build_inner(family_filter: Option<&[String]>) -> Self {
1801        let cache = FcFontCache::default();
1802
1803        // Normalize filter families for matching
1804        let filter_normalized: Option<Vec<String>> = family_filter.map(|families| {
1805            families
1806                .iter()
1807                .map(|f| crate::utils::normalize_family_name(f))
1808                .collect()
1809        });
1810
1811        // Helper closure to check if a pattern matches the filter
1812        let matches_filter = |pattern: &FcPattern| -> bool {
1813            match &filter_normalized {
1814                None => true, // No filter = accept all
1815                Some(targets) => {
1816                    pattern.name.as_ref().map_or(false, |name| {
1817                        let name_norm = crate::utils::normalize_family_name(name);
1818                        targets.iter().any(|target| name_norm.contains(target))
1819                    }) || pattern.family.as_ref().map_or(false, |family| {
1820                        let family_norm = crate::utils::normalize_family_name(family);
1821                        targets.iter().any(|target| family_norm.contains(target))
1822                    })
1823                }
1824            }
1825        };
1826
1827        let mut state = cache.state_write();
1828
1829        #[cfg(target_os = "linux")]
1830        {
1831            if let Some((font_entries, render_configs)) = FcScanDirectories() {
1832                for (mut pattern, path) in font_entries {
1833                    if matches_filter(&pattern) {
1834                        // Apply per-font render config if a matching family rule exists
1835                        if let Some(family) = pattern.name.as_ref().or(pattern.family.as_ref()) {
1836                            if let Some(rc) = render_configs.get(family) {
1837                                pattern.render_config = rc.clone();
1838                            }
1839                        }
1840                        let id = FontId::new();
1841                        state.patterns.insert(pattern.clone(), id);
1842                        state.metadata.insert(id, pattern.clone());
1843                        state.disk_fonts.insert(id, path);
1844                        state.index_pattern_tokens(&pattern, id);
1845                    }
1846                }
1847            }
1848        }
1849
1850        #[cfg(target_os = "windows")]
1851        {
1852            let system_root = std::env::var("SystemRoot")
1853                .or_else(|_| std::env::var("WINDIR"))
1854                .unwrap_or_else(|_| "C:\\Windows".to_string());
1855
1856            let user_profile = std::env::var("USERPROFILE")
1857                .unwrap_or_else(|_| "C:\\Users\\Default".to_string());
1858
1859            let font_dirs = vec![
1860                (None, format!("{}\\Fonts\\", system_root)),
1861                (None, format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile)),
1862            ];
1863
1864            let font_entries = FcScanDirectoriesInner(&font_dirs);
1865            for (pattern, path) in font_entries {
1866                if matches_filter(&pattern) {
1867                    let id = FontId::new();
1868                    state.patterns.insert(pattern.clone(), id);
1869                    state.metadata.insert(id, pattern.clone());
1870                    state.disk_fonts.insert(id, path);
1871                    state.index_pattern_tokens(&pattern, id);
1872                }
1873            }
1874        }
1875
1876        #[cfg(target_os = "macos")]
1877        {
1878            let font_dirs = vec![
1879                (None, "~/Library/Fonts".to_owned()),
1880                (None, "/System/Library/Fonts".to_owned()),
1881                (None, "/Library/Fonts".to_owned()),
1882                (None, "/System/Library/AssetsV2".to_owned()),
1883            ];
1884
1885            let font_entries = FcScanDirectoriesInner(&font_dirs);
1886            for (pattern, path) in font_entries {
1887                if matches_filter(&pattern) {
1888                    let id = FontId::new();
1889                    state.patterns.insert(pattern.clone(), id);
1890                    state.metadata.insert(id, pattern.clone());
1891                    state.disk_fonts.insert(id, path);
1892                    state.index_pattern_tokens(&pattern, id);
1893                }
1894            }
1895        }
1896
1897        drop(state);
1898        cache
1899    }
1900    
1901    /// Check if a font ID is a memory font (preferred over disk fonts)
1902    pub fn is_memory_font(&self, id: &FontId) -> bool {
1903        self.state_read().memory_fonts.contains_key(id)
1904    }
1905
1906    /// Returns the list of fonts and font patterns.
1907    ///
1908    /// Returns owned `FcPattern` values (cloned out of the shared
1909    /// state) — this is the v4.1 API change described on
1910    /// [`FcFontCache`]. Callers that need to iterate without
1911    /// cloning should use [`FcFontCache::for_each_pattern`].
1912    pub fn list(&self) -> Vec<(FcPattern, FontId)> {
1913        self.state_read()
1914            .patterns
1915            .iter()
1916            .map(|(pattern, id)| (pattern.clone(), *id))
1917            .collect()
1918    }
1919
1920    /// Iterate over every `(pattern, id)` pair under a single read
1921    /// guard. `f` is called once per entry — avoids the per-entry
1922    /// clone that [`list`] incurs.
1923    pub fn for_each_pattern<F: FnMut(&FcPattern, &FontId)>(&self, mut f: F) {
1924        let state = self.state_read();
1925        for (pattern, id) in &state.patterns {
1926            f(pattern, id);
1927        }
1928    }
1929
1930    /// Returns true if the cache contains no font patterns
1931    pub fn is_empty(&self) -> bool {
1932        self.state_read().patterns.is_empty()
1933    }
1934
1935    /// Returns the number of font patterns in the cache
1936    pub fn len(&self) -> usize {
1937        self.state_read().patterns.len()
1938    }
1939
1940    /// Queries a font from the in-memory cache, returns the first found font (early return)
1941    /// Memory fonts are always preferred over disk fonts with the same match quality.
1942    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
1943        let state = self.state_read();
1944        let mut matches = Vec::new();
1945
1946        for (stored_pattern, id) in &state.patterns {
1947            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1948                let metadata = state.metadata.get(id).unwrap_or(stored_pattern);
1949
1950                // Calculate Unicode compatibility score
1951                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1952                    // No specific Unicode requirements, use general coverage
1953                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1954                } else {
1955                    // Calculate how well this font covers the requested Unicode ranges
1956                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1957                };
1958
1959                let style_score = Self::calculate_style_score(pattern, metadata);
1960
1961                // Memory fonts get a bonus to prefer them over disk fonts
1962                let is_memory = state.memory_fonts.contains_key(id);
1963
1964                matches.push((*id, unicode_compatibility, style_score, metadata.clone(), is_memory));
1965            }
1966        }
1967
1968        // Sort by: 1. Memory font (preferred), 2. Unicode compatibility, 3. Style score
1969        matches.sort_by(|a, b| {
1970            // Memory fonts first
1971            b.4.cmp(&a.4)
1972                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1973                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
1974        });
1975
1976        matches.first().map(|(id, _, _, metadata, _)| {
1977            FontMatch {
1978                id: *id,
1979                unicode_ranges: metadata.unicode_ranges.clone(),
1980                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1981            }
1982        })
1983    }
1984
1985    /// Queries all fonts matching a pattern (internal use only).
1986    ///
1987    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
1988    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
1989    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
1990        let state = self.state_read();
1991        self.query_internal_locked(&state, pattern, trace)
1992    }
1993
1994    /// Internal variant used when the caller already holds a read
1995    /// guard on the state. Avoids re-locking.
1996    fn query_internal_locked(
1997        &self,
1998        state: &FcFontCacheInner,
1999        pattern: &FcPattern,
2000        trace: &mut Vec<TraceMsg>,
2001    ) -> Vec<FontMatch> {
2002        let mut matches = Vec::new();
2003
2004        for (stored_pattern, id) in &state.patterns {
2005            if Self::query_matches_internal(stored_pattern, pattern, trace) {
2006                let metadata = state.metadata.get(id).unwrap_or(stored_pattern);
2007
2008                // Calculate Unicode compatibility score
2009                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
2010                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
2011                } else {
2012                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
2013                };
2014
2015                let style_score = Self::calculate_style_score(pattern, metadata);
2016                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
2017            }
2018        }
2019
2020        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
2021        // Style matching (weight, italic, etc.) is now the primary criterion
2022        // Deterministic tiebreaker: prefer non-italic, then alphabetical by name
2023        matches.sort_by(|a, b| {
2024            a.2.cmp(&b.2) // Style score (lower is better)
2025                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
2026                .then_with(|| a.3.italic.cmp(&b.3.italic)) // Prefer non-italic
2027                .then_with(|| a.3.name.cmp(&b.3.name)) // Alphabetical tiebreaker
2028        });
2029
2030        matches
2031            .into_iter()
2032            .map(|(id, _, _, metadata)| {
2033                FontMatch {
2034                    id,
2035                    unicode_ranges: metadata.unicode_ranges.clone(),
2036                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2037                }
2038            })
2039            .collect()
2040    }
2041
2042    /// Compute fallback fonts for a given font
2043    /// This is a lazy operation that can be expensive - only call when actually needed
2044    /// (e.g., for FFI or debugging, not needed for resolve_char)
2045    pub fn compute_fallbacks(
2046        &self,
2047        font_id: &FontId,
2048        trace: &mut Vec<TraceMsg>,
2049    ) -> Vec<FontMatchNoFallback> {
2050        let state = self.state_read();
2051        let pattern = match state.metadata.get(font_id) {
2052            Some(p) => p.clone(),
2053            None => return Vec::new(),
2054        };
2055        drop(state);
2056
2057        self.compute_fallbacks_for_pattern(&pattern, Some(font_id), trace)
2058    }
2059
2060    fn compute_fallbacks_for_pattern(
2061        &self,
2062        pattern: &FcPattern,
2063        exclude_id: Option<&FontId>,
2064        _trace: &mut Vec<TraceMsg>,
2065    ) -> Vec<FontMatchNoFallback> {
2066        let state = self.state_read();
2067        let mut candidates = Vec::new();
2068
2069        // Collect all potential fallbacks (excluding original pattern)
2070        for (stored_pattern, id) in &state.patterns {
2071            // Skip if this is the original font
2072            if exclude_id.is_some() && exclude_id.unwrap() == id {
2073                continue;
2074            }
2075
2076            // Check if this font supports any of the unicode ranges
2077            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2078                // Calculate Unicode compatibility
2079                let unicode_compatibility = Self::calculate_unicode_compatibility(
2080                    &pattern.unicode_ranges,
2081                    &stored_pattern.unicode_ranges
2082                );
2083
2084                // Only include if there's actual overlap
2085                if unicode_compatibility > 0 {
2086                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
2087                    candidates.push((
2088                        FontMatchNoFallback {
2089                            id: *id,
2090                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
2091                        },
2092                        unicode_compatibility,
2093                        style_score,
2094                        stored_pattern.clone(),
2095                    ));
2096                }
2097            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
2098                // No specific Unicode requirements, use general coverage
2099                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
2100                let style_score = Self::calculate_style_score(pattern, stored_pattern);
2101                candidates.push((
2102                    FontMatchNoFallback {
2103                        id: *id,
2104                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
2105                    },
2106                    coverage,
2107                    style_score,
2108                    stored_pattern.clone(),
2109                ));
2110            }
2111        }
2112
2113        drop(state);
2114
2115        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
2116        candidates.sort_by(|a, b| {
2117            b.1.cmp(&a.1)
2118                .then_with(|| a.2.cmp(&b.2))
2119        });
2120
2121        // Deduplicate by keeping only the best match per unique unicode range
2122        let mut seen_ranges = Vec::new();
2123        let mut deduplicated = Vec::new();
2124
2125        for (id, _, _, pattern) in candidates {
2126            let mut is_new_range = false;
2127
2128            for range in &pattern.unicode_ranges {
2129                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
2130                    seen_ranges.push(*range);
2131                    is_new_range = true;
2132                }
2133            }
2134
2135            if is_new_range {
2136                deduplicated.push(id);
2137            }
2138        }
2139
2140        deduplicated
2141    }
2142
2143    /// Get in-memory font data (cloned out of the shared state).
2144    pub fn get_memory_font(&self, id: &FontId) -> Option<FcFont> {
2145        self.state_read().memory_fonts.get(id).cloned()
2146    }
2147
2148    /// Check if a pattern matches the query, with detailed tracing
2149    fn trace_path(k: &FcPattern) -> String {
2150        k.name.as_ref().cloned().unwrap_or_else(|| "<unknown>".to_string())
2151    }
2152
2153    pub fn query_matches_internal(
2154        k: &FcPattern,
2155        pattern: &FcPattern,
2156        trace: &mut Vec<TraceMsg>,
2157    ) -> bool {
2158        // Check name - substring match
2159        if let Some(ref name) = pattern.name {
2160            if !k.name.as_ref().map_or(false, |kn| kn.contains(name)) {
2161                trace.push(TraceMsg {
2162                    level: TraceLevel::Info,
2163                    path: Self::trace_path(k),
2164                    reason: MatchReason::NameMismatch {
2165                        requested: pattern.name.clone(),
2166                        found: k.name.clone(),
2167                    },
2168                });
2169                return false;
2170            }
2171        }
2172
2173        // Check family - substring match
2174        if let Some(ref family) = pattern.family {
2175            if !k.family.as_ref().map_or(false, |kf| kf.contains(family)) {
2176                trace.push(TraceMsg {
2177                    level: TraceLevel::Info,
2178                    path: Self::trace_path(k),
2179                    reason: MatchReason::FamilyMismatch {
2180                        requested: pattern.family.clone(),
2181                        found: k.family.clone(),
2182                    },
2183                });
2184                return false;
2185            }
2186        }
2187
2188        // Check style properties
2189        let style_properties = [
2190            (
2191                "italic",
2192                pattern.italic.needs_to_match(),
2193                pattern.italic.matches(&k.italic),
2194            ),
2195            (
2196                "oblique",
2197                pattern.oblique.needs_to_match(),
2198                pattern.oblique.matches(&k.oblique),
2199            ),
2200            (
2201                "bold",
2202                pattern.bold.needs_to_match(),
2203                pattern.bold.matches(&k.bold),
2204            ),
2205            (
2206                "monospace",
2207                pattern.monospace.needs_to_match(),
2208                pattern.monospace.matches(&k.monospace),
2209            ),
2210            (
2211                "condensed",
2212                pattern.condensed.needs_to_match(),
2213                pattern.condensed.matches(&k.condensed),
2214            ),
2215        ];
2216
2217        for (property_name, needs_to_match, matches) in style_properties {
2218            if needs_to_match && !matches {
2219                let (requested, found) = match property_name {
2220                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
2221                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
2222                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
2223                    "monospace" => (
2224                        format!("{:?}", pattern.monospace),
2225                        format!("{:?}", k.monospace),
2226                    ),
2227                    "condensed" => (
2228                        format!("{:?}", pattern.condensed),
2229                        format!("{:?}", k.condensed),
2230                    ),
2231                    _ => (String::new(), String::new()),
2232                };
2233
2234                trace.push(TraceMsg {
2235                    level: TraceLevel::Info,
2236                    path: Self::trace_path(k),
2237                    reason: MatchReason::StyleMismatch {
2238                        property: property_name,
2239                        requested,
2240                        found,
2241                    },
2242                });
2243                return false;
2244            }
2245        }
2246
2247        // Check weight - hard filter if non-normal weight is requested
2248        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
2249            trace.push(TraceMsg {
2250                level: TraceLevel::Info,
2251                path: Self::trace_path(k),
2252                reason: MatchReason::WeightMismatch {
2253                    requested: pattern.weight,
2254                    found: k.weight,
2255                },
2256            });
2257            return false;
2258        }
2259
2260        // Check stretch - hard filter if non-normal stretch is requested
2261        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
2262            trace.push(TraceMsg {
2263                level: TraceLevel::Info,
2264                path: Self::trace_path(k),
2265                reason: MatchReason::StretchMismatch {
2266                    requested: pattern.stretch,
2267                    found: k.stretch,
2268                },
2269            });
2270            return false;
2271        }
2272
2273        // Check unicode ranges if specified
2274        if !pattern.unicode_ranges.is_empty() {
2275            let mut has_overlap = false;
2276
2277            for p_range in &pattern.unicode_ranges {
2278                for k_range in &k.unicode_ranges {
2279                    if p_range.overlaps(k_range) {
2280                        has_overlap = true;
2281                        break;
2282                    }
2283                }
2284                if has_overlap {
2285                    break;
2286                }
2287            }
2288
2289            if !has_overlap {
2290                trace.push(TraceMsg {
2291                    level: TraceLevel::Info,
2292                    path: Self::trace_path(k),
2293                    reason: MatchReason::UnicodeRangeMismatch {
2294                        character: '\0', // No specific character to report
2295                        ranges: k.unicode_ranges.clone(),
2296                    },
2297                });
2298                return false;
2299            }
2300        }
2301
2302        true
2303    }
2304    
2305    /// Resolve a complete font fallback chain for a CSS font-family stack
2306    /// This is the main entry point for font resolution with caching
2307    /// Automatically expands generic CSS families (serif, sans-serif, monospace) to OS-specific fonts
2308    /// 
2309    /// # Arguments
2310    /// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
2311    /// * `text` - The text to render (used to extract Unicode ranges)
2312    /// * `weight` - Font weight
2313    /// * `italic` - Italic style requirement
2314    /// * `oblique` - Oblique style requirement
2315    /// * `trace` - Debug trace messages
2316    /// 
2317    /// # Returns
2318    /// A complete font fallback chain with CSS fallbacks and Unicode fallbacks
2319    /// 
2320    /// # Example
2321    /// ```no_run
2322    /// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
2323    /// let cache = FcFontCache::build();
2324    /// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
2325    /// let chain = cache.resolve_font_chain(&families, FcWeight::Normal, 
2326    ///                                       PatternMatch::DontCare, PatternMatch::DontCare, 
2327    ///                                       &mut Vec::new());
2328    /// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
2329    /// ```
2330    #[cfg(feature = "std")]
2331    pub fn resolve_font_chain(
2332        &self,
2333        font_families: &[String],
2334        weight: FcWeight,
2335        italic: PatternMatch,
2336        oblique: PatternMatch,
2337        trace: &mut Vec<TraceMsg>,
2338    ) -> FontFallbackChain {
2339        self.resolve_font_chain_with_os(font_families, weight, italic, oblique, trace, OperatingSystem::current())
2340    }
2341    
2342    /// Resolve font chain with explicit OS specification (useful for testing)
2343    #[cfg(feature = "std")]
2344    pub fn resolve_font_chain_with_os(
2345        &self,
2346        font_families: &[String],
2347        weight: FcWeight,
2348        italic: PatternMatch,
2349        oblique: PatternMatch,
2350        trace: &mut Vec<TraceMsg>,
2351        os: OperatingSystem,
2352    ) -> FontFallbackChain {
2353        self.resolve_font_chain_impl(font_families, weight, italic, oblique, None, trace, os)
2354    }
2355
2356    /// Resolve a font fallback chain, restricting Unicode fallbacks to the
2357    /// caller-supplied set of scripts (usually derived from the actual
2358    /// text content of the document).
2359    ///
2360    /// - `scripts_hint: None` → back-compat behaviour, equivalent to
2361    ///   [`FcFontCache::resolve_font_chain`]: pulls in fallback fonts for
2362    ///   the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`] set.
2363    /// - `scripts_hint: Some(&[])` → no Unicode fallbacks attached. For
2364    ///   an ASCII-only page this avoids pulling Arial Unicode MS,
2365    ///   CJK fonts, etc. into memory when they're not needed.
2366    /// - `scripts_hint: Some(&[CJK])` → only CJK fallback attached.
2367    ///
2368    /// The chain cache is keyed so an ASCII-only resolution cannot be
2369    /// served from a slot populated by a default/all-scripts resolution.
2370    #[cfg(feature = "std")]
2371    pub fn resolve_font_chain_with_scripts(
2372        &self,
2373        font_families: &[String],
2374        weight: FcWeight,
2375        italic: PatternMatch,
2376        oblique: PatternMatch,
2377        scripts_hint: Option<&[UnicodeRange]>,
2378        trace: &mut Vec<TraceMsg>,
2379    ) -> FontFallbackChain {
2380        self.resolve_font_chain_impl(
2381            font_families, weight, italic, oblique, scripts_hint,
2382            trace, OperatingSystem::current(),
2383        )
2384    }
2385
2386    /// Shared entry used by [`resolve_font_chain_with_os`] and
2387    /// [`resolve_font_chain_with_scripts`]. Handles the cache lookup,
2388    /// generic-family expansion, and delegation to the uncached builder.
2389    #[cfg(feature = "std")]
2390    fn resolve_font_chain_impl(
2391        &self,
2392        font_families: &[String],
2393        weight: FcWeight,
2394        italic: PatternMatch,
2395        oblique: PatternMatch,
2396        scripts_hint: Option<&[UnicodeRange]>,
2397        trace: &mut Vec<TraceMsg>,
2398        os: OperatingSystem,
2399    ) -> FontFallbackChain {
2400        // Check cache FIRST - key uses original (unexpanded) families
2401        // plus a hash over the scripts_hint so ASCII-only callers don't
2402        // consume a slot filled by a default-scripts caller.
2403        let scripts_hint_hash = scripts_hint.map(hash_scripts_hint);
2404        let cache_key = FontChainCacheKey {
2405            font_families: font_families.to_vec(),
2406            weight,
2407            italic,
2408            oblique,
2409            scripts_hint_hash,
2410        };
2411
2412        if let Some(cached) = self
2413            .shared
2414            .chain_cache
2415            .lock()
2416            .ok()
2417            .and_then(|c| c.get(&cache_key).cloned())
2418        {
2419            return cached;
2420        }
2421
2422        // Expand generic CSS families to OS-specific fonts
2423        let expanded_families = expand_font_families(font_families, os, &[]);
2424
2425        // Build the chain
2426        let chain = self.resolve_font_chain_uncached(
2427            &expanded_families,
2428            weight,
2429            italic,
2430            oblique,
2431            scripts_hint,
2432            trace,
2433        );
2434
2435        // Cache the result
2436        if let Ok(mut cache) = self.shared.chain_cache.lock() {
2437            cache.insert(cache_key, chain.clone());
2438        }
2439
2440        chain
2441    }
2442    
2443    /// Internal implementation without caching.
2444    ///
2445    /// `scripts_hint`:
2446    /// - `None` pulls in the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`]
2447    ///   set (the original, back-compat behaviour).
2448    /// - `Some(&[])` attaches no Unicode fallbacks.
2449    /// - `Some(ranges)` attaches fallbacks only for those ranges.
2450    #[cfg(feature = "std")]
2451    fn resolve_font_chain_uncached(
2452        &self,
2453        font_families: &[String],
2454        weight: FcWeight,
2455        italic: PatternMatch,
2456        oblique: PatternMatch,
2457        scripts_hint: Option<&[UnicodeRange]>,
2458        trace: &mut Vec<TraceMsg>,
2459    ) -> FontFallbackChain {
2460        let mut css_fallbacks = Vec::new();
2461        
2462        // Resolve each CSS font-family to its system fallbacks
2463        for (_i, family) in font_families.iter().enumerate() {
2464            // Check if this is a generic font family
2465            let (pattern, is_generic) = if config::is_generic_family(family) {
2466                let monospace = if family.eq_ignore_ascii_case("monospace") {
2467                    PatternMatch::True
2468                } else {
2469                    PatternMatch::False
2470                };
2471                let pattern = FcPattern {
2472                    name: None,
2473                    weight,
2474                    italic,
2475                    oblique,
2476                    monospace,
2477                    unicode_ranges: Vec::new(),
2478                    ..Default::default()
2479                };
2480                (pattern, true)
2481            } else {
2482                // Specific font family name
2483                let pattern = FcPattern {
2484                    name: Some(family.clone()),
2485                    weight,
2486                    italic,
2487                    oblique,
2488                    unicode_ranges: Vec::new(),
2489                    ..Default::default()
2490                };
2491                (pattern, false)
2492            };
2493            
2494            // Use fuzzy matching for specific fonts (fast token-based lookup)
2495            // For generic families, use query (slower but necessary for property matching)
2496            let mut matches = if is_generic {
2497                // Generic families need full pattern matching
2498                self.query_internal(&pattern, trace)
2499            } else {
2500                // Specific font names: use fast token-based fuzzy matching
2501                self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
2502            };
2503            
2504            // For generic families, limit to top 5 fonts to avoid too many matches
2505            if is_generic && matches.len() > 5 {
2506                matches.truncate(5);
2507            }
2508            
2509            // Always add the CSS fallback group to preserve CSS ordering
2510            // even if no fonts were found for this family
2511            css_fallbacks.push(CssFallbackGroup {
2512                css_name: family.clone(),
2513                fonts: matches,
2514            });
2515        }
2516        
2517        // Populate unicode_fallbacks. CSS fallback fonts may falsely claim
2518        // coverage of a script via the OS/2 unicode-range bits without
2519        // actually having glyphs, so we supplement the CSS chain with an
2520        // explicit lookup for each requested script block. resolve_char()
2521        // prefers CSS fallbacks first (earlier in the chain wins).
2522        //
2523        // The set of script blocks to cover is caller-controlled via
2524        // `scripts_hint`: `None` keeps the back-compat DEFAULT_UNICODE_FALLBACK_SCRIPTS
2525        // behaviour (7 scripts) so existing `resolve_font_chain` consumers
2526        // stay unchanged; `Some(&[])` opts into "no unicode fallbacks at all"
2527        // for ASCII-only documents, eliminating the big CJK / Arabic fonts
2528        // from the resolved chain (and therefore from eager downstream parses).
2529        let important_ranges: &[UnicodeRange] =
2530            scripts_hint.unwrap_or(DEFAULT_UNICODE_FALLBACK_SCRIPTS);
2531        let unicode_fallbacks = if important_ranges.is_empty() {
2532            Vec::new()
2533        } else {
2534            let all_uncovered = vec![false; important_ranges.len()];
2535            self.find_unicode_fallbacks(
2536                important_ranges,
2537                &all_uncovered,
2538                &css_fallbacks,
2539                weight,
2540                italic,
2541                oblique,
2542                trace,
2543            )
2544        };
2545
2546        FontFallbackChain {
2547            css_fallbacks,
2548            unicode_fallbacks,
2549            original_stack: font_families.to_vec(),
2550        }
2551    }
2552    
2553    /// Extract Unicode ranges from text
2554    #[allow(dead_code)]
2555    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
2556        let mut chars: Vec<char> = text.chars().collect();
2557        chars.sort_unstable();
2558        chars.dedup();
2559        
2560        if chars.is_empty() {
2561            return Vec::new();
2562        }
2563        
2564        let mut ranges = Vec::new();
2565        let mut range_start = chars[0] as u32;
2566        let mut range_end = range_start;
2567        
2568        for &c in &chars[1..] {
2569            let codepoint = c as u32;
2570            if codepoint == range_end + 1 {
2571                range_end = codepoint;
2572            } else {
2573                ranges.push(UnicodeRange { start: range_start, end: range_end });
2574                range_start = codepoint;
2575                range_end = codepoint;
2576            }
2577        }
2578        
2579        ranges.push(UnicodeRange { start: range_start, end: range_end });
2580        ranges
2581    }
2582    
2583    /// Fuzzy query for fonts by name when exact match fails
2584    /// Uses intelligent token-based matching with inverted index for speed:
2585    /// 1. Break name into tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2586    /// 2. Use token_index to find candidate fonts via BTreeSet intersection
2587    /// 3. Score only the candidate fonts (instead of all 800+ patterns)
2588    /// 4. Prioritize fonts matching more tokens + Unicode coverage
2589    #[cfg(feature = "std")]
2590    fn fuzzy_query_by_name(
2591        &self,
2592        requested_name: &str,
2593        weight: FcWeight,
2594        italic: PatternMatch,
2595        oblique: PatternMatch,
2596        unicode_ranges: &[UnicodeRange],
2597        _trace: &mut Vec<TraceMsg>,
2598    ) -> Vec<FontMatch> {
2599        // Extract tokens from the requested name (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2600        let tokens = Self::extract_font_name_tokens(requested_name);
2601        
2602        if tokens.is_empty() {
2603            return Vec::new();
2604        }
2605        
2606        // Convert tokens to lowercase for case-insensitive lookup
2607        let tokens_lower: Vec<String> = tokens.iter().map(|t| t.to_lowercase()).collect();
2608        
2609        // Progressive token matching strategy:
2610        // Start with first token, then progressively narrow down with each additional token
2611        // If adding a token results in 0 matches, use the previous (broader) set
2612        // Example: ["Noto"] -> 10 fonts, ["Noto","Sans"] -> 2 fonts, ["Noto","Sans","JP"] -> 0 fonts => use 2 fonts
2613        
2614        let state = self.state_read();
2615
2616        // Start with the first token
2617        let first_token = &tokens_lower[0];
2618        let mut candidate_ids = match state.token_index.get(first_token) {
2619            Some(ids) if !ids.is_empty() => ids.clone(),
2620            _ => {
2621                // First token not found - no fonts match, quit immediately
2622                return Vec::new();
2623            }
2624        };
2625
2626        // Progressively narrow down with each additional token
2627        for token in &tokens_lower[1..] {
2628            if let Some(token_ids) = state.token_index.get(token) {
2629                // Calculate intersection
2630                let intersection: alloc::collections::BTreeSet<FontId> =
2631                    candidate_ids.intersection(token_ids).copied().collect();
2632
2633                if intersection.is_empty() {
2634                    // Adding this token results in 0 matches - keep previous set and stop
2635                    break;
2636                } else {
2637                    // Successfully narrowed down - use intersection
2638                    candidate_ids = intersection;
2639                }
2640            } else {
2641                // Token not in index - keep current set and stop
2642                break;
2643            }
2644        }
2645
2646        // Now score only the candidate fonts (HUGE speedup!)
2647        let mut candidates = Vec::new();
2648
2649        for id in candidate_ids {
2650            let pattern = match state.metadata.get(&id) {
2651                Some(p) => p,
2652                None => continue,
2653            };
2654            
2655            // Get pre-tokenized font name (already lowercase)
2656            let font_tokens_lower = match state.font_tokens.get(&id) {
2657                Some(tokens) => tokens,
2658                None => continue,
2659            };
2660            
2661            if font_tokens_lower.is_empty() {
2662                continue;
2663            }
2664            
2665            // Calculate token match score (how many requested tokens appear in font name)
2666            // Both tokens_lower and font_tokens_lower are already lowercase, so direct comparison
2667            let token_matches = tokens_lower.iter()
2668                .filter(|req_token| {
2669                    font_tokens_lower.iter().any(|font_token| {
2670                        // Both already lowercase — exact token match (index guarantees candidates)
2671                        font_token == *req_token
2672                    })
2673                })
2674                .count();
2675            
2676            // Skip if no tokens match (shouldn't happen due to index, but safety check)
2677            if token_matches == 0 {
2678                continue;
2679            }
2680            
2681            // Calculate token similarity score (0-100)
2682            let token_similarity = (token_matches * 100 / tokens.len()) as i32;
2683            
2684            // Calculate Unicode range similarity
2685            let unicode_similarity = if !unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2686                Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
2687            } else {
2688                0
2689            };
2690            
2691            // CRITICAL: If we have Unicode requirements, ONLY accept fonts that cover them
2692            // A font with great name match but no Unicode coverage is useless
2693            if !unicode_ranges.is_empty() && unicode_similarity == 0 {
2694                continue;
2695            }
2696            
2697            let style_score = Self::calculate_style_score(&FcPattern {
2698                weight,
2699                italic,
2700                oblique,
2701                ..Default::default()
2702            }, pattern);
2703            
2704            candidates.push((
2705                id,
2706                token_similarity,
2707                unicode_similarity,
2708                style_score,
2709                pattern.clone(),
2710            ));
2711        }
2712        
2713        // Sort by:
2714        // 1. Token matches (more matches = better)
2715        // 2. Unicode compatibility (if ranges provided)
2716        // 3. Style score (lower is better)
2717        // 4. Deterministic tiebreaker: prefer non-italic, then by font name
2718        candidates.sort_by(|a, b| {
2719            if !unicode_ranges.is_empty() {
2720                // When we have Unicode requirements, prioritize coverage
2721                b.1.cmp(&a.1) // Token similarity (higher is better) - PRIMARY
2722                    .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better) - SECONDARY
2723                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better) - TERTIARY
2724                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2725                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2726            } else {
2727                // No Unicode requirements, token similarity is primary
2728                b.1.cmp(&a.1) // Token similarity (higher is better)
2729                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
2730                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2731                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2732            }
2733        });
2734        
2735        // Take top 5 matches
2736        candidates.truncate(5);
2737        
2738        // Convert to FontMatch
2739        candidates
2740            .into_iter()
2741            .map(|(id, _token_sim, _unicode_sim, _style, pattern)| {
2742                FontMatch {
2743                    id,
2744                    unicode_ranges: pattern.unicode_ranges.clone(),
2745                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2746                }
2747            })
2748            .collect()
2749    }
2750    
2751    /// Extract tokens from a font name
2752    /// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
2753    /// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
2754    pub fn extract_font_name_tokens(name: &str) -> Vec<String> {
2755        let mut tokens = Vec::new();
2756        let mut current_token = String::new();
2757        let mut last_was_lower = false;
2758        
2759        for c in name.chars() {
2760            if c.is_whitespace() || c == '-' || c == '_' {
2761                // Word separator
2762                if !current_token.is_empty() {
2763                    tokens.push(current_token.clone());
2764                    current_token.clear();
2765                }
2766                last_was_lower = false;
2767            } else if c.is_uppercase() && last_was_lower && !current_token.is_empty() {
2768                // CamelCase boundary (e.g., "Noto" | "Sans")
2769                tokens.push(current_token.clone());
2770                current_token.clear();
2771                current_token.push(c);
2772                last_was_lower = false;
2773            } else {
2774                current_token.push(c);
2775                last_was_lower = c.is_lowercase();
2776            }
2777        }
2778        
2779        if !current_token.is_empty() {
2780            tokens.push(current_token);
2781        }
2782        
2783        tokens
2784    }
2785    
2786    /// Find fonts to cover missing Unicode ranges
2787    /// Uses intelligent matching: prefers fonts with similar names to existing ones
2788    /// Early quits once all Unicode ranges are covered for performance
2789    fn find_unicode_fallbacks(
2790        &self,
2791        unicode_ranges: &[UnicodeRange],
2792        covered_chars: &[bool],
2793        existing_groups: &[CssFallbackGroup],
2794        _weight: FcWeight,
2795        _italic: PatternMatch,
2796        _oblique: PatternMatch,
2797        trace: &mut Vec<TraceMsg>,
2798    ) -> Vec<FontMatch> {
2799        // Extract uncovered ranges
2800        let mut uncovered_ranges = Vec::new();
2801        for (i, &covered) in covered_chars.iter().enumerate() {
2802            if !covered && i < unicode_ranges.len() {
2803                uncovered_ranges.push(unicode_ranges[i].clone());
2804            }
2805        }
2806        
2807        if uncovered_ranges.is_empty() {
2808            return Vec::new();
2809        }
2810
2811        // Query for fonts that cover these ranges.
2812        // Use DontCare for weight/italic/oblique — we want ANY font that covers
2813        // the missing characters, regardless of style. The similarity sort below
2814        // will prefer fonts matching the existing chain's style anyway.
2815        let pattern = FcPattern {
2816            name: None,
2817            weight: FcWeight::Normal, // Normal weight is not filtered by query_matches_internal (line 1836)
2818            italic: PatternMatch::DontCare,
2819            oblique: PatternMatch::DontCare,
2820            unicode_ranges: uncovered_ranges.clone(),
2821            ..Default::default()
2822        };
2823        
2824        let mut candidates = self.query_internal(&pattern, trace);
2825
2826        // Intelligent sorting: prefer fonts with similar names to existing ones
2827        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
2828        let existing_prefixes: Vec<String> = existing_groups
2829            .iter()
2830            .flat_map(|group| {
2831                group.fonts.iter().filter_map(|font| {
2832                    self.get_metadata_by_id(&font.id)
2833                        .and_then(|meta| meta.family.clone())
2834                        .and_then(|family| {
2835                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
2836                            family.split_whitespace()
2837                                .take(2)
2838                                .collect::<Vec<_>>()
2839                                .join(" ")
2840                                .into()
2841                        })
2842                })
2843            })
2844            .collect();
2845        
2846        // Sort candidates by:
2847        // 1. Name similarity to existing fonts (highest priority)
2848        // 2. Unicode coverage (secondary)
2849        candidates.sort_by(|a, b| {
2850            let a_meta = self.get_metadata_by_id(&a.id);
2851            let b_meta = self.get_metadata_by_id(&b.id);
2852
2853            let a_score = Self::calculate_font_similarity_score(a_meta.as_ref(), &existing_prefixes);
2854            let b_score = Self::calculate_font_similarity_score(b_meta.as_ref(), &existing_prefixes);
2855            
2856            b_score.cmp(&a_score) // Higher score = better match
2857                .then_with(|| {
2858                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
2859                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
2860                    b_coverage.cmp(&a_coverage)
2861                })
2862        });
2863        
2864        // Early quit optimization: only take fonts until all ranges are covered
2865        let mut result = Vec::new();
2866        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
2867        
2868        for candidate in candidates {
2869            // Check which ranges this font covers
2870            let mut covers_new_range = false;
2871            
2872            for (i, range) in uncovered_ranges.iter().enumerate() {
2873                if remaining_uncovered[i] {
2874                    // Check if this font covers this range
2875                    for font_range in &candidate.unicode_ranges {
2876                        if font_range.overlaps(range) {
2877                            remaining_uncovered[i] = false;
2878                            covers_new_range = true;
2879                            break;
2880                        }
2881                    }
2882                }
2883            }
2884            
2885            // Only add fonts that cover at least one new range
2886            if covers_new_range {
2887                result.push(candidate);
2888                
2889                // Early quit: if all ranges are covered, stop
2890                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
2891                    break;
2892                }
2893            }
2894        }
2895        
2896        result
2897    }
2898    
2899    /// Calculate similarity score between a font and existing font prefixes
2900    /// Higher score = more similar
2901    fn calculate_font_similarity_score(
2902        font_meta: Option<&FcPattern>,
2903        existing_prefixes: &[String],
2904    ) -> i32 {
2905        let Some(meta) = font_meta else { return 0; };
2906        let Some(family) = &meta.family else { return 0; };
2907        
2908        // Check if this font's family matches any existing prefix
2909        for prefix in existing_prefixes {
2910            if family.starts_with(prefix) {
2911                return 100; // Strong match
2912            }
2913            if family.contains(prefix) {
2914                return 50; // Partial match
2915            }
2916        }
2917        
2918        0 // No match
2919    }
2920    
2921    /// Find fallback fonts for a given pattern
2922    // Helper to calculate total unicode coverage
2923    pub fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
2924        ranges
2925            .iter()
2926            .map(|range| (range.end - range.start + 1) as u64)
2927            .sum()
2928    }
2929
2930    /// Calculate how well a font's Unicode ranges cover the requested ranges
2931    /// Returns a compatibility score (higher is better, 0 means no overlap)
2932    pub fn calculate_unicode_compatibility(
2933        requested: &[UnicodeRange],
2934        available: &[UnicodeRange],
2935    ) -> i32 {
2936        if requested.is_empty() {
2937            // No specific requirements, return total coverage
2938            return Self::calculate_unicode_coverage(available) as i32;
2939        }
2940        
2941        let mut total_coverage = 0u32;
2942        
2943        for req_range in requested {
2944            for avail_range in available {
2945                // Calculate overlap between requested and available ranges
2946                let overlap_start = req_range.start.max(avail_range.start);
2947                let overlap_end = req_range.end.min(avail_range.end);
2948                
2949                if overlap_start <= overlap_end {
2950                    // There is overlap
2951                    let overlap_size = overlap_end - overlap_start + 1;
2952                    total_coverage += overlap_size;
2953                }
2954            }
2955        }
2956        
2957        total_coverage as i32
2958    }
2959
    /// Score how well `candidate` matches the style requested by `original`.
    ///
    /// The result is a sum of penalties (positive) and bonuses (negative), so
    /// a LOWER score means a better match and the score can be negative.
    /// Callers in this file sort ascending by this value.
    ///
    /// Components, in the order they are applied:
    /// 1. Weight: absolute difference of the numeric weights, unless the
    ///    `bold` request is already satisfied by the candidate's weight.
    /// 2. Exact weight match: -15, and a further -10 when that weight is
    ///    `Normal`.
    /// 3. Stretch: 100 x the absolute difference of the numeric stretch
    ///    values, unless the `condensed` request is already satisfied.
    /// 4. italic / oblique / bold / monospace / condensed flags, each with
    ///    its own mismatch and "candidate does not declare" penalties.
    /// 5. "Base font" detection from the name: -50 when the name is just the
    ///    family (optionally plus regular/normal/book/roman), otherwise +25
    ///    per extra word.
    /// 6. -30 when the subfamily is exactly "Regular" (case-insensitive).
    ///
    /// NOTE(review): steps 1 and 3 assume the numeric values of `FcWeight`
    /// and the stretch enum are meaningful distances; confirm against their
    /// definitions.
    pub fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {

        let mut score = 0_i32;

        // Weight calculation with special handling for bold property
        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
        {
            // No weight penalty when bold is requested and font has Bold weight
            // No weight penalty when non-bold is requested and font has non-Bold weight
        } else {
            // Apply normal weight difference penalty
            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
            score += weight_diff as i32;
        }

        // Exact weight match bonus: reward fonts whose weight matches the request exactly,
        // with an extra bonus when both are Normal (the most common case for body text)
        if original.weight == candidate.weight {
            score -= 15;
            if original.weight == FcWeight::Normal {
                score -= 10; // Extra bonus for Normal-Normal match
            }
        }

        // Stretch calculation with special handling for condensed property
        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
        {
            // No stretch penalty when condensed is requested and font has condensed stretch
            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
        } else {
            // Apply normal stretch difference penalty
            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
            score += (stretch_diff * 100) as i32;
        }

        // Handle style properties with standard penalties and bonuses.
        // Tuple layout: (requested, candidate, mismatch_penalty, dontcare_penalty).
        let style_props = [
            (original.italic, candidate.italic, 300, 150),
            (original.oblique, candidate.oblique, 200, 100),
            (original.bold, candidate.bold, 300, 150),
            (original.monospace, candidate.monospace, 100, 50),
            (original.condensed, candidate.condensed, 100, 50),
        ];

        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
            // NOTE(review): `needs_to_match()` is presumably true for
            // `True`/`False` and false for `DontCare` (the `else` branch
            // below is commented as the DontCare case); confirm.
            if orig.needs_to_match() {
                if orig == PatternMatch::False && cand == PatternMatch::DontCare {
                    // Requesting non-italic but font doesn't declare: small penalty
                    // (less than a full mismatch but more than a perfect match)
                    score += dontcare_penalty / 2;
                } else if !orig.matches(&cand) {
                    // Request not satisfied: undeclared candidates are
                    // penalised less than outright contradictions.
                    if cand == PatternMatch::DontCare {
                        score += dontcare_penalty;
                    } else {
                        score += mismatch_penalty;
                    }
                } else if orig == PatternMatch::True && cand == PatternMatch::True {
                    // Give bonus for exact True match
                    score -= 20;
                } else if orig == PatternMatch::False && cand == PatternMatch::False {
                    // Give bonus for exact False match (prefer explicitly non-italic
                    // over fonts with unknown/DontCare italic status)
                    score -= 20;
                }
            } else {
                // orig == DontCare: prefer "normal" fonts over styled ones.
                // When the caller doesn't specify italic/bold/etc., a font
                // that IS italic/bold should score slightly worse than one
                // that isn't, so Regular is chosen over Italic by default.
                if cand == PatternMatch::True {
                    score += dontcare_penalty / 3;
                }
            }
        }

        // ── Name-based "base font" detection ──
        // The shorter the font name relative to its family, the more "basic" the
        // variant.  E.g. "System Font" (the base) should score better than
        // "System Font Regular Italic" (a variant) when the user hasn't
        // explicitly requested italic.
        if let (Some(name), Some(family)) = (&candidate.name, &candidate.family) {
            let name_lower = name.to_lowercase();
            let family_lower = family.to_lowercase();

            // Strip the family prefix from the name to get the "extra" part.
            // If the name does not start with the family, `extra` is empty and
            // the font is treated as a base font below.
            let extra = if name_lower.starts_with(&family_lower) {
                name_lower[family_lower.len()..].to_string()
            } else {
                String::new()
            };

            // Strip common neutral descriptors that don't indicate a style variant
            let stripped = extra
                .replace("regular", "")
                .replace("normal", "")
                .replace("book", "")
                .replace("roman", "");
            let stripped = stripped.trim();

            if stripped.is_empty() {
                // This is a "base font" – name is just the family (± "Regular")
                score -= 50;
            } else {
                // Name has extra style descriptors – add a penalty per extra word
                let extra_words = stripped.split_whitespace().count();
                score += (extra_words as i32) * 25;
            }
        }

        // ── Subfamily "Regular" bonus ──
        // Fonts whose OpenType subfamily is exactly "Regular" are the canonical
        // base variant and should be strongly preferred.
        if let Some(ref subfamily) = candidate.metadata.font_subfamily {
            let sf_lower = subfamily.to_lowercase();
            if sf_lower == "regular" {
                score -= 30;
            }
        }

        score
    }
3083}
3084
/// Walks the system fontconfig configuration and scans the font directories it names.
///
/// The walk starts at `/etc/fonts/fonts.conf` and keeps a work stack of
/// `(prefix, path)` pairs. Every popped pair is first passed through
/// `process_path`; pairs it rejects are dropped.
///
/// * A regular file is read as UTF-8 and handed to `ParseFontsConf`, which may
///   push further includes onto the stack and append font directories. Only if
///   that succeeds is the same text also given to `ParseFontsConfRenderConfig`.
/// * A directory contributes every regular file (symlinks are followed) whose
///   name starts with an ASCII digit and ends with `.conf`.
///
/// Unreadable paths are skipped silently.
///
/// Returns `None` when the base configuration file does not exist or when no
/// font directory was collected; otherwise the result of
/// `FcScanDirectoriesInner` together with the collected render configs.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn FcScanDirectories() -> Option<(Vec<(FcPattern, FcFontPath)>, BTreeMap<String, FcFontRenderConfig>)> {
    use std::fs;
    use std::path::Path;

    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";

    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
        return None;
    }

    let mut font_dirs = Vec::with_capacity(32);
    let mut pending = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
    let mut render_configs: BTreeMap<String, FcFontRenderConfig> = BTreeMap::new();

    while let Some((prefix, unresolved)) = pending.pop() {
        let path = match process_path(&prefix, unresolved, true) {
            Some(resolved) => resolved,
            None => continue,
        };

        let kind = match fs::metadata(&path) {
            Ok(meta) => meta,
            Err(_) => continue,
        };

        if kind.is_file() {
            if let Ok(xml) = fs::read_to_string(&path) {
                // Render-config blocks are only harvested from files whose
                // include/dir structure parsed cleanly.
                if ParseFontsConf(&xml, &mut pending, &mut font_dirs).is_some() {
                    ParseFontsConfRenderConfig(&xml, &mut render_configs);
                }
            }
        } else if kind.is_dir() {
            if let Ok(entries) = fs::read_dir(&path) {
                let numbered_confs = entries
                    .filter_map(Result::ok)
                    .map(|entry| entry.path())
                    // `fs::metadata` traverses symbolic links
                    .filter(|candidate| {
                        fs::metadata(candidate)
                            .map(|meta| meta.is_file())
                            .unwrap_or(false)
                    })
                    .filter(|candidate| {
                        candidate.file_name().map_or(false, |name| {
                            let name = name.to_string_lossy();
                            name.starts_with(|c: char| c.is_ascii_digit())
                                && name.ends_with(".conf")
                        })
                    });

                pending.extend(numbered_confs.map(|conf| (None, conf)));
            }
        }
    }

    if font_dirs.is_empty() {
        return None;
    }

    Some((FcScanDirectoriesInner(&font_dirs), render_configs))
}
3168
/// Extracts `<include>` and `<dir>` entries from one fontconfig XML document.
///
/// * `input` - the XML text of a `fonts.conf`-style file.
/// * `paths_to_visit` - receives `(prefix, path)` for every `<include>` element
///   that has non-empty text content.
/// * `font_paths` - receives `(prefix, directory)` for every `<dir>` element
///   that has non-empty text content.
///
/// `prefix` is the value of the element's `prefix` attribute, if present.
///
/// Returns `None` if the tokenizer reports an error or if another element is
/// opened inside an `<include>`/`<dir>`; entries pushed before that point stay
/// in the output vectors. Returns `Some(())` otherwise.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConf(
    input: &str,
    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
    font_paths: &mut Vec<(Option<String>, String)>,
) -> Option<()> {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    const TAG_INCLUDE: &str = "include";
    const TAG_DIR: &str = "dir";
    const ATTRIBUTE_PREFIX: &str = "prefix";

    let mut current_prefix: Option<&str> = None;
    let mut current_path: Option<&str> = None;
    let mut is_in_include = false;
    let mut is_in_dir = false;

    for token_result in Tokenizer::from(input) {
        let token = token_result.ok()?;

        match token {
            ElementStart { local, .. } => {
                if is_in_include || is_in_dir {
                    return None; /* error: nested tags */
                }

                match local.as_str() {
                    TAG_INCLUDE => is_in_include = true,
                    TAG_DIR => is_in_dir = true,
                    _ => continue,
                }

                current_path = None;
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                if text.is_empty() {
                    continue;
                }
                if is_in_include || is_in_dir {
                    current_path = Some(text);
                }
            }
            Attribute { local, value, .. } => {
                if !is_in_include && !is_in_dir {
                    continue;
                }
                // attribute on <include> or <dir> node
                if local.as_str() == ATTRIBUTE_PREFIX {
                    current_prefix = Some(value.as_str());
                }
            }
            ElementEnd { end, .. } => {
                let end_tag = match end {
                    xmlparser::ElementEnd::Close(_, name) => name,
                    xmlparser::ElementEnd::Empty => {
                        // A self-closing `<include/>` or `<dir/>` has no text
                        // and therefore no path. Without this reset the
                        // "inside element" flag would stay set and the next
                        // start tag would be rejected as nested, discarding
                        // the rest of the file.
                        is_in_include = false;
                        is_in_dir = false;
                        current_path = None;
                        current_prefix = None;
                        continue;
                    }
                    // End of a start tag (`>`): the element body follows.
                    xmlparser::ElementEnd::Open => continue,
                };

                match end_tag.as_str() {
                    TAG_INCLUDE => {
                        if !is_in_include {
                            continue;
                        }

                        if let Some(path) = current_path {
                            paths_to_visit
                                .push((current_prefix.map(str::to_owned), PathBuf::from(path)));
                        }
                    }
                    TAG_DIR => {
                        if !is_in_dir {
                            continue;
                        }

                        if let Some(path) = current_path {
                            font_paths.push((current_prefix.map(str::to_owned), path.to_owned()));
                        }
                    }
                    _ => continue,
                }

                is_in_include = false;
                is_in_dir = false;
                current_path = None;
                current_prefix = None;
            }
            _ => {}
        }
    }

    Some(())
}
3275
3276/// Parses `<match target="font">` blocks from fonts.conf XML and returns
3277/// a map from family name to per-font rendering configuration.
3278///
3279/// Example fonts.conf snippet that this handles:
3280/// ```xml
3281/// <match target="font">
3282///   <test name="family"><string>Inconsolata</string></test>
3283///   <edit name="antialias" mode="assign"><bool>true</bool></edit>
3284///   <edit name="hintstyle" mode="assign"><const>hintslight</const></edit>
3285/// </match>
3286/// ```
3287#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
3288fn ParseFontsConfRenderConfig(
3289    input: &str,
3290    configs: &mut BTreeMap<String, FcFontRenderConfig>,
3291) {
3292    use xmlparser::Token::*;
3293    use xmlparser::Tokenizer;
3294
3295    // Parser state machine
3296    #[derive(Clone, Copy, PartialEq)]
3297    enum State {
3298        /// Outside any relevant block
3299        Idle,
3300        /// Inside <match target="font">
3301        InMatchFont,
3302        /// Inside <test name="family"> within a match block
3303        InTestFamily,
3304        /// Inside <edit name="..."> within a match block
3305        InEdit,
3306        /// Inside a value element (<bool>, <double>, <const>, <string>) within <edit> or <test>
3307        InValue,
3308    }
3309
3310    let mut state = State::Idle;
3311    let mut match_is_font_target = false;
3312    let mut current_family: Option<String> = None;
3313    let mut current_edit_name: Option<String> = None;
3314    let mut current_value: Option<String> = None;
3315    let mut value_tag: Option<String> = None;
3316    let mut config = FcFontRenderConfig::default();
3317    let mut in_test = false;
3318    let mut test_name: Option<String> = None;
3319
3320    for token_result in Tokenizer::from(input) {
3321        let token = match token_result {
3322            Ok(token) => token,
3323            Err(_) => continue,
3324        };
3325
3326        match token {
3327            ElementStart { local, .. } => {
3328                let tag = local.as_str();
3329                match tag {
3330                    "match" => {
3331                        // Reset state for a new match block
3332                        match_is_font_target = false;
3333                        current_family = None;
3334                        config = FcFontRenderConfig::default();
3335                    }
3336                    "test" if state == State::InMatchFont => {
3337                        in_test = true;
3338                        test_name = None;
3339                    }
3340                    "edit" if state == State::InMatchFont => {
3341                        current_edit_name = None;
3342                    }
3343                    "bool" | "double" | "const" | "string" | "int" => {
3344                        if state == State::InTestFamily || state == State::InEdit {
3345                            value_tag = Some(tag.to_owned());
3346                            current_value = None;
3347                        }
3348                    }
3349                    _ => {}
3350                }
3351            }
3352            Attribute { local, value, .. } => {
3353                let attr_name = local.as_str();
3354                let attr_value = value.as_str();
3355
3356                match attr_name {
3357                    "target" => {
3358                        if attr_value == "font" {
3359                            match_is_font_target = true;
3360                        }
3361                    }
3362                    "name" => {
3363                        if in_test && state == State::InMatchFont {
3364                            test_name = Some(attr_value.to_owned());
3365                        } else if state == State::InMatchFont {
3366                            current_edit_name = Some(attr_value.to_owned());
3367                        }
3368                    }
3369                    _ => {}
3370                }
3371            }
3372            Text { text, .. } => {
3373                let text = text.as_str().trim();
3374                if !text.is_empty() && (state == State::InTestFamily || state == State::InEdit) {
3375                    current_value = Some(text.to_owned());
3376                }
3377            }
3378            ElementEnd { end, .. } => {
3379                match end {
3380                    xmlparser::ElementEnd::Open => {
3381                        // Tag just opened (after attributes processed)
3382                        if match_is_font_target && state == State::Idle {
3383                            state = State::InMatchFont;
3384                            match_is_font_target = false;
3385                        } else if in_test {
3386                            if test_name.as_deref() == Some("family") {
3387                                state = State::InTestFamily;
3388                            }
3389                            in_test = false;
3390                        } else if current_edit_name.is_some() && state == State::InMatchFont {
3391                            state = State::InEdit;
3392                        }
3393                    }
3394                    xmlparser::ElementEnd::Close(_, local) => {
3395                        let tag = local.as_str();
3396                        match tag {
3397                            "match" => {
3398                                // End of match block: store config if we have a family
3399                                if let Some(family) = current_family.take() {
3400                                    let empty = FcFontRenderConfig::default();
3401                                    if config != empty {
3402                                        configs.insert(family, config.clone());
3403                                    }
3404                                }
3405                                state = State::Idle;
3406                                config = FcFontRenderConfig::default();
3407                            }
3408                            "test" => {
3409                                if state == State::InTestFamily {
3410                                    // Extract the family name from the value we collected
3411                                    if let Some(ref val) = current_value {
3412                                        current_family = Some(val.clone());
3413                                    }
3414                                    state = State::InMatchFont;
3415                                }
3416                                current_value = None;
3417                                value_tag = None;
3418                            }
3419                            "edit" => {
3420                                if state == State::InEdit {
3421                                    // Apply the collected value to the config
3422                                    if let (Some(ref name), Some(ref val)) = (&current_edit_name, &current_value) {
3423                                        apply_edit_value(&mut config, name, val, value_tag.as_deref());
3424                                    }
3425                                    state = State::InMatchFont;
3426                                }
3427                                current_edit_name = None;
3428                                current_value = None;
3429                                value_tag = None;
3430                            }
3431                            "bool" | "double" | "const" | "string" | "int" => {
3432                                // value_tag and current_value already set by Text handler
3433                            }
3434                            _ => {}
3435                        }
3436                    }
3437                    xmlparser::ElementEnd::Empty => {
3438                        // Self-closing tags: nothing to do
3439                    }
3440                }
3441            }
3442            _ => {}
3443        }
3444    }
3445}
3446
/// Stores one `<edit>` value in the matching field of `config`.
///
/// * `edit_name` - the `name` attribute of the `<edit>` element.
/// * `value` - the text content of the value element.
/// * `value_tag` - the value element's tag name; currently not consulted.
///
/// Boolean and constant properties are overwritten with whatever their parser
/// returns (including `None` for unrecognised text). `dpi` and `scale` are
/// only written when `value` parses as `f64`. Unknown property names are
/// ignored.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn apply_edit_value(
    config: &mut FcFontRenderConfig,
    edit_name: &str,
    value: &str,
    value_tag: Option<&str>,
) {
    let _ = value_tag;

    // All boolean switches share one parser, so pick the destination first.
    let flag_slot = match edit_name {
        "antialias" => Some(&mut config.antialias),
        "hinting" => Some(&mut config.hinting),
        "autohint" => Some(&mut config.autohint),
        "embeddedbitmap" => Some(&mut config.embeddedbitmap),
        "embolden" => Some(&mut config.embolden),
        "minspace" => Some(&mut config.minspace),
        _ => None,
    };
    if let Some(slot) = flag_slot {
        *slot = parse_bool_value(value);
        return;
    }

    match edit_name {
        "hintstyle" => config.hintstyle = parse_hintstyle_const(value),
        "rgba" => config.rgba = parse_rgba_const(value),
        "lcdfilter" => config.lcdfilter = parse_lcdfilter_const(value),
        "dpi" | "scale" => {
            // Leave the previous value untouched when the text is not numeric.
            if let Ok(number) = value.parse::<f64>() {
                let slot = if edit_name == "dpi" {
                    &mut config.dpi
                } else {
                    &mut config.scale
                };
                *slot = Some(number);
            }
        }
        // Unknown edit property, ignore
        _ => {}
    }
}
3498
3499#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
/// Parses the text of a fontconfig boolean value.
///
/// Recognises `true`/`yes`/`on`/`1` as `Some(true)` and `false`/`no`/`off`/`0`
/// as `Some(false)`, ignoring ASCII case and surrounding whitespace.
/// fontconfig itself matches boolean text leniently, so accepting only the
/// exact lowercase words `true` and `false` would drop valid settings.
/// Any other text yields `None`.
fn parse_bool_value(value: &str) -> Option<bool> {
    match value.trim().to_ascii_lowercase().as_str() {
        "true" | "yes" | "on" | "1" => Some(true),
        "false" | "no" | "off" | "0" => Some(false),
        _ => None,
    }
}
3507
/// Parses a fontconfig `hintstyle` value.
///
/// Accepts the symbolic constants `hintnone`, `hintslight`, `hintmedium` and
/// `hintfull` (ASCII case-insensitive, surrounding whitespace ignored) as well
/// as their numeric fontconfig values `0`-`3`, which is what an
/// `<int>` value element delivers. Any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_hintstyle_const(value: &str) -> Option<FcHintStyle> {
    match value.trim().to_ascii_lowercase().as_str() {
        "hintnone" | "0" => Some(FcHintStyle::None),
        "hintslight" | "1" => Some(FcHintStyle::Slight),
        "hintmedium" | "2" => Some(FcHintStyle::Medium),
        "hintfull" | "3" => Some(FcHintStyle::Full),
        _ => None,
    }
}
3518
/// Parses a fontconfig `rgba` (subpixel layout) value.
///
/// Accepts the symbolic constants `unknown`, `rgb`, `bgr`, `vrgb`, `vbgr` and
/// `none` (ASCII case-insensitive, surrounding whitespace ignored) as well as
/// their numeric fontconfig values `0`-`5`, which is what an `<int>` value
/// element delivers. Any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_rgba_const(value: &str) -> Option<FcRgba> {
    match value.trim().to_ascii_lowercase().as_str() {
        "unknown" | "0" => Some(FcRgba::Unknown),
        "rgb" | "1" => Some(FcRgba::Rgb),
        "bgr" | "2" => Some(FcRgba::Bgr),
        "vrgb" | "3" => Some(FcRgba::Vrgb),
        "vbgr" | "4" => Some(FcRgba::Vbgr),
        "none" | "5" => Some(FcRgba::None),
        _ => None,
    }
}
3531
/// Parses a fontconfig `lcdfilter` value.
///
/// Accepts the symbolic constants `lcdnone`, `lcddefault`, `lcdlight` and
/// `lcdlegacy` (ASCII case-insensitive, surrounding whitespace ignored) as
/// well as their numeric fontconfig values `0`-`3`, which is what an `<int>`
/// value element delivers. Any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_lcdfilter_const(value: &str) -> Option<FcLcdFilter> {
    match value.trim().to_ascii_lowercase().as_str() {
        "lcdnone" | "0" => Some(FcLcdFilter::None),
        "lcddefault" | "1" => Some(FcLcdFilter::Default),
        "lcdlight" | "2" => Some(FcLcdFilter::Light),
        "lcdlegacy" | "3" => Some(FcLcdFilter::Legacy),
        _ => None,
    }
}
3542
3543// Unicode range bit positions to actual ranges (full table from OpenType spec).
3544// Based on: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
3545#[cfg(all(feature = "std", feature = "parsing"))]
// Each tuple is (OS/2 ulUnicodeRange bit, first code point, last code point).
// A bit that stands for several Unicode blocks appears once per block, so the
// table must be iterated rather than indexed by bit number. Entries are kept
// in ascending bit order. Bits 123-127 are reserved and have no entry.
const UNICODE_RANGE_MAPPINGS: &[(usize, u32, u32)] = &[
    // ulUnicodeRange1 (bits 0-31)
    (0, 0x0000, 0x007F), // Basic Latin
    (1, 0x0080, 0x00FF), // Latin-1 Supplement
    (2, 0x0100, 0x017F), // Latin Extended-A
    (3, 0x0180, 0x024F), // Latin Extended-B
    (4, 0x0250, 0x02AF), // IPA Extensions
    (4, 0x1D00, 0x1D7F), // Phonetic Extensions
    (4, 0x1D80, 0x1DBF), // Phonetic Extensions Supplement
    (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
    (5, 0xA700, 0xA71F), // Modifier Tone Letters
    (6, 0x0300, 0x036F), // Combining Diacritical Marks
    (6, 0x1DC0, 0x1DFF), // Combining Diacritical Marks Supplement
    (7, 0x0370, 0x03FF), // Greek and Coptic
    (8, 0x2C80, 0x2CFF), // Coptic
    (9, 0x0400, 0x04FF), // Cyrillic
    (9, 0x0500, 0x052F), // Cyrillic Supplement
    (9, 0x2DE0, 0x2DFF), // Cyrillic Extended-A
    (9, 0xA640, 0xA69F), // Cyrillic Extended-B
    (10, 0x0530, 0x058F), // Armenian
    (11, 0x0590, 0x05FF), // Hebrew
    (12, 0xA500, 0xA63F), // Vai
    (13, 0x0600, 0x06FF), // Arabic
    (13, 0x0750, 0x077F), // Arabic Supplement
    (14, 0x07C0, 0x07FF), // NKo
    (15, 0x0900, 0x097F), // Devanagari
    (16, 0x0980, 0x09FF), // Bengali
    (17, 0x0A00, 0x0A7F), // Gurmukhi
    (18, 0x0A80, 0x0AFF), // Gujarati
    (19, 0x0B00, 0x0B7F), // Oriya
    (20, 0x0B80, 0x0BFF), // Tamil
    (21, 0x0C00, 0x0C7F), // Telugu
    (22, 0x0C80, 0x0CFF), // Kannada
    (23, 0x0D00, 0x0D7F), // Malayalam
    (24, 0x0E00, 0x0E7F), // Thai
    (25, 0x0E80, 0x0EFF), // Lao
    (26, 0x10A0, 0x10FF), // Georgian
    (26, 0x2D00, 0x2D2F), // Georgian Supplement
    (27, 0x1B00, 0x1B7F), // Balinese
    (28, 0x1100, 0x11FF), // Hangul Jamo
    (29, 0x1E00, 0x1EFF), // Latin Extended Additional
    (29, 0x2C60, 0x2C7F), // Latin Extended-C
    (29, 0xA720, 0xA7FF), // Latin Extended-D
    (30, 0x1F00, 0x1FFF), // Greek Extended
    (31, 0x2000, 0x206F), // General Punctuation
    (31, 0x2E00, 0x2E7F), // Supplemental Punctuation
    // ulUnicodeRange2 (bits 32-63)
    (32, 0x2070, 0x209F), // Superscripts And Subscripts
    (33, 0x20A0, 0x20CF), // Currency Symbols
    (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
    (35, 0x2100, 0x214F), // Letterlike Symbols
    (36, 0x2150, 0x218F), // Number Forms
    (37, 0x2190, 0x21FF), // Arrows
    (37, 0x27F0, 0x27FF), // Supplemental Arrows-A
    (37, 0x2900, 0x297F), // Supplemental Arrows-B
    (37, 0x2B00, 0x2BFF), // Miscellaneous Symbols and Arrows
    (38, 0x2200, 0x22FF), // Mathematical Operators
    (38, 0x27C0, 0x27EF), // Miscellaneous Mathematical Symbols-A
    (38, 0x2980, 0x29FF), // Miscellaneous Mathematical Symbols-B
    (38, 0x2A00, 0x2AFF), // Supplemental Mathematical Operators
    (39, 0x2300, 0x23FF), // Miscellaneous Technical
    (40, 0x2400, 0x243F), // Control Pictures
    (41, 0x2440, 0x245F), // Optical Character Recognition
    (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
    (43, 0x2500, 0x257F), // Box Drawing
    (44, 0x2580, 0x259F), // Block Elements
    (45, 0x25A0, 0x25FF), // Geometric Shapes
    (46, 0x2600, 0x26FF), // Miscellaneous Symbols
    (47, 0x2700, 0x27BF), // Dingbats
    (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
    (49, 0x3040, 0x309F), // Hiragana
    (50, 0x30A0, 0x30FF), // Katakana
    (50, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
    (51, 0x3100, 0x312F), // Bopomofo
    (51, 0x31A0, 0x31BF), // Bopomofo Extended
    (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
    (53, 0xA840, 0xA87F), // Phags-pa
    (54, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
    (55, 0x3300, 0x33FF), // CJK Compatibility
    (56, 0xAC00, 0xD7AF), // Hangul Syllables
    (57, 0x10000, 0x10FFFF), // Non-Plane 0 (at least one code point beyond the BMP)
    (58, 0x10900, 0x1091F), // Phoenician
    (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
    (59, 0x2E80, 0x2EFF), // CJK Radicals Supplement
    (59, 0x2F00, 0x2FDF), // Kangxi Radicals
    (59, 0x2FF0, 0x2FFF), // Ideographic Description Characters
    (59, 0x3190, 0x319F), // Kanbun
    (59, 0x3400, 0x4DBF), // CJK Unified Ideographs Extension A
    (59, 0x20000, 0x2A6DF), // CJK Unified Ideographs Extension B
    (60, 0xE000, 0xF8FF), // Private Use Area (plane 0)
    (61, 0x31C0, 0x31EF), // CJK Strokes
    (61, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
    (61, 0x2F800, 0x2FA1F), // CJK Compatibility Ideographs Supplement
    (62, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
    (63, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
    // ulUnicodeRange3 (bits 64-95)
    (64, 0xFE20, 0xFE2F), // Combining Half Marks
    (65, 0xFE10, 0xFE1F), // Vertical Forms
    (65, 0xFE30, 0xFE4F), // CJK Compatibility Forms
    (66, 0xFE50, 0xFE6F), // Small Form Variants
    (67, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
    (68, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
    (69, 0xFFF0, 0xFFFF), // Specials
    (70, 0x0F00, 0x0FFF), // Tibetan
    (71, 0x0700, 0x074F), // Syriac
    (72, 0x0780, 0x07BF), // Thaana
    (73, 0x0D80, 0x0DFF), // Sinhala
    (74, 0x1000, 0x109F), // Myanmar
    (75, 0x1200, 0x137F), // Ethiopic
    (75, 0x1380, 0x139F), // Ethiopic Supplement
    (75, 0x2D80, 0x2DDF), // Ethiopic Extended
    (76, 0x13A0, 0x13FF), // Cherokee
    (77, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
    (78, 0x1680, 0x169F), // Ogham
    (79, 0x16A0, 0x16FF), // Runic
    (80, 0x1780, 0x17FF), // Khmer
    (80, 0x19E0, 0x19FF), // Khmer Symbols
    (81, 0x1800, 0x18AF), // Mongolian
    (82, 0x2800, 0x28FF), // Braille Patterns
    (83, 0xA000, 0xA48F), // Yi Syllables
    (83, 0xA490, 0xA4CF), // Yi Radicals
    (84, 0x1700, 0x171F), // Tagalog
    (84, 0x1720, 0x173F), // Hanunoo
    (84, 0x1740, 0x175F), // Buhid
    (84, 0x1760, 0x177F), // Tagbanwa
    (85, 0x10300, 0x1032F), // Old Italic
    (86, 0x10330, 0x1034F), // Gothic
    (87, 0x10400, 0x1044F), // Deseret
    (88, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
    (88, 0x1D100, 0x1D1FF), // Musical Symbols
    (88, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
    (89, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
    (90, 0xF0000, 0xFFFFD), // Private Use (plane 15)
    (90, 0x100000, 0x10FFFD), // Private Use (plane 16)
    (91, 0xFE00, 0xFE0F), // Variation Selectors
    (91, 0xE0100, 0xE01EF), // Variation Selectors Supplement
    (92, 0xE0000, 0xE007F), // Tags
    (93, 0x1900, 0x194F), // Limbu
    (94, 0x1950, 0x197F), // Tai Le
    (95, 0x1980, 0x19DF), // New Tai Lue
    // ulUnicodeRange4 (bits 96-127)
    (96, 0x1A00, 0x1A1F), // Buginese
    (97, 0x2C00, 0x2C5F), // Glagolitic
    (98, 0x2D30, 0x2D7F), // Tifinagh
    (99, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
    (100, 0xA800, 0xA82F), // Syloti Nagri
    (101, 0x10000, 0x1007F), // Linear B Syllabary
    (101, 0x10080, 0x100FF), // Linear B Ideograms
    (101, 0x10100, 0x1013F), // Aegean Numbers
    (102, 0x10140, 0x1018F), // Ancient Greek Numbers
    (103, 0x10380, 0x1039F), // Ugaritic
    (104, 0x103A0, 0x103DF), // Old Persian
    (105, 0x10450, 0x1047F), // Shavian
    (106, 0x10480, 0x104AF), // Osmanya
    (107, 0x10800, 0x1083F), // Cypriot Syllabary
    (108, 0x10A00, 0x10A5F), // Kharoshthi
    (109, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
    (110, 0x12000, 0x123FF), // Cuneiform
    (110, 0x12400, 0x1247F), // Cuneiform Numbers and Punctuation
    (111, 0x1D360, 0x1D37F), // Counting Rod Numerals
    (112, 0x1B80, 0x1BBF), // Sundanese
    (113, 0x1C00, 0x1C4F), // Lepcha
    (114, 0x1C50, 0x1C7F), // Ol Chiki
    (115, 0xA880, 0xA8DF), // Saurashtra
    (116, 0xA900, 0xA92F), // Kayah Li
    (117, 0xA930, 0xA95F), // Rejang
    (118, 0xAA00, 0xAA5F), // Cham
    (119, 0x10190, 0x101CF), // Ancient Symbols
    (120, 0x101D0, 0x101FF), // Phaistos Disc
    (121, 0x10280, 0x1029F), // Lycian
    (121, 0x102A0, 0x102DF), // Carian
    (121, 0x10920, 0x1093F), // Lydian
    (122, 0x1F000, 0x1F02F), // Mahjong Tiles
    (122, 0x1F030, 0x1F09F), // Domino Tiles
];
3680
/// Result of parsing one face out of a font file.
///
/// Produced by the shared face parser so that `FcParseFont` and
/// `FcParseFontBytesInner` can build on the same extraction logic.
#[cfg(all(feature = "std", feature = "parsing"))]
struct ParsedFontFace {
    /// Pattern describing the face.
    pattern: FcPattern,
    /// Position of the face inside its font file. The parser iterates
    /// `0..max_fonts`, where `max_fonts` is 1 for a non-collection file.
    font_index: usize,
}
3688
3689/// Parse all font table data from a single font face and return the extracted patterns.
3690///
3691/// This is the shared core of `FcParseFont` and `FcParseFontBytesInner`:
3692/// TTC detection, font table parsing, OS/2/head/post reading, unicode range extraction,
3693/// CMAP verification, monospace detection, metadata extraction, and pattern creation.
3694#[cfg(all(feature = "std", feature = "parsing"))]
3695fn parse_font_faces(font_bytes: &[u8]) -> Option<Vec<ParsedFontFace>> {
3696    use allsorts::{
3697        binary::read::ReadScope,
3698        font_data::FontData,
3699        get_name::fontcode_get_name,
3700        post::PostTable,
3701        tables::{
3702            os2::Os2, HeadTable, NameTable,
3703        },
3704        tag,
3705    };
3706    use std::collections::BTreeSet;
3707
3708    const FONT_SPECIFIER_NAME_ID: u16 = 4;
3709    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;
3710
3711    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
3712        // Read numFonts from TTC header (offset 8, 4 bytes)
3713        let num_fonts =
3714            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
3715        // Cap at a reasonable maximum as a safety measure
3716        std::cmp::min(num_fonts as usize, 100)
3717    } else {
3718        // Not a collection, just one font
3719        1
3720    };
3721
3722    let scope = ReadScope::new(font_bytes);
3723    let font_file = scope.read::<FontData<'_>>().ok()?;
3724
3725    // Handle collections properly by iterating through all fonts
3726    let mut results = Vec::new();
3727
3728    for font_index in 0..max_fonts {
3729        let provider = font_file.table_provider(font_index).ok()?;
3730        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
3731        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;
3732
3733        let is_bold = head_table.is_bold();
3734        let is_italic = head_table.is_italic();
3735        let mut detected_monospace = None;
3736
3737        let post_data = provider.table_data(tag::POST).ok()??;
3738        if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
3739            // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
3740            detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
3741        }
3742
3743        // Get font properties from OS/2 table
3744        let os2_data = provider.table_data(tag::OS_2).ok()??;
3745        let os2_table = ReadScope::new(&os2_data)
3746            .read_dep::<Os2>(os2_data.len())
3747            .ok()?;
3748
3749        // Extract additional style information
3750        let is_oblique = os2_table
3751            .fs_selection
3752            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
3753        let weight = FcWeight::from_u16(os2_table.us_weight_class);
3754        let stretch = FcStretch::from_u16(os2_table.us_width_class);
3755
3756        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
3757        // These are hints about what the font *should* support
3758        // For actual glyph coverage verification, query the font file directly
3759        let mut unicode_ranges = Vec::new();
3760
3761        // Process the 4 Unicode range bitfields from OS/2 table
3762        let os2_ranges = [
3763            os2_table.ul_unicode_range1,
3764            os2_table.ul_unicode_range2,
3765            os2_table.ul_unicode_range3,
3766            os2_table.ul_unicode_range4,
3767        ];
3768
3769        for &(bit, start, end) in UNICODE_RANGE_MAPPINGS {
3770            let range_idx = bit / 32;
3771            let bit_pos = bit % 32;
3772            if range_idx < 4 && (os2_ranges[range_idx] & (1 << bit_pos)) != 0 {
3773                unicode_ranges.push(UnicodeRange { start, end });
3774            }
3775        }
3776
3777        // Verify OS/2 reported ranges against actual CMAP support
3778        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
3779        // for ranges they don't actually have glyphs for
3780        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);
3781
3782        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
3783        if unicode_ranges.is_empty() {
3784            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
3785                unicode_ranges = cmap_ranges;
3786            }
3787        }
3788
3789        // Use the shared detect_monospace helper for PANOSE + hmtx fallback
3790        let is_monospace = detect_monospace(&provider, &os2_table, detected_monospace)
3791            .unwrap_or(false);
3792
3793        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
3794        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;
3795
3796        // Extract metadata from name table
3797        let mut metadata = FcFontMetadata::default();
3798
3799        const NAME_ID_COPYRIGHT: u16 = 0;
3800        const NAME_ID_FAMILY: u16 = 1;
3801        const NAME_ID_SUBFAMILY: u16 = 2;
3802        const NAME_ID_UNIQUE_ID: u16 = 3;
3803        const NAME_ID_FULL_NAME: u16 = 4;
3804        const NAME_ID_VERSION: u16 = 5;
3805        const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
3806        const NAME_ID_TRADEMARK: u16 = 7;
3807        const NAME_ID_MANUFACTURER: u16 = 8;
3808        const NAME_ID_DESIGNER: u16 = 9;
3809        const NAME_ID_DESCRIPTION: u16 = 10;
3810        const NAME_ID_VENDOR_URL: u16 = 11;
3811        const NAME_ID_DESIGNER_URL: u16 = 12;
3812        const NAME_ID_LICENSE: u16 = 13;
3813        const NAME_ID_LICENSE_URL: u16 = 14;
3814        const NAME_ID_PREFERRED_FAMILY: u16 = 16;
3815        const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;
3816
3817        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
3818        metadata.font_family = get_name_string(&name_data, NAME_ID_FAMILY);
3819        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
3820        metadata.full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);
3821        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
3822        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
3823        metadata.postscript_name = get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
3824        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
3825        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
3826        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
3827        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
3828        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
3829        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
3830        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
3831        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
3832        metadata.preferred_family = get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
3833        metadata.preferred_subfamily = get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);
3834
3835        // One font can support multiple patterns
3836        let mut f_family = None;
3837
3838        let patterns = name_table
3839            .name_records
3840            .iter()
3841            .filter_map(|name_record| {
3842                let name_id = name_record.name_id;
3843                if name_id == FONT_SPECIFIER_FAMILY_ID {
3844                    if let Ok(Some(family)) =
3845                        fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID)
3846                    {
3847                        f_family = Some(family);
3848                    }
3849                    None
3850                } else if name_id == FONT_SPECIFIER_NAME_ID {
3851                    let family = f_family.as_ref()?;
3852                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
3853                    if name.to_bytes().is_empty() {
3854                        None
3855                    } else {
3856                        let mut name_str =
3857                            String::from_utf8_lossy(name.to_bytes()).to_string();
3858                        let mut family_str =
3859                            String::from_utf8_lossy(family.as_bytes()).to_string();
3860                        if name_str.starts_with('.') {
3861                            name_str = name_str[1..].to_string();
3862                        }
3863                        if family_str.starts_with('.') {
3864                            family_str = family_str[1..].to_string();
3865                        }
3866                        Some((
3867                            FcPattern {
3868                                name: Some(name_str),
3869                                family: Some(family_str),
3870                                bold: if is_bold {
3871                                    PatternMatch::True
3872                                } else {
3873                                    PatternMatch::False
3874                                },
3875                                italic: if is_italic {
3876                                    PatternMatch::True
3877                                } else {
3878                                    PatternMatch::False
3879                                },
3880                                oblique: if is_oblique {
3881                                    PatternMatch::True
3882                                } else {
3883                                    PatternMatch::False
3884                                },
3885                                monospace: if is_monospace {
3886                                    PatternMatch::True
3887                                } else {
3888                                    PatternMatch::False
3889                                },
3890                                condensed: if stretch <= FcStretch::Condensed {
3891                                    PatternMatch::True
3892                                } else {
3893                                    PatternMatch::False
3894                                },
3895                                weight,
3896                                stretch,
3897                                unicode_ranges: unicode_ranges.clone(),
3898                                metadata: metadata.clone(),
3899                                render_config: FcFontRenderConfig::default(),
3900                            },
3901                            font_index,
3902                        ))
3903                    }
3904                } else {
3905                    None
3906                }
3907            })
3908            .collect::<BTreeSet<_>>();
3909
3910        results.extend(patterns.into_iter().map(|(pat, idx)| ParsedFontFace {
3911            pattern: pat,
3912            font_index: idx,
3913        }));
3914    }
3915
3916    if results.is_empty() {
3917        None
3918    } else {
3919        Some(results)
3920    }
3921}
3922
// File-based parsing entry point: loads one font file from disk and returns a
// `(FcPattern, FcFontPath)` pair for every face `parse_font_faces` finds in it.
//
// Yields `None` when the file cannot be opened, cannot be mapped / read, or
// when `parse_font_faces` returns `None` for its contents.
#[cfg(all(feature = "std", feature = "parsing"))]
pub(crate) fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    use mmapio::MmapOptions;
    use std::fs::File;

    // Opening the file is also the existence / permission check.
    let file = File::open(filepath).ok()?;

    // Outside of wasm the file is memory-mapped rather than copied to the heap.
    // SAFETY: NOTE(review) — presumably sound only while no other process
    // truncates or rewrites the font file during the mapping's lifetime;
    // confirm against the mmapio documentation.
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    let font_bytes = unsafe { MmapOptions::new().map(&file).ok()? };

    // Everywhere else the whole file is read into memory.
    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
    let font_bytes = std::fs::read(filepath).ok()?;

    let data: &[u8] = &font_bytes[..];
    let faces = parse_font_faces(data)?;

    // One hash per file: every face of a collection carries the same value, so
    // it can serve as a shared de-duplication key for the file's bytes.
    let bytes_hash = crate::utils::content_dedup_hash_u64(data);
    let path = filepath.to_string_lossy().into_owned();

    let mut entries = Vec::new();
    for face in faces {
        let location = FcFontPath {
            path: path.clone(),
            font_index: face.font_index,
            bytes_hash,
        };
        entries.push((face.pattern, location));
    }

    Some(entries)
}
3963
/// Result of a fast, cmap-only coverage probe of a single font face.
///
/// Built by [`FcParseFontFaceFast`] (the original documentation also
/// names [`FcProbeCoverage`] as a producer). The probe reads the
/// `head` table for the bold / italic flags and the `cmap` table for
/// glyph coverage; the NAME table is not read, so no name or family
/// is available on the resulting pattern.
///
/// The original authors report roughly 1 ms per face for this path on
/// a warm file-system cache, versus roughly 13 ms for the full parse
/// done by `parse_font_faces`.
///
/// `pattern.unicode_ranges` is derived from what the chosen cmap
/// subtable actually maps among the requested codepoints (runs of
/// consecutive covered codepoints are merged into one
/// `UnicodeRange`), not from the OS/2 `ulUnicodeRange` bitfield.
#[cfg(all(feature = "std", feature = "parsing"))]
#[derive(Debug, Clone)]
pub struct FastCoverage {
    /// Pattern describing the face. `name` and `family` are `None`;
    /// `weight` and `italic` come from `head.macStyle`, and
    /// `unicode_ranges` holds the covered part of the request set.
    pub pattern: FcPattern,
    /// The requested codepoints that map to a non-zero glyph id.
    /// Empty when the face covers none of them, in which case the
    /// caller is expected to move on to another candidate.
    pub covered: alloc::collections::BTreeSet<char>,
    /// Bold flag as reported by the `head` table.
    pub is_bold: bool,
    /// Italic flag as reported by the `head` table.
    pub is_italic: bool,
}
3999
/// Probes a single face for coverage of `codepoints` without doing a
/// full font parse.
///
/// The bytes are read as `FontData` (so collections are handled via
/// `font_index`), the `head` table supplies the bold / italic flags,
/// and the subtable chosen by `find_best_cmap_subtable` is queried
/// once per requested codepoint. A codepoint counts as covered when
/// it maps to a glyph id other than 0.
///
/// # Arguments
/// * `font_bytes` - raw bytes of the font file or collection
/// * `font_index` - index of the face inside the file
/// * `codepoints` - the characters the caller is interested in
///
/// # Returns
/// `None` when the bytes cannot be read as font data, no table
/// provider exists for `font_index`, the `head` or `cmap` table is
/// missing or unreadable, or no suitable cmap subtable is found.
/// A face that simply covers nothing yields `Some` with an empty
/// `covered` set, so callers can tell "wrong face" apart from
/// "unusable file".
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontFaceFast(
    font_bytes: &[u8],
    font_index: usize,
    codepoints: &alloc::collections::BTreeSet<char>,
) -> Option<FastCoverage> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        tables::{
            cmap::{Cmap, CmapSubtable},
            FontTableProvider, HeadTable,
        },
        tag,
    };

    let font_file = ReadScope::new(font_bytes).read::<FontData<'_>>().ok()?;
    let provider = font_file.table_provider(font_index).ok()?;

    // Style flags come from `head` only; no other metadata table is touched.
    let head_data = provider.table_data(tag::HEAD).ok()??;
    let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;
    let (is_bold, is_italic) = (head_table.is_bold(), head_table.is_italic());

    // Locate and decode the cmap subtable used for all lookups below.
    let cmap_data = provider.table_data(tag::CMAP).ok()??;
    let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;
    let record = find_best_cmap_subtable(&cmap)?;
    let subtable = ReadScope::new(&cmap_data)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    let mut covered = alloc::collections::BTreeSet::new();
    let mut covered_ranges: Vec<UnicodeRange> = Vec::new();
    for &ch in codepoints {
        let cp = ch as u32;

        // Lookup errors, unmapped codepoints and glyph 0 all mean "not covered".
        match subtable.map_glyph(cp) {
            Ok(Some(gid)) if gid != 0 => {}
            _ => continue,
        }
        covered.insert(ch);

        // The set iterates in ascending order, so a codepoint directly after
        // the previous range's end can simply extend that range.
        let extends_previous = covered_ranges
            .last()
            .map_or(false, |previous| cp == previous.end + 1);
        if extends_previous {
            if let Some(previous) = covered_ranges.last_mut() {
                previous.end = cp;
            }
        } else {
            covered_ranges.push(UnicodeRange { start: cp, end: cp });
        }
    }

    let pattern = FcPattern {
        name: None,
        family: None,
        weight: if is_bold {
            FcWeight::Bold
        } else {
            FcWeight::Normal
        },
        italic: if is_italic {
            PatternMatch::True
        } else {
            PatternMatch::False
        },
        oblique: PatternMatch::DontCare,
        monospace: PatternMatch::DontCare,
        unicode_ranges: covered_ranges,
        ..Default::default()
    };

    Some(FastCoverage {
        pattern,
        covered,
        is_bold,
        is_italic,
    })
}
4104
4105/// Count the number of faces inside a TTC, or `1` for a single-face
4106/// font file. Used by [`FcFontRegistry::request_fonts_fast`] to
4107/// iterate every face in a `.ttc` without paying the full-parse
4108/// cost (the TTC header is 12 bytes).
4109#[cfg(all(feature = "std", feature = "parsing"))]
///
/// Only the first 12 bytes are inspected: the `ttcf` tag, four bytes
/// that are skipped, and a big-endian `u32` face count. The result is
/// always in `1..=100`; anything that is not a collection header
/// (including input shorter than 12 bytes) counts as one face.
#[allow(non_snake_case)]
pub fn FcCountFontFaces(font_bytes: &[u8]) -> usize {
    // Upper bound applied to the count declared in the header, so a corrupt
    // or hostile header cannot make callers iterate an absurd number of faces.
    const MAX_FACES: usize = 100;

    match font_bytes {
        [b't', b't', b'c', b'f', _, _, _, _, n0, n1, n2, n3, ..] => {
            let declared = u32::from_be_bytes([*n0, *n1, *n2, *n3]);
            // A declared count of zero is still reported as one face.
            (declared as usize).clamp(1, MAX_FACES)
        }
        _ => 1,
    }
}
4122
/// Parses an in-memory font and returns one `(FcPattern, FcFont)` pair
/// per face.
///
/// The resulting pairs are in the shape expected by
/// `FcFontCache::with_memory_fonts()`. This function is a thin public
/// wrapper; the work happens in `FcParseFontBytesInner`.
///
/// # Arguments
/// * `font_bytes` - raw bytes of the font file
/// * `font_id` - caller-chosen identifier stored on every returned `FcFont`
///
/// # Returns
/// `None` if the bytes could not be parsed, otherwise the pairs for
/// all faces found.
///
/// # Example
/// ```ignore
/// use rust_fontconfig::{FcFontCache, FcParseFontBytes};
///
/// let font_bytes = include_bytes!("path/to/font.ttf");
/// let mut cache = FcFontCache::default();
///
/// if let Some(fonts) = FcParseFontBytes(font_bytes, "MyFont") {
///     cache.with_memory_fonts(fonts);
/// }
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontBytes(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    FcParseFontBytesInner(font_bytes, font_id)
}
4153
/// Backing implementation of `FcParseFontBytes`.
///
/// Runs `parse_font_faces` over the bytes and pairs every resulting
/// pattern with an `FcFont` holding its own copy of the font bytes,
/// the face index and the caller-supplied id. Returns `None` when
/// `parse_font_faces` does.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontBytesInner(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    let faces = parse_font_faces(font_bytes)?;

    let mut fonts = Vec::new();
    for face in faces {
        // Each `FcFont` owns its data, so the bytes are copied once per face.
        let font = FcFont {
            bytes: font_bytes.to_vec(),
            font_index: face.font_index,
            id: font_id.to_string(),
        };
        fonts.push((face.pattern, font));
    }

    Some(fonts)
}
4178
/// Scans every `(prefix, directory)` entry and returns all fonts found,
/// concatenated in entry order.
///
/// Entries whose path cannot be resolved by `process_path` are
/// skipped. With the `multithreading` feature the entries are
/// processed through rayon's parallel iterators, otherwise
/// sequentially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
    // Resolves one entry to a concrete directory and scans it recursively;
    // `None` means the entry's prefix / path combination is unusable.
    fn scan_entry(entry: &(Option<String>, String)) -> Option<Vec<(FcPattern, FcFontPath)>> {
        let (prefix, dir) = entry;
        let resolved = process_path(prefix, PathBuf::from(dir), false)?;
        Some(FcScanSingleDirectoryRecursive(resolved))
    }

    #[cfg(feature = "multithreading")]
    let found: Vec<(FcPattern, FcFontPath)> = {
        use rayon::prelude::*;

        paths.par_iter().filter_map(scan_entry).flatten().collect()
    };

    #[cfg(not(feature = "multithreading"))]
    let found: Vec<(FcPattern, FcFontPath)> =
        paths.iter().filter_map(scan_entry).flatten().collect();

    found
}
4205
4206/// Recursively collect all files from a directory (no parsing, no allsorts).
4207#[cfg(feature = "std")]
///
/// The tree is walked breadth-first, one directory level at a time.
/// Directories that cannot be read are silently skipped, and every
/// entry that is not a directory is reported, whatever its extension.
///
/// Symbolic links to directories are followed, so each directory is
/// remembered by its canonical path and visited at most once. Without
/// that, a link pointing back up the tree would be re-walked through
/// ever longer paths and the same files would be reported repeatedly.
fn FcCollectFontFilesRecursive(dir: PathBuf) -> Vec<PathBuf> {
    let mut files = Vec::new();
    // Canonical paths of the directories already listed.
    let mut visited = std::collections::HashSet::new();
    let mut current_level = vec![dir];

    while !current_level.is_empty() {
        let mut next_level = Vec::new();

        for dir in &current_level {
            // If the path cannot be canonicalised, fall back to the path as
            // given; `read_dir` below decides whether it is usable at all.
            let identity = std::fs::canonicalize(dir).unwrap_or_else(|_| dir.clone());
            if !visited.insert(identity) {
                continue;
            }

            let entries = match std::fs::read_dir(dir) {
                Ok(entries) => entries,
                Err(_) => continue,
            };

            // Entries that fail to be read are dropped by `flatten`.
            for entry in entries.flatten() {
                let path = entry.path();
                if path.is_dir() {
                    next_level.push(path);
                } else {
                    files.push(path);
                }
            }
        }

        current_level = next_level;
    }

    files
}
4236
/// Collects every file below `dir` (recursively) and parses all of them,
/// returning the patterns of the faces that could be parsed.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
    FcParseFontFiles(&FcCollectFontFilesRecursive(dir))
}
4242
/// Parses each of the given files with `FcParseFont` and returns the
/// patterns of all faces, in file order.
///
/// Files that `FcParseFont` rejects are skipped. With the
/// `multithreading` feature the files are parsed through rayon's
/// parallel iterators, otherwise sequentially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(feature = "multithreading")]
    let per_file: Vec<Vec<(FcPattern, FcFontPath)>> = {
        use rayon::prelude::*;

        files_to_parse.par_iter().filter_map(FcParseFont).collect()
    };

    #[cfg(not(feature = "multithreading"))]
    let per_file: Vec<Vec<(FcPattern, FcFontPath)>> =
        files_to_parse.iter().filter_map(FcParseFont).collect();

    // Merge the per-file face lists into one flat list.
    per_file.into_iter().flatten().collect()
}
4266
4267#[cfg(all(feature = "std", feature = "parsing"))]
/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
///
/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
///
/// Resolution happens in two steps:
///
/// 1. A leading `~` path component is replaced by the value of `HOME`
///    (`None` is returned if `HOME` is not set).
/// 2. The prefix is applied:
///    * no prefix - the path is returned as it is;
///    * `cwd` / `default` - the path is taken relative to `.`;
///    * `xdg` - the path is taken relative to `XDG_CONFIG_HOME` when
///      `is_include_path` is set, otherwise to `XDG_DATA_HOME`. When the
///      variable is unset **or empty**, `$HOME/.config` respectively
///      `$HOME/.local/share` is used instead, as the XDG Base Directory
///      specification requires;
///    * anything else - unsupported, `None`.
///
/// Environment variables are read as OS strings, so values that are not
/// valid Unicode are still usable as paths.
fn process_path(
    prefix: &Option<String>,
    mut path: PathBuf,
    is_include_path: bool,
) -> Option<PathBuf> {
    use std::env::var_os;

    const HOME_SHORTCUT: &str = "~";
    const CWD_PATH: &str = ".";

    const HOME_ENV_VAR: &str = "HOME";
    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";

    const PREFIX_CWD: &str = "cwd";
    const PREFIX_DEFAULT: &str = "default";
    const PREFIX_XDG: &str = "xdg";

    // The lookups below could be cached, but they are cheap and rare enough
    // that reading the environment on every call is simpler.

    // Value of `HOME`, if set.
    fn home_dir() -> Option<PathBuf> {
        var_os(HOME_ENV_VAR).map(PathBuf::from)
    }

    // Value of an XDG base-directory variable. An unset or empty variable
    // falls back to `home_suffix` below the home directory.
    fn xdg_dir(env_var: &str, home_suffix: &str) -> Option<PathBuf> {
        var_os(env_var)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .or_else(|| home_dir().map(|home| home.join(home_suffix)))
    }

    // Expand a leading `~` component. `strip_prefix` works on whole path
    // components, so `~/x` and `~` match while `~user/x` does not.
    let expanded = match path.strip_prefix(HOME_SHORTCUT) {
        Ok(rest) => Some(home_dir()?.join(rest)),
        Err(_) => None,
    };
    if let Some(expanded) = expanded {
        path = expanded;
    }

    match prefix.as_deref() {
        None => Some(path),
        Some(PREFIX_CWD) | Some(PREFIX_DEFAULT) => Some(PathBuf::from(CWD_PATH).join(path)),
        Some(PREFIX_XDG) => {
            let base = if is_include_path {
                xdg_dir(XDG_CONFIG_HOME_ENV_VAR, XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX)
            } else {
                xdg_dir(XDG_DATA_HOME_ENV_VAR, XDG_DATA_HOME_DEFAULT_PATH_SUFFIX)
            };
            base.map(|base| base.join(path))
        }
        // Unsupported prefix
        Some(_) => None,
    }
}
4348
// Looks up `name_id` in the raw name table bytes and returns it as an owned
// `String`. Lookup errors and missing entries both yield `None`; bytes that
// are not valid UTF-8 are replaced lossily.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
    let raw = fontcode_get_name(name_data, name_id).ok()??;
    Some(String::from_utf8_lossy(raw.to_bytes()).into_owned())
}
4357
4358/// Representative test codepoints for each Unicode block.
4359/// These are carefully chosen to be actual script characters (not punctuation/symbols)
4360/// that a font claiming to support this script should definitely have.
4361#[cfg(all(feature = "std", feature = "parsing"))]
///
/// The block is identified by `start` alone. For the blocks listed
/// below a hand-picked set of letters is returned; `end` is not
/// consulted for those, so callers must filter the result themselves
/// if they passed a range shorter than the full block.
///
/// For any other `start` the range is sampled: four points at 1/5,
/// 2/5, 3/5 and 4/5 of the range when it spans more than 20
/// codepoints, otherwise its first and middle codepoint. An inverted
/// range (`end < start`) is treated as a range of length zero, and
/// the sampling arithmetic cannot overflow for any `u32` input.
fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
    let curated: &[u32] = match start {
        // Basic Latin - test uppercase, lowercase, and digits
        0x0000 => &[
            'A' as u32, 'M' as u32, 'Z' as u32, 'a' as u32, 'm' as u32, 'z' as u32,
        ],
        // Latin-1 Supplement - common accented letters
        0x0080 => &[0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1], // À É Ñ à é ñ
        // Latin Extended-A
        0x0100 => &[0x0100, 0x0110, 0x0141, 0x0152, 0x0160], // Ā Đ Ł Œ Š
        // Latin Extended-B
        0x0180 => &[0x0180, 0x01A0, 0x01B0, 0x01CD], // ƀ Ơ ư Ǎ
        // IPA Extensions
        0x0250 => &[0x0250, 0x0259, 0x026A, 0x0279], // ɐ ə ɪ ɹ
        // Greek and Coptic
        0x0370 => &[0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9], // Α Β Γ α β ω
        // Cyrillic
        0x0400 => &[0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F], // А Б В а б Я
        // Armenian
        0x0530 => &[0x0531, 0x0532, 0x0533, 0x0561, 0x0562], // Ա Բ Գ ա բ
        // Hebrew
        0x0590 => &[0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA], // א ב ג ש ת
        // Arabic
        0x0600 => &[0x0627, 0x0628, 0x062A, 0x062C, 0x0645], // ا ب ت ج م
        // Syriac
        0x0700 => &[0x0710, 0x0712, 0x0713, 0x0715], // ܐ ܒ ܓ ܕ
        // Devanagari
        0x0900 => &[0x0905, 0x0906, 0x0915, 0x0916, 0x0939], // अ आ क ख ह
        // Bengali
        0x0980 => &[0x0985, 0x0986, 0x0995, 0x0996], // অ আ ক খ
        // Gurmukhi
        0x0A00 => &[0x0A05, 0x0A06, 0x0A15, 0x0A16], // ਅ ਆ ਕ ਖ
        // Gujarati
        0x0A80 => &[0x0A85, 0x0A86, 0x0A95, 0x0A96], // અ આ ક ખ
        // Oriya
        0x0B00 => &[0x0B05, 0x0B06, 0x0B15, 0x0B16], // ଅ ଆ କ ଖ
        // Tamil
        0x0B80 => &[0x0B85, 0x0B86, 0x0B95, 0x0BA4], // அ ஆ க த
        // Telugu
        0x0C00 => &[0x0C05, 0x0C06, 0x0C15, 0x0C16], // అ ఆ క ఖ
        // Kannada
        0x0C80 => &[0x0C85, 0x0C86, 0x0C95, 0x0C96], // ಅ ಆ ಕ ಖ
        // Malayalam
        0x0D00 => &[0x0D05, 0x0D06, 0x0D15, 0x0D16], // അ ആ ക ഖ
        // Thai
        0x0E00 => &[0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40], // ก ข ค ง เ
        // Lao
        0x0E80 => &[0x0E81, 0x0E82, 0x0E84, 0x0E87], // ກ ຂ ຄ ງ
        // Myanmar
        0x1000 => &[0x1000, 0x1001, 0x1002, 0x1010, 0x1019], // က ခ ဂ တ မ
        // Georgian
        0x10A0 => &[0x10D0, 0x10D1, 0x10D2, 0x10D3], // ა ბ გ დ
        // Hangul Jamo
        0x1100 => &[0x1100, 0x1102, 0x1103, 0x1161, 0x1162], // ᄀ ᄂ ᄃ ᅡ ᅢ
        // Ethiopic
        0x1200 => &[0x1200, 0x1208, 0x1210, 0x1218], // ሀ ለ ሐ መ
        // Cherokee
        0x13A0 => &[0x13A0, 0x13A1, 0x13A2, 0x13A3], // Ꭰ Ꭱ Ꭲ Ꭳ
        // Khmer
        0x1780 => &[0x1780, 0x1781, 0x1782, 0x1783], // ក ខ គ ឃ
        // Mongolian
        0x1800 => &[0x1820, 0x1821, 0x1822, 0x1823], // ᠠ ᠡ ᠢ ᠣ
        // Hiragana
        0x3040 => &[0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093], // あ い う か き ん
        // Katakana
        0x30A0 => &[0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3], // ア イ ウ カ キ ン
        // Bopomofo
        0x3100 => &[0x3105, 0x3106, 0x3107, 0x3108], // ㄅ ㄆ ㄇ ㄈ
        // CJK Unified Ideographs - common characters
        0x4E00 => &[0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708], // 一 中 人 大 日 月
        // Hangul Syllables
        0xAC00 => &[0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4], // 가 각 간 나 다
        // CJK Compatibility Ideographs
        0xF900 => &[0xF900, 0xF901, 0xF902], // 豈 更 車
        // Arabic Presentation Forms-A
        0xFB50 => &[0xFB50, 0xFB51, 0xFB52, 0xFB56], // ﭐ ﭑ ﭒ ﭖ
        // Arabic Presentation Forms-B
        0xFE70 => &[0xFE70, 0xFE72, 0xFE74, 0xFE76], // ﹰ ﹲ ﹴ ﹶ
        // Halfwidth and Fullwidth Forms
        0xFF00 => &[0xFF01, 0xFF21, 0xFF41, 0xFF61], // ! A a 。
        // Default: sample at regular intervals
        _ => {
            // Saturating: an inverted range must not underflow.
            let span = end.saturating_sub(start);
            if span <= 20 {
                return vec![start, start + span / 2];
            }
            // `k * span` is computed in u64 so it cannot overflow; the
            // quotient is below `span`, hence fits back into u32 and keeps
            // `start + offset` within the range.
            return (1..=4u64)
                .map(|k| start + (k * u64::from(span) / 5) as u32)
                .collect();
        }
    };

    curated.to_vec()
}
4456
/// Find the best Unicode CMAP subtable of a font's `cmap` table.
///
/// Candidates are tried in this order and the first one present wins:
///
/// 1. Unicode platform, encoding 4 and Windows platform, encoding 10 -
///    the full-repertoire encodings, which can also map codepoints
///    above U+FFFF;
/// 2. Unicode platform, encoding 3 and Windows platform, encoding 1 -
///    the BMP-only encodings;
/// 3. Unicode platform, encodings 0 and 1 - the legacy Unicode 1.0 / 1.1
///    encodings.
///
/// Full-repertoire subtables are preferred because fonts that ship one
/// usually carry a BMP-only subtable as well; picking the BMP-only one
/// first would make every supplementary-plane codepoint (emoji, CJK
/// extensions, ...) look unsupported.
///
/// Returns `None` when the font offers none of these subtables.
#[cfg(all(feature = "std", feature = "parsing"))]
fn find_best_cmap_subtable<'a>(
    cmap: &allsorts::tables::cmap::Cmap<'a>,
) -> Option<allsorts::tables::cmap::EncodingRecord> {
    use allsorts::tables::cmap::{EncodingId, PlatformId};

    // Full repertoire first ...
    cmap.find_subtable(PlatformId::UNICODE, EncodingId(4))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
        // ... then BMP-only ...
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(3)))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
        // ... then the legacy Unicode encodings.
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)))
}
4472
/// Filters the Unicode ranges claimed by the OS/2 table down to those
/// the font's cmap actually backs up.
///
/// For every range a handful of sample codepoints is taken from
/// `get_verification_codepoints`; the range is kept when at least half
/// of those samples (rounded up) lie inside the range and map to a
/// glyph id other than 0. Samples outside the range never count.
///
/// If the cmap cannot be consulted at all - table missing or
/// unreadable, or no suitable subtable - the input is returned
/// unchanged, i.e. the OS/2 claims are trusted.
#[cfg(all(feature = "std", feature = "parsing"))]
fn verify_unicode_ranges_with_cmap(
    provider: &impl FontTableProvider,
    os2_ranges: Vec<UnicodeRange>
) -> Vec<UnicodeRange> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    // Nothing claimed, nothing to verify.
    if os2_ranges.is_empty() {
        return os2_ranges;
    }

    let cmap_data = match provider.table_data(tag::CMAP) {
        Ok(Some(data)) => data,
        _ => return os2_ranges,
    };

    // Parse the table, pick a subtable and decode it; any failure along the
    // way means verification is impossible.
    let subtable = ReadScope::new(&cmap_data)
        .read::<Cmap<'_>>()
        .ok()
        .and_then(|cmap| find_best_cmap_subtable(&cmap))
        .and_then(|record| {
            ReadScope::new(&cmap_data)
                .offset(record.offset as usize)
                .read::<CmapSubtable<'_>>()
                .ok()
        });
    let subtable = match subtable {
        Some(subtable) => subtable,
        None => return os2_ranges,
    };

    os2_ranges
        .into_iter()
        .filter(|range| {
            let samples = get_verification_codepoints(range.start, range.end);
            // ceil(len / 2)
            let needed = (samples.len() + 1) / 2;

            // `take(needed)` stops probing as soon as enough samples matched.
            let confirmed = samples
                .into_iter()
                .filter(|cp| range.start <= *cp && *cp <= range.end)
                .filter(|&cp| match subtable.map_glyph(cp) {
                    Ok(Some(gid)) => gid != 0,
                    _ => false,
                })
                .take(needed)
                .count();

            confirmed >= needed
        })
        .collect()
}
4541
/// Derive Unicode coverage directly from the font's CMAP table.
///
/// Serves as the fallback for fonts whose OS/2 `ulUnicodeRange` bits are all
/// zero and therefore carry no coverage information.
///
/// Each block in a fixed list of standard Unicode blocks is probed with the
/// codepoints returned by `get_verification_codepoints`. A block is reported
/// as covered when at least half of those probes (rounded up) map to a
/// glyph id other than 0.
///
/// Returns `None` when the CMAP table is missing or cannot be parsed, when
/// `find_best_cmap_subtable` selects no subtable, or when no block passes.
#[cfg(all(feature = "std", feature = "parsing"))]
fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    // Standard Unicode blocks to probe, as inclusive (first, last) codepoints.
    const PROBE_BLOCKS: &[(u32, u32)] = &[
        (0x0000, 0x007F), // Basic Latin
        (0x0080, 0x00FF), // Latin-1 Supplement
        (0x0100, 0x017F), // Latin Extended-A
        (0x0180, 0x024F), // Latin Extended-B
        (0x0250, 0x02AF), // IPA Extensions
        (0x0300, 0x036F), // Combining Diacritical Marks
        (0x0370, 0x03FF), // Greek and Coptic
        (0x0400, 0x04FF), // Cyrillic
        (0x0500, 0x052F), // Cyrillic Supplement
        (0x0530, 0x058F), // Armenian
        (0x0590, 0x05FF), // Hebrew
        (0x0600, 0x06FF), // Arabic
        (0x0700, 0x074F), // Syriac
        (0x0900, 0x097F), // Devanagari
        (0x0980, 0x09FF), // Bengali
        (0x0A00, 0x0A7F), // Gurmukhi
        (0x0A80, 0x0AFF), // Gujarati
        (0x0B00, 0x0B7F), // Oriya
        (0x0B80, 0x0BFF), // Tamil
        (0x0C00, 0x0C7F), // Telugu
        (0x0C80, 0x0CFF), // Kannada
        (0x0D00, 0x0D7F), // Malayalam
        (0x0E00, 0x0E7F), // Thai
        (0x0E80, 0x0EFF), // Lao
        (0x1000, 0x109F), // Myanmar
        (0x10A0, 0x10FF), // Georgian
        (0x1100, 0x11FF), // Hangul Jamo
        (0x1200, 0x137F), // Ethiopic
        (0x13A0, 0x13FF), // Cherokee
        (0x1780, 0x17FF), // Khmer
        (0x1800, 0x18AF), // Mongolian
        (0x2000, 0x206F), // General Punctuation
        (0x20A0, 0x20CF), // Currency Symbols
        (0x2100, 0x214F), // Letterlike Symbols
        (0x2190, 0x21FF), // Arrows
        (0x2200, 0x22FF), // Mathematical Operators
        (0x2500, 0x257F), // Box Drawing
        (0x25A0, 0x25FF), // Geometric Shapes
        (0x2600, 0x26FF), // Miscellaneous Symbols
        (0x3000, 0x303F), // CJK Symbols and Punctuation
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0x3100, 0x312F), // Bopomofo
        (0x3130, 0x318F), // Hangul Compatibility Jamo
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0xAC00, 0xD7AF), // Hangul Syllables
        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
    ];

    let cmap_data = provider.table_data(tag::CMAP).ok()??;
    let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;

    // The encoding record only stores an offset into the CMAP data; the
    // subtable itself has to be parsed from that position.
    let encoding_record = find_best_cmap_subtable(&cmap)?;
    let cmap_subtable = ReadScope::new(&cmap_data)
        .offset(encoding_record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    // True when enough probe codepoints of the block resolve to a real glyph.
    let block_is_covered = |start: u32, end: u32| -> bool {
        let probes = get_verification_codepoints(start, end);
        let needed = (probes.len() + 1) / 2;

        // `take(needed)` stops probing as soon as the quota is reached.
        let found = probes
            .into_iter()
            .filter(|&cp| matches!(cmap_subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
            .take(needed)
            .count();

        found >= needed
    };

    let covered: Vec<UnicodeRange> = PROBE_BLOCKS
        .iter()
        .filter_map(|&(start, end)| {
            if block_is_covered(start, end) {
                Some(UnicodeRange { start, end })
            } else {
                None
            }
        })
        .collect();

    // An empty result is reported as "no coverage information".
    Some(covered).filter(|found| !found.is_empty())
}
4641
/// Translate the OS/2 `ulUnicodeRange1..4` bit fields into Unicode ranges.
///
/// Every `(bit, start, end)` entry of `UNICODE_RANGE_MAPPINGS` whose bit is
/// set in the matching 32-bit word yields one `UnicodeRange { start, end }`,
/// in table order. Entries whose bit index lies beyond the four words are
/// skipped.
///
/// Currently unused; kept for reference.
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(dead_code)]
fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
    // The 128 range bits are spread over four consecutive 32-bit words.
    let words = [
        os2_table.ul_unicode_range1,
        os2_table.ul_unicode_range2,
        os2_table.ul_unicode_range3,
        os2_table.ul_unicode_range4,
    ];

    UNICODE_RANGE_MAPPINGS
        .iter()
        .filter(|&&(bit, _, _)| {
            // `get` returns `None` for a word index outside the four words.
            words
                .get(bit / 32)
                .map_or(false, |word| (*word & (1 << (bit % 32))) != 0)
        })
        .map(|&(_, start, end)| UnicodeRange { start, end })
        .collect()
}
4665
/// Decide whether a font is monospaced.
///
/// Checks, in order:
/// 1. `detected_monospace`: a verdict that is already known is returned as-is.
/// 2. PANOSE: when the family kind (`panose[0]`) is 2, the proportion byte
///    (`panose[3]`) decides.
/// 3. `hmtx`: the font counts as monospaced when all `num_h_metrics` advance
///    widths are identical.
///
/// Returns `None` when the `hhea`, `maxp` or `hmtx` table cannot be loaded or
/// parsed, or when reading a metric fails before a differing width is found.
#[cfg(all(feature = "std", feature = "parsing"))]
fn detect_monospace(
    provider: &impl FontTableProvider,
    os2_table: &Os2,
    detected_monospace: Option<bool>,
) -> Option<bool> {
    // A verdict supplied by the caller always wins.
    if detected_monospace.is_some() {
        return detected_monospace;
    }

    // PANOSE classification: family kind 2 = Latin Text, proportion 9 = Monospaced.
    let panose = &os2_table.panose;
    if panose[0] == 2 {
        return Some(panose[3] == 9);
    }

    // Fall back to the advance widths; parsing hmtx needs the counts stored
    // in hhea and maxp.
    let hhea_data = provider.table_data(tag::HHEA).ok()??;
    let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
    let maxp_data = provider.table_data(tag::MAXP).ok()??;
    let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
    let hmtx_data = provider.table_data(tag::HMTX).ok()??;

    let glyph_count = usize::from(maxp_table.num_glyphs);
    let metric_count = usize::from(hhea_table.num_h_metrics);
    let hmtx_table = ReadScope::new(&hmtx_data)
        .read_dep::<HmtxTable<'_>>((glyph_count, metric_count))
        .ok()?;

    // Compare every advance with the first one and stop at the first
    // difference, so later metrics are never read.
    let mut reference_advance = None;
    for index in 0..metric_count {
        let advance = hmtx_table.h_metrics.read_item(index).ok()?.advance_width;
        if reference_advance.map_or(false, |expected| expected != advance) {
            return Some(false);
        }
        reference_advance = Some(advance);
    }

    Some(true)
}
4711
/// Build a best-effort [`FcPattern`] from nothing but a font file's name.
///
/// Only `ttf`, `otf`, `ttc`, `woff` and `woff2` files (extension compared
/// case-insensitively) are considered; any other path yields `None`, as does
/// a path whose extension or stem is missing or not valid UTF-8.
///
/// Style hints come from the tokens produced by `config::tokenize_lowercase`,
/// and the family name is the output of [`config::tokenize_font_stem`] joined
/// with spaces. `None` is returned when no family tokens remain.
///
/// `bold` and `italic` are set to a definite `True`/`False`, whereas
/// `oblique`, `monospace` and `condensed` fall back to `DontCare` when the
/// filename does not mention them.
#[cfg(feature = "std")]
fn pattern_from_filename(path: &std::path::Path) -> Option<FcPattern> {
    const FONT_EXTENSIONS: [&str; 5] = ["ttf", "otf", "ttc", "woff", "woff2"];

    let extension = path.extension()?.to_str()?.to_lowercase();
    if !FONT_EXTENSIONS.contains(&extension.as_str()) {
        return None;
    }

    let stem = path.file_stem()?.to_str()?;

    // Style detection: look for known style keywords among the stem's tokens.
    let lowered_tokens = crate::config::tokenize_lowercase(stem);
    let mentions = |keyword: &str| lowered_tokens.iter().any(|token| token == keyword);

    let wants_bold = mentions("bold") || mentions("heavy");
    let wants_italic = mentions("italic");
    let wants_oblique = mentions("oblique");
    let wants_mono = mentions("mono") || mentions("monospace");
    let wants_condensed = mentions("condensed");

    // Family name: the tokens kept by the font-stem tokenizer, space-separated.
    let family_tokens = crate::config::tokenize_font_stem(stem);
    if family_tokens.is_empty() {
        return None;
    }
    let family = family_tokens.join(" ");

    // `True` when the keyword was found, otherwise the given fallback.
    let flag = |present: bool, absent: PatternMatch| {
        if present {
            PatternMatch::True
        } else {
            absent
        }
    };

    Some(FcPattern {
        name: Some(stem.to_string()),
        family: Some(family),
        bold: flag(wants_bold, PatternMatch::False),
        italic: flag(wants_italic, PatternMatch::False),
        oblique: flag(wants_oblique, PatternMatch::DontCare),
        monospace: flag(wants_mono, PatternMatch::DontCare),
        condensed: flag(wants_condensed, PatternMatch::DontCare),
        weight: if wants_bold {
            FcWeight::Bold
        } else {
            FcWeight::Normal
        },
        stretch: if wants_condensed {
            FcStretch::Condensed
        } else {
            FcStretch::Normal
        },
        unicode_ranges: Vec::new(),
        metadata: FcFontMetadata::default(),
        render_config: FcFontRenderConfig::default(),
    })
}