Skip to main content

rust_fontconfig/
lib.rs

1//! # rust-fontconfig
2//!
3//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies) - using allsorts as a font parser to support `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf`
4//!
5//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
6//!
7//! ## Usage
8//!
9//! ### Basic Font Query
10//!
11//! ```rust,no_run
12//! use rust_fontconfig::{FcFontCache, FcPattern};
13//!
14//! fn main() {
15//!     // Build the font cache
16//!     let cache = FcFontCache::build();
17//!
18//!     // Query a font by name
19//!     let results = cache.query(
20//!         &FcPattern {
21//!             name: Some(String::from("Arial")),
22//!             ..Default::default()
23//!         },
24//!         &mut Vec::new() // Trace messages container
25//!     );
26//!
27//!     if let Some(font_match) = results {
28//!         println!("Font match ID: {:?}", font_match.id);
29//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
30//!     } else {
31//!         println!("No matching font found");
32//!     }
33//! }
34//! ```
35//!
36//! ### Resolve Font Chain and Query for Text
37//!
38//! ```rust,no_run
39//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
40//!
41//! fn main() {
42//!     # #[cfg(feature = "std")]
43//!     # {
44//!     let cache = FcFontCache::build();
45//!
46//!     // Build font fallback chain (without text parameter)
47//!     let font_chain = cache.resolve_font_chain(
48//!         &["Arial".to_string(), "sans-serif".to_string()],
49//!         FcWeight::Normal,
50//!         PatternMatch::DontCare,
51//!         PatternMatch::DontCare,
52//!         &mut Vec::new(),
53//!     );
54//!
55//!     // Query which fonts to use for specific text
56//!     let text = "Hello 你好 Здравствуйте";
57//!     let font_runs = font_chain.query_for_text(&cache, text);
58//!
59//!     println!("Text split into {} font runs:", font_runs.len());
60//!     for run in font_runs {
61//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
62//!     }
63//!     # }
64//! }
65//! ```
66
67#![allow(non_snake_case)]
68#![cfg_attr(not(feature = "std"), no_std)]
69
70extern crate alloc;
71
72#[cfg(all(feature = "std", feature = "parsing"))]
73use alloc::borrow::ToOwned;
74use alloc::collections::btree_map::BTreeMap;
75use alloc::string::{String, ToString};
76use alloc::vec::Vec;
77use alloc::{format, vec};
78#[cfg(all(feature = "std", feature = "parsing"))]
79use allsorts::binary::read::ReadScope;
80#[cfg(all(feature = "std", feature = "parsing"))]
81use allsorts::get_name::fontcode_get_name;
82#[cfg(all(feature = "std", feature = "parsing"))]
83use allsorts::tables::os2::Os2;
84#[cfg(all(feature = "std", feature = "parsing"))]
85use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
86#[cfg(all(feature = "std", feature = "parsing"))]
87use allsorts::tag;
88#[cfg(feature = "std")]
89use std::path::PathBuf;
90
91pub mod utils;
92#[cfg(feature = "std")]
93pub mod config;
94
95#[cfg(feature = "ffi")]
96pub mod ffi;
97
98#[cfg(feature = "async-registry")]
99pub mod scoring;
100#[cfg(feature = "async-registry")]
101pub mod registry;
102#[cfg(feature = "async-registry")]
103pub mod multithread;
104#[cfg(feature = "cache")]
105pub mod disk_cache;
106
/// Platform selector used when resolving generic font families
/// (`serif`, `sans-serif`, `monospace`, ...) to concrete font names.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum OperatingSystem {
    /// Microsoft Windows.
    Windows,
    /// Linux. `OperatingSystem::current()` also reports this variant for
    /// targets it does not recognise.
    Linux,
    /// Apple macOS.
    MacOS,
    /// WebAssembly targets; the generic-family font lists are empty here.
    Wasm,
}
115
116impl OperatingSystem {
117    /// Detect the current operating system at compile time
118    pub fn current() -> Self {
119        #[cfg(target_os = "windows")]
120        return OperatingSystem::Windows;
121        
122        #[cfg(target_os = "linux")]
123        return OperatingSystem::Linux;
124        
125        #[cfg(target_os = "macos")]
126        return OperatingSystem::MacOS;
127        
128        #[cfg(target_family = "wasm")]
129        return OperatingSystem::Wasm;
130        
131        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_family = "wasm")))]
132        return OperatingSystem::Linux; // Default fallback
133    }
134    
135    /// Get system-specific fonts for the "serif" generic family
136    /// Prioritizes fonts based on Unicode range coverage
137    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
138        let has_cjk = has_cjk_ranges(unicode_ranges);
139        let has_arabic = has_arabic_ranges(unicode_ranges);
140        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
141        
142        match self {
143            OperatingSystem::Windows => {
144                let mut fonts = Vec::new();
145                if has_cjk {
146                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
147                }
148                if has_arabic {
149                    fonts.push("Traditional Arabic");
150                }
151                fonts.push("Times New Roman");
152                fonts.iter().map(|s| s.to_string()).collect()
153            }
154            OperatingSystem::Linux => {
155                let mut fonts = Vec::new();
156                if has_cjk {
157                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
158                }
159                if has_arabic {
160                    fonts.push("Noto Serif Arabic");
161                }
162                fonts.extend_from_slice(&[
163                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
164                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
165                ]);
166                fonts.iter().map(|s| s.to_string()).collect()
167            }
168            OperatingSystem::MacOS => {
169                let mut fonts = Vec::new();
170                if has_cjk {
171                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
172                }
173                if has_arabic {
174                    fonts.push("Geeza Pro");
175                }
176                fonts.extend_from_slice(&["Times", "New York", "Palatino"]);
177                fonts.iter().map(|s| s.to_string()).collect()
178            }
179            OperatingSystem::Wasm => Vec::new(),
180        }
181    }
182    
183    /// Get system-specific fonts for the "sans-serif" generic family
184    /// Prioritizes fonts based on Unicode range coverage
185    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
186        let has_cjk = has_cjk_ranges(unicode_ranges);
187        let has_arabic = has_arabic_ranges(unicode_ranges);
188        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
189        let has_hebrew = has_hebrew_ranges(unicode_ranges);
190        let has_thai = has_thai_ranges(unicode_ranges);
191        
192        match self {
193            OperatingSystem::Windows => {
194                let mut fonts = Vec::new();
195                if has_cjk {
196                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
197                }
198                if has_arabic {
199                    fonts.push("Segoe UI Arabic");
200                }
201                if has_hebrew {
202                    fonts.push("Segoe UI Hebrew");
203                }
204                if has_thai {
205                    fonts.push("Leelawadee UI");
206                }
207                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
208                fonts.iter().map(|s| s.to_string()).collect()
209            }
210            OperatingSystem::Linux => {
211                let mut fonts = Vec::new();
212                if has_cjk {
213                    fonts.extend_from_slice(&[
214                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
215                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
216                    ]);
217                }
218                if has_arabic {
219                    fonts.push("Noto Sans Arabic");
220                }
221                if has_hebrew {
222                    fonts.push("Noto Sans Hebrew");
223                }
224                if has_thai {
225                    fonts.push("Noto Sans Thai");
226                }
227                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
228                fonts.iter().map(|s| s.to_string()).collect()
229            }
230            OperatingSystem::MacOS => {
231                let mut fonts = Vec::new();
232                if has_cjk {
233                    fonts.extend_from_slice(&[
234                        "Hiragino Sans", "Hiragino Kaku Gothic ProN", 
235                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
236                    ]);
237                }
238                if has_arabic {
239                    fonts.push("Geeza Pro");
240                }
241                if has_hebrew {
242                    fonts.push("Arial Hebrew");
243                }
244                if has_thai {
245                    fonts.push("Thonburi");
246                }
247                fonts.extend_from_slice(&["San Francisco", "Helvetica Neue", "Lucida Grande"]);
248                fonts.iter().map(|s| s.to_string()).collect()
249            }
250            OperatingSystem::Wasm => Vec::new(),
251        }
252    }
253    
254    /// Get system-specific fonts for the "monospace" generic family
255    /// Prioritizes fonts based on Unicode range coverage
256    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
257        let has_cjk = has_cjk_ranges(unicode_ranges);
258        
259        match self {
260            OperatingSystem::Windows => {
261                let mut fonts = Vec::new();
262                if has_cjk {
263                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
264                }
265                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
266                fonts.iter().map(|s| s.to_string()).collect()
267            }
268            OperatingSystem::Linux => {
269                let mut fonts = Vec::new();
270                if has_cjk {
271                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
272                }
273                fonts.extend_from_slice(&[
274                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
275                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
276                ]);
277                fonts.iter().map(|s| s.to_string()).collect()
278            }
279            OperatingSystem::MacOS => {
280                let mut fonts = Vec::new();
281                if has_cjk {
282                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
283                }
284                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
285                fonts.iter().map(|s| s.to_string()).collect()
286            }
287            OperatingSystem::Wasm => Vec::new(),
288        }
289    }
290    
291    /// Expand a generic CSS font family to system-specific font names
292    /// Returns the original name if not a generic family
293    /// Prioritizes fonts based on Unicode range coverage
294    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
295        match family.to_lowercase().as_str() {
296            "serif" => self.get_serif_fonts(unicode_ranges),
297            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
298            "monospace" => self.get_monospace_fonts(unicode_ranges),
299            "cursive" | "fantasy" | "system-ui" => {
300                // Use sans-serif as fallback for these
301                self.get_sans_serif_fonts(unicode_ranges)
302            }
303            _ => vec![family.to_string()],
304        }
305    }
306}
307
308/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
309/// Prioritizes fonts based on Unicode range coverage
310/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
311pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
312    let mut expanded = Vec::new();
313    
314    for family in families {
315        expanded.extend(os.expand_generic_family(family, unicode_ranges));
316    }
317    
318    expanded
319}
320
/// 128-bit identifier for a single font.
///
/// Fonts inside a collection each get their own id (collections are broken
/// up into separate fonts).
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FontId(pub u128);
325
326impl core::fmt::Debug for FontId {
327    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
328        core::fmt::Display::fmt(self, f)
329    }
330}
331
332impl core::fmt::Display for FontId {
333    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
334        let id = self.0;
335        write!(
336            f,
337            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
338            (id >> 96) & 0xFFFFFFFF,
339            (id >> 80) & 0xFFFF,
340            (id >> 64) & 0xFFFF,
341            (id >> 48) & 0xFFFF,
342            id & 0xFFFFFFFFFFFF
343        )
344    }
345}
346
347impl FontId {
348    /// Generate a new unique FontId using an atomic counter
349    pub fn new() -> Self {
350        use core::sync::atomic::{AtomicU64, Ordering};
351        static COUNTER: AtomicU64 = AtomicU64::new(1);
352        let id = COUNTER.fetch_add(1, Ordering::Relaxed) as u128;
353        FontId(id)
354    }
355}
356
/// Tri-state requirement on a boolean font property (yes / no / don't care).
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum PatternMatch {
    /// No constraint on the property; this is the `Default` value.
    #[default]
    DontCare,
    /// The property has to be true for the selected font.
    True,
    /// The property has to be false for the selected font.
    False,
}
370
371impl PatternMatch {
372    fn needs_to_match(&self) -> bool {
373        matches!(self, PatternMatch::True | PatternMatch::False)
374    }
375
376    fn matches(&self, other: &PatternMatch) -> bool {
377        match (self, other) {
378            (PatternMatch::DontCare, _) => true,
379            (_, PatternMatch::DontCare) => true,
380            (a, b) => a == b,
381        }
382    }
383}
384
/// CSS font weights. Each variant's discriminant is its numeric CSS value
/// (100 through 900 in steps of 100).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcWeight {
    /// Weight 100.
    Thin = 100,
    /// Weight 200.
    ExtraLight = 200,
    /// Weight 300.
    Light = 300,
    /// Weight 400.
    Normal = 400,
    /// Weight 500.
    Medium = 500,
    /// Weight 600.
    SemiBold = 600,
    /// Weight 700.
    Bold = 700,
    /// Weight 800.
    ExtraBold = 800,
    /// Weight 900.
    Black = 900,
}
400
401impl FcWeight {
402    pub fn from_u16(weight: u16) -> Self {
403        match weight {
404            0..=149 => FcWeight::Thin,
405            150..=249 => FcWeight::ExtraLight,
406            250..=349 => FcWeight::Light,
407            350..=449 => FcWeight::Normal,
408            450..=549 => FcWeight::Medium,
409            550..=649 => FcWeight::SemiBold,
410            650..=749 => FcWeight::Bold,
411            750..=849 => FcWeight::ExtraBold,
412            _ => FcWeight::Black,
413        }
414    }
415
416    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
417        if available.is_empty() {
418            return None;
419        }
420
421        // Exact match
422        if available.contains(self) {
423            return Some(*self);
424        }
425
426        // Get numeric value
427        let self_value = *self as u16;
428
429        match *self {
430            FcWeight::Normal => {
431                // For Normal (400), try Medium (500) first
432                if available.contains(&FcWeight::Medium) {
433                    return Some(FcWeight::Medium);
434                }
435                // Then try lighter weights
436                for weight in &[FcWeight::Light, FcWeight::ExtraLight, FcWeight::Thin] {
437                    if available.contains(weight) {
438                        return Some(*weight);
439                    }
440                }
441                // Last, try heavier weights
442                for weight in &[
443                    FcWeight::SemiBold,
444                    FcWeight::Bold,
445                    FcWeight::ExtraBold,
446                    FcWeight::Black,
447                ] {
448                    if available.contains(weight) {
449                        return Some(*weight);
450                    }
451                }
452            }
453            FcWeight::Medium => {
454                // For Medium (500), try Normal (400) first
455                if available.contains(&FcWeight::Normal) {
456                    return Some(FcWeight::Normal);
457                }
458                // Then try lighter weights
459                for weight in &[FcWeight::Light, FcWeight::ExtraLight, FcWeight::Thin] {
460                    if available.contains(weight) {
461                        return Some(*weight);
462                    }
463                }
464                // Last, try heavier weights
465                for weight in &[
466                    FcWeight::SemiBold,
467                    FcWeight::Bold,
468                    FcWeight::ExtraBold,
469                    FcWeight::Black,
470                ] {
471                    if available.contains(weight) {
472                        return Some(*weight);
473                    }
474                }
475            }
476            FcWeight::Thin | FcWeight::ExtraLight | FcWeight::Light => {
477                // For lightweight fonts (<400), first try lighter or equal weights
478                let mut best_match = None;
479                let mut smallest_diff = u16::MAX;
480
481                // Find the closest lighter weight
482                for weight in available {
483                    let weight_value = *weight as u16;
484                    // Only consider weights <= self (per test expectation)
485                    if weight_value <= self_value {
486                        let diff = self_value - weight_value;
487                        if diff < smallest_diff {
488                            smallest_diff = diff;
489                            best_match = Some(*weight);
490                        }
491                    }
492                }
493
494                if best_match.is_some() {
495                    return best_match;
496                }
497
498                // If no lighter weight, find the closest heavier weight
499                best_match = None;
500                smallest_diff = u16::MAX;
501
502                for weight in available {
503                    let weight_value = *weight as u16;
504                    if weight_value > self_value {
505                        let diff = weight_value - self_value;
506                        if diff < smallest_diff {
507                            smallest_diff = diff;
508                            best_match = Some(*weight);
509                        }
510                    }
511                }
512
513                return best_match;
514            }
515            FcWeight::SemiBold | FcWeight::Bold | FcWeight::ExtraBold | FcWeight::Black => {
516                // For heavyweight fonts (>500), first try heavier or equal weights
517                let mut best_match = None;
518                let mut smallest_diff = u16::MAX;
519
520                // Find the closest heavier weight
521                for weight in available {
522                    let weight_value = *weight as u16;
523                    // Only consider weights >= self
524                    if weight_value >= self_value {
525                        let diff = weight_value - self_value;
526                        if diff < smallest_diff {
527                            smallest_diff = diff;
528                            best_match = Some(*weight);
529                        }
530                    }
531                }
532
533                if best_match.is_some() {
534                    return best_match;
535                }
536
537                // If no heavier weight, find the closest lighter weight
538                best_match = None;
539                smallest_diff = u16::MAX;
540
541                for weight in available {
542                    let weight_value = *weight as u16;
543                    if weight_value < self_value {
544                        let diff = self_value - weight_value;
545                        if diff < smallest_diff {
546                            smallest_diff = diff;
547                            best_match = Some(*weight);
548                        }
549                    }
550                }
551
552                return best_match;
553            }
554        }
555
556        // If nothing matches by now, return the first available weight
557        Some(available[0])
558    }
559}
560
561impl Default for FcWeight {
562    fn default() -> Self {
563        FcWeight::Normal
564    }
565}
566
/// CSS font-stretch keywords, ordered from narrowest (1) to widest (9).
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcStretch {
    /// Width class 1.
    UltraCondensed = 1,
    /// Width class 2.
    ExtraCondensed = 2,
    /// Width class 3.
    Condensed = 3,
    /// Width class 4.
    SemiCondensed = 4,
    /// Width class 5.
    Normal = 5,
    /// Width class 6.
    SemiExpanded = 6,
    /// Width class 7.
    Expanded = 7,
    /// Width class 8.
    ExtraExpanded = 8,
    /// Width class 9.
    UltraExpanded = 9,
}
582
583impl FcStretch {
584    pub fn is_condensed(&self) -> bool {
585        use self::FcStretch::*;
586        match self {
587            UltraCondensed => true,
588            ExtraCondensed => true,
589            Condensed => true,
590            SemiCondensed => true,
591            Normal => false,
592            SemiExpanded => false,
593            Expanded => false,
594            ExtraExpanded => false,
595            UltraExpanded => false,
596        }
597    }
598    pub fn from_u16(width_class: u16) -> Self {
599        match width_class {
600            1 => FcStretch::UltraCondensed,
601            2 => FcStretch::ExtraCondensed,
602            3 => FcStretch::Condensed,
603            4 => FcStretch::SemiCondensed,
604            5 => FcStretch::Normal,
605            6 => FcStretch::SemiExpanded,
606            7 => FcStretch::Expanded,
607            8 => FcStretch::ExtraExpanded,
608            9 => FcStretch::UltraExpanded,
609            _ => FcStretch::Normal,
610        }
611    }
612
613    /// Follows CSS spec for stretch matching
614    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
615        if available.is_empty() {
616            return None;
617        }
618
619        if available.contains(self) {
620            return Some(*self);
621        }
622
623        // For 'normal' or condensed values, narrower widths are checked first, then wider values
624        if *self <= FcStretch::Normal {
625            // Find narrower values first
626            let mut closest_narrower = None;
627            for stretch in available.iter() {
628                if *stretch < *self
629                    && (closest_narrower.is_none() || *stretch > closest_narrower.unwrap())
630                {
631                    closest_narrower = Some(*stretch);
632                }
633            }
634
635            if closest_narrower.is_some() {
636                return closest_narrower;
637            }
638
639            // Otherwise, find wider values
640            let mut closest_wider = None;
641            for stretch in available.iter() {
642                if *stretch > *self
643                    && (closest_wider.is_none() || *stretch < closest_wider.unwrap())
644                {
645                    closest_wider = Some(*stretch);
646                }
647            }
648
649            return closest_wider;
650        } else {
651            // For expanded values, wider values are checked first, then narrower values
652            let mut closest_wider = None;
653            for stretch in available.iter() {
654                if *stretch > *self
655                    && (closest_wider.is_none() || *stretch < closest_wider.unwrap())
656                {
657                    closest_wider = Some(*stretch);
658                }
659            }
660
661            if closest_wider.is_some() {
662                return closest_wider;
663            }
664
665            // Otherwise, find narrower values
666            let mut closest_narrower = None;
667            for stretch in available.iter() {
668                if *stretch < *self
669                    && (closest_narrower.is_none() || *stretch > closest_narrower.unwrap())
670                {
671                    closest_narrower = Some(*stretch);
672                }
673            }
674
675            return closest_narrower;
676        }
677    }
678}
679
680impl Default for FcStretch {
681    fn default() -> Self {
682        FcStretch::Normal
683    }
684}
685
/// A range of Unicode code points used for font matching.
///
/// Both bounds are inclusive: `contains` accepts `start` and `end`
/// themselves.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct UnicodeRange {
    /// First code point of the range.
    pub start: u32,
    /// Last code point of the range.
    pub end: u32,
}
694
695/// The default set of Unicode-block fallback scripts that
696/// [`FcFontCache::resolve_font_chain`] pulls in when no explicit
697/// `scripts_hint` is supplied.
698///
699/// Keeping this exposed lets callers that *do* want the default
700/// behaviour build the set explicitly — typically by union-ing it
701/// with a detected-from-document set before calling
702/// [`FcFontCache::resolve_font_chain_with_scripts`].
703pub const DEFAULT_UNICODE_FALLBACK_SCRIPTS: &[UnicodeRange] = &[
704    UnicodeRange { start: 0x0400, end: 0x04FF }, // Cyrillic
705    UnicodeRange { start: 0x0600, end: 0x06FF }, // Arabic
706    UnicodeRange { start: 0x0900, end: 0x097F }, // Devanagari
707    UnicodeRange { start: 0x3040, end: 0x309F }, // Hiragana
708    UnicodeRange { start: 0x30A0, end: 0x30FF }, // Katakana
709    UnicodeRange { start: 0x4E00, end: 0x9FFF }, // CJK Unified Ideographs
710    UnicodeRange { start: 0xAC00, end: 0xD7A3 }, // Hangul Syllables
711];
712
713impl UnicodeRange {
714    pub fn contains(&self, c: char) -> bool {
715        let c = c as u32;
716        c >= self.start && c <= self.end
717    }
718
719    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
720        self.start <= other.end && other.start <= self.end
721    }
722
723    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
724        self.start >= other.start && self.end <= other.end
725    }
726}
727
728/// Check if any range covers CJK Unified Ideographs, Hiragana, Katakana, or Hangul
729pub fn has_cjk_ranges(ranges: &[UnicodeRange]) -> bool {
730    ranges.iter().any(|r| {
731        (r.start >= 0x4E00 && r.start <= 0x9FFF) ||
732        (r.start >= 0x3040 && r.start <= 0x309F) ||
733        (r.start >= 0x30A0 && r.start <= 0x30FF) ||
734        (r.start >= 0xAC00 && r.start <= 0xD7AF)
735    })
736}
737
738/// Check if any range covers the Arabic block
739pub fn has_arabic_ranges(ranges: &[UnicodeRange]) -> bool {
740    ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF)
741}
742
743/// Check if any range covers the Cyrillic block
744pub fn has_cyrillic_ranges(ranges: &[UnicodeRange]) -> bool {
745    ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF)
746}
747
748/// Check if any range covers the Hebrew block
749pub fn has_hebrew_ranges(ranges: &[UnicodeRange]) -> bool {
750    ranges.iter().any(|r| r.start >= 0x0590 && r.start <= 0x05FF)
751}
752
753/// Check if any range covers the Thai block
754pub fn has_thai_ranges(ranges: &[UnicodeRange]) -> bool {
755    ranges.iter().any(|r| r.start >= 0x0E00 && r.start <= 0x0E7F)
756}
757
/// Severity of a trace message, ordered from least (`Debug`) to most
/// (`Error`) severe.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum TraceLevel {
    /// Lowest level in the ordering.
    Debug,
    /// Informational message.
    Info,
    /// Warning message.
    Warning,
    /// Highest level in the ordering.
    Error,
}
766
767/// Reason for font matching failure or success
768#[derive(Debug, Clone, PartialEq, Eq, Hash)]
769pub enum MatchReason {
770    NameMismatch {
771        requested: Option<String>,
772        found: Option<String>,
773    },
774    FamilyMismatch {
775        requested: Option<String>,
776        found: Option<String>,
777    },
778    StyleMismatch {
779        property: &'static str,
780        requested: String,
781        found: String,
782    },
783    WeightMismatch {
784        requested: FcWeight,
785        found: FcWeight,
786    },
787    StretchMismatch {
788        requested: FcStretch,
789        found: FcStretch,
790    },
791    UnicodeRangeMismatch {
792        character: char,
793        ranges: Vec<UnicodeRange>,
794    },
795    Success,
796}
797
798/// Trace message for debugging font matching
799#[derive(Debug, Clone, PartialEq, Eq)]
800pub struct TraceMsg {
801    pub level: TraceLevel,
802    pub path: String,
803    pub reason: MatchReason,
804}
805
/// Hinting style for font rendering.
///
/// `None` is the `Default` value.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcHintStyle {
    /// Discriminant 0 (default).
    #[default]
    None = 0,
    /// Discriminant 1.
    Slight = 1,
    /// Discriminant 2.
    Medium = 2,
    /// Discriminant 3.
    Full = 3,
}
817
/// Subpixel rendering order.
///
/// `Unknown` is the `Default` value.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcRgba {
    /// Discriminant 0 (default).
    #[default]
    Unknown = 0,
    /// Discriminant 1.
    Rgb = 1,
    /// Discriminant 2.
    Bgr = 2,
    /// Discriminant 3.
    Vrgb = 3,
    /// Discriminant 4.
    Vbgr = 4,
    /// Discriminant 5.
    None = 5,
}
831
/// LCD filter mode for subpixel rendering.
///
/// `None` is the `Default` value.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcLcdFilter {
    /// Discriminant 0 (default).
    #[default]
    None = 0,
    /// Discriminant 1.
    Default = 1,
    /// Discriminant 2.
    Light = 2,
    /// Discriminant 3.
    Legacy = 3,
}
843
844/// Per-font rendering configuration from system font config (Linux fonts.conf).
845///
846/// All fields are `Option<T>` -- `None` means "use system default".
847/// On non-Linux platforms, this is always all-None (no per-font overrides).
848#[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
849#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
850pub struct FcFontRenderConfig {
851    pub antialias: Option<bool>,
852    pub hinting: Option<bool>,
853    pub hintstyle: Option<FcHintStyle>,
854    pub autohint: Option<bool>,
855    pub rgba: Option<FcRgba>,
856    pub lcdfilter: Option<FcLcdFilter>,
857    pub embeddedbitmap: Option<bool>,
858    pub embolden: Option<bool>,
859    pub dpi: Option<f64>,
860    pub scale: Option<f64>,
861    pub minspace: Option<bool>,
862}
863
864/// Helper newtype to provide Eq/Ord for Option<f64> via total-order bit comparison.
865/// This allows FcFontRenderConfig to be used inside FcPattern which derives Eq + Ord.
866impl Eq for FcFontRenderConfig {}
867
868impl Ord for FcFontRenderConfig {
869    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
870        // Compare all non-f64 fields first
871        let ord = self.antialias.cmp(&other.antialias)
872            .then_with(|| self.hinting.cmp(&other.hinting))
873            .then_with(|| self.hintstyle.cmp(&other.hintstyle))
874            .then_with(|| self.autohint.cmp(&other.autohint))
875            .then_with(|| self.rgba.cmp(&other.rgba))
876            .then_with(|| self.lcdfilter.cmp(&other.lcdfilter))
877            .then_with(|| self.embeddedbitmap.cmp(&other.embeddedbitmap))
878            .then_with(|| self.embolden.cmp(&other.embolden))
879            .then_with(|| self.minspace.cmp(&other.minspace));
880
881        // For f64 fields, use to_bits() for total ordering
882        let ord = ord.then_with(|| {
883            let a = self.dpi.map(|v| v.to_bits());
884            let b = other.dpi.map(|v| v.to_bits());
885            a.cmp(&b)
886        });
887        ord.then_with(|| {
888            let a = self.scale.map(|v| v.to_bits());
889            let b = other.scale.map(|v| v.to_bits());
890            a.cmp(&b)
891        })
892    }
893}
894
/// Font pattern for matching
///
/// The same type is used for queries and for stored per-font metadata:
/// `FcFontCache` keeps a `BTreeMap<FcPattern, FontId>` as its query index and
/// a `BTreeMap<FontId, FcPattern>` as its metadata table, which is why the
/// struct derives `Ord`/`Eq`.
///
/// The hand-written `Debug` impl omits fields that still hold their
/// "unset" value.
#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcPattern {
    /// font name
    pub name: Option<String>,
    /// family name
    pub family: Option<String>,
    /// "italic" property
    pub italic: PatternMatch,
    /// "oblique" property
    pub oblique: PatternMatch,
    /// "bold" property
    pub bold: PatternMatch,
    /// "monospace" property
    pub monospace: PatternMatch,
    /// "condensed" property
    pub condensed: PatternMatch,
    /// font weight
    pub weight: FcWeight,
    /// font stretch
    pub stretch: FcStretch,
    /// unicode ranges to match; an empty list is treated by
    /// `contains_char` as "matches every character"
    pub unicode_ranges: Vec<UnicodeRange>,
    /// extended font metadata
    pub metadata: FcFontMetadata,
    /// per-font rendering configuration (from system fonts.conf on Linux)
    pub render_config: FcFontRenderConfig,
}
925
926impl core::fmt::Debug for FcPattern {
927    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
928        let mut d = f.debug_struct("FcPattern");
929
930        if let Some(name) = &self.name {
931            d.field("name", name);
932        }
933
934        if let Some(family) = &self.family {
935            d.field("family", family);
936        }
937
938        if self.italic != PatternMatch::DontCare {
939            d.field("italic", &self.italic);
940        }
941
942        if self.oblique != PatternMatch::DontCare {
943            d.field("oblique", &self.oblique);
944        }
945
946        if self.bold != PatternMatch::DontCare {
947            d.field("bold", &self.bold);
948        }
949
950        if self.monospace != PatternMatch::DontCare {
951            d.field("monospace", &self.monospace);
952        }
953
954        if self.condensed != PatternMatch::DontCare {
955            d.field("condensed", &self.condensed);
956        }
957
958        if self.weight != FcWeight::Normal {
959            d.field("weight", &self.weight);
960        }
961
962        if self.stretch != FcStretch::Normal {
963            d.field("stretch", &self.stretch);
964        }
965
966        if !self.unicode_ranges.is_empty() {
967            d.field("unicode_ranges", &self.unicode_ranges);
968        }
969
970        // Only show non-empty metadata fields
971        let empty_metadata = FcFontMetadata::default();
972        if self.metadata != empty_metadata {
973            d.field("metadata", &self.metadata);
974        }
975
976        // Only show render_config when it differs from default
977        let empty_render_config = FcFontRenderConfig::default();
978        if self.render_config != empty_render_config {
979            d.field("render_config", &self.render_config);
980        }
981
982        d.finish()
983    }
984}
985
/// Font metadata from the OS/2 table
///
/// Every field is optional; `FcFontMetadata::default()` (all `None`) is what
/// `FcPattern`'s `Debug` impl treats as "no metadata".
// NOTE(review): the field set (copyright, designer, license, postscript
// name, ...) looks like the OpenType `name` table records rather than OS/2 —
// confirm against the parsing code and adjust the summary line if so.
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontMetadata {
    pub copyright: Option<String>,
    pub designer: Option<String>,
    pub designer_url: Option<String>,
    pub font_family: Option<String>,
    pub font_subfamily: Option<String>,
    pub full_name: Option<String>,
    pub id_description: Option<String>,
    pub license: Option<String>,
    pub license_url: Option<String>,
    pub manufacturer: Option<String>,
    pub manufacturer_url: Option<String>,
    pub postscript_name: Option<String>,
    pub preferred_family: Option<String>,
    pub preferred_subfamily: Option<String>,
    pub trademark: Option<String>,
    pub unique_id: Option<String>,
    pub version: Option<String>,
}
1008
1009impl FcPattern {
1010    /// Check if this pattern would match the given character
1011    pub fn contains_char(&self, c: char) -> bool {
1012        if self.unicode_ranges.is_empty() {
1013            return true; // No ranges specified means match all characters
1014        }
1015
1016        for range in &self.unicode_ranges {
1017            if range.contains(c) {
1018                return true;
1019            }
1020        }
1021
1022        false
1023    }
1024}
1025
/// Font match result with UUID
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontMatch {
    /// Identifier of the matched font; usable with
    /// `FcFontCache::get_font_by_id` / `get_metadata_by_id`.
    pub id: FontId,
    /// Unicode ranges reported for the matched font.
    pub unicode_ranges: Vec<UnicodeRange>,
    /// Fallback fonts attached to this match (each without nested fallbacks).
    pub fallbacks: Vec<FontMatchNoFallback>,
}
1033
/// Font match result with UUID (without fallback)
///
/// Same shape as [`FontMatch`] minus the `fallbacks` list; this is the
/// element type of `FontMatch::fallbacks`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontMatchNoFallback {
    /// Identifier of the matched font.
    pub id: FontId,
    /// Unicode ranges reported for the matched font.
    pub unicode_ranges: Vec<UnicodeRange>,
}
1040
/// A run of text that uses the same font
/// Returned by FontFallbackChain::query_for_text()
///
/// Runs are contiguous and non-overlapping: `text` is exactly
/// `original[start_byte..end_byte]`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedFontRun {
    /// The text content of this run
    pub text: String,
    /// Start byte index in the original text
    pub start_byte: usize,
    /// End byte index in the original text (exclusive)
    pub end_byte: usize,
    /// The font to use for this run (None if no font found)
    pub font_id: Option<FontId>,
    /// Which CSS font-family this came from.
    ///
    /// `"(unicode-fallback)"` when the font came from the chain's Unicode
    /// fallbacks, and the empty string when `font_id` is `None`.
    pub css_source: String,
}
1056
/// Resolved font fallback chain for a CSS font-family stack
/// This represents the complete chain of fonts to use for rendering text
///
/// Lookup order (see `resolve_char`): every group in `css_fallbacks`, in
/// order, then `unicode_fallbacks`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FontFallbackChain {
    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
    pub css_fallbacks: Vec<CssFallbackGroup>,

    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
    /// Only populated if css_fallbacks don't cover all requested characters
    pub unicode_fallbacks: Vec<FontMatch>,

    /// The original CSS font-family stack that was requested
    pub original_stack: Vec<String>,
}
1072
1073impl FontFallbackChain {
1074    /// Resolve which font should be used for a specific character
1075    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
1076    /// Returns None if no font in the chain can render this character
1077    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
1078        let codepoint = ch as u32;
1079
1080        // Check CSS fallbacks in order
1081        for group in &self.css_fallbacks {
1082            for font in &group.fonts {
1083                let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1084                if meta.unicode_ranges.is_empty() {
1085                    continue; // No range info — don't assume it covers everything
1086                }
1087                if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1088                    return Some((font.id, group.css_name.clone()));
1089                }
1090            }
1091        }
1092
1093        // Check Unicode fallbacks
1094        for font in &self.unicode_fallbacks {
1095            let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1096            if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1097                return Some((font.id, "(unicode-fallback)".to_string()));
1098            }
1099        }
1100
1101        None
1102    }
1103    
1104    /// Resolve all characters in a text string to their fonts
1105    /// Returns a vector of (character, FontId, css_source) tuples
1106    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
1107        text.chars()
1108            .map(|ch| (ch, self.resolve_char(cache, ch)))
1109            .collect()
1110    }
1111    
1112    /// Query which fonts should be used for a text string, grouped by font
1113    /// Returns runs of consecutive characters that use the same font
1114    /// This is the main API for text shaping - call this to get font runs, then shape each run
1115    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
1116        if text.is_empty() {
1117            return Vec::new();
1118        }
1119        
1120        let mut runs: Vec<ResolvedFontRun> = Vec::new();
1121        let mut current_font: Option<FontId> = None;
1122        let mut current_css_source: Option<String> = None;
1123        let mut current_start_byte: usize = 0;
1124        
1125        for (byte_idx, ch) in text.char_indices() {
1126            let resolved = self.resolve_char(cache, ch);
1127            let (font_id, css_source) = match &resolved {
1128                Some((id, source)) => (Some(*id), Some(source.clone())),
1129                None => (None, None),
1130            };
1131            
1132            // Check if we need to start a new run
1133            let font_changed = font_id != current_font;
1134            
1135            if font_changed && byte_idx > 0 {
1136                // Finalize the current run
1137                let run_text = &text[current_start_byte..byte_idx];
1138                runs.push(ResolvedFontRun {
1139                    text: run_text.to_string(),
1140                    start_byte: current_start_byte,
1141                    end_byte: byte_idx,
1142                    font_id: current_font,
1143                    css_source: current_css_source.clone().unwrap_or_default(),
1144                });
1145                current_start_byte = byte_idx;
1146            }
1147            
1148            current_font = font_id;
1149            current_css_source = css_source;
1150        }
1151        
1152        // Finalize the last run
1153        if current_start_byte < text.len() {
1154            let run_text = &text[current_start_byte..];
1155            runs.push(ResolvedFontRun {
1156                text: run_text.to_string(),
1157                start_byte: current_start_byte,
1158                end_byte: text.len(),
1159                font_id: current_font,
1160                css_source: current_css_source.unwrap_or_default(),
1161            });
1162        }
1163        
1164        runs
1165    }
1166}
1167
/// A group of fonts that are fallbacks for a single CSS font-family name
///
/// `FontFallbackChain::resolve_char` walks `fonts` in order and reports
/// `css_name` as the source of whichever font it picks from this group.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssFallbackGroup {
    /// The CSS font name (e.g., "NotoSansJP", "sans-serif")
    pub css_name: String,

    /// System fonts that match this CSS name
    /// First font in list is the best match
    pub fonts: Vec<FontMatch>,
}
1178
/// Cache key for font fallback chain queries
///
/// IMPORTANT: This key intentionally does NOT include per-text unicode
/// ranges — fallback chains are cached by CSS properties only. Different
/// texts with the same CSS font-stack share the same chain.
///
/// `scripts_hint_hash` distinguishes *which set of Unicode-fallback
/// scripts* the caller asked for. `None` means "the default set of 7
/// major scripts" (Cyrillic/Arabic/Devanagari/Hiragana/Katakana/CJK/Hangul,
/// back-compat behaviour of `resolve_font_chain`). `Some(h)` is a
/// stable hash of a caller-supplied script list so an ASCII-only
/// query doesn't collide with a CJK-aware one.
///
/// This is the key type of `FcFontCache::chain_cache`, hence the `Hash + Eq`
/// derives.
#[cfg(feature = "std")]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct FontChainCacheKey {
    /// CSS font stack (expanded to OS-specific fonts)
    pub(crate) font_families: Vec<String>,
    /// Font weight
    pub(crate) weight: FcWeight,
    /// Font style flag: italic
    pub(crate) italic: PatternMatch,
    /// Font style flag: oblique
    pub(crate) oblique: PatternMatch,
    /// Hash of the caller-supplied script hint (or `None` for the default set).
    pub(crate) scripts_hint_hash: Option<u64>,
}
1204
/// Reduce a `scripts_hint` slice to a stable `u64` suitable for
/// [`FontChainCacheKey::scripts_hint_hash`].
///
/// The hash is order-insensitive: a sorted copy of `ranges` is serialised
/// (little-endian `start`, then `end`, per range) and that byte string is
/// hashed, so `[CJK, Arabic]` and `[Arabic, CJK]` share a cache slot.
#[cfg(feature = "std")]
fn hash_scripts_hint(ranges: &[UnicodeRange]) -> u64 {
    let mut ordered: Vec<UnicodeRange> = ranges.to_vec();
    ordered.sort();

    let serialised = ordered.iter().fold(
        Vec::with_capacity(ordered.len() * 8),
        |mut bytes, range| {
            bytes.extend_from_slice(&range.start.to_le_bytes());
            bytes.extend_from_slice(&range.end.to_le_bytes());
            bytes
        },
    );

    crate::utils::content_hash_u64(&serialised)
}
1220
/// Path to a font file
///
/// `bytes_hash` is a deterministic 64-bit hash of the file's full
/// byte contents (see [`crate::utils::content_hash_u64`]). All faces
/// of a given `.ttc` file share the same `bytes_hash`, and two
/// different paths pointing at the same file contents also do —
/// so the cache can share a single `Arc<FontBytes>` across them via
/// [`FcFontCache::get_font_bytes`]. A value of `0` means "hash
/// not computed" (e.g. built from a filename-only scan, or loaded
/// from a legacy v1 disk cache); callers must treat `0` as opaque
/// and fall back to unshared reads.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcFontPath {
    /// Filesystem path of the font file, stored as a string.
    pub path: String,
    /// Index of the face inside the file (the filename-only scan always
    /// stores `0`).
    pub font_index: usize,
    /// 64-bit content hash of the file's bytes. 0 = not computed.
    ///
    /// `serde(default)` lets cache entries that lack this field deserialize
    /// with the value `0`.
    #[cfg_attr(feature = "cache", serde(default))]
    pub bytes_hash: u64,
}
1242
/// In-memory font data
///
/// Stored in `FcFontCache::memory_fonts` and handed out by reference through
/// [`FontSource::Memory`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct FcFont {
    /// Raw bytes of the font file.
    pub bytes: Vec<u8>,
    /// Index of the face inside `bytes`.
    pub font_index: usize,
    /// Free-form identifier, for identification in tests.
    pub id: String, // For identification in tests
}
1251
/// Font source enum to represent either disk or memory fonts
///
/// Borrowed view returned by `FcFontCache::get_font_by_id`; the lifetime
/// `'a` ties it to the cache it was looked up in.
#[derive(Debug, Clone)]
pub enum FontSource<'a> {
    /// Font loaded from memory
    Memory(&'a FcFont),
    /// Font loaded from disk
    Disk(&'a FcFontPath),
}
1260
/// A handle to font bytes returned by [`FcFontCache::get_font_bytes`].
///
/// On disk, an `Mmap` is used so untouched pages don't count toward
/// process RSS. In-memory fonts (`FcFont`) come back as `Owned` since
/// they're already on the heap.
///
/// `FontBytes` derefs to `[u8]` and implements `AsRef<[u8]>`, so any
/// existing API that wants `&[u8]` (allsorts, ttf-parser, …) can
/// accept it without code changes.
///
/// Both variants are `Send + Sync` (mmaps and `Arc<[u8]>` are both
/// safe to share across threads).
///
/// `Debug` is implemented by hand and prints only the variant name and the
/// byte length, never the contents.
#[cfg(feature = "std")]
pub enum FontBytes {
    /// Heap-owned bytes. Used for `FontSource::Memory` and as a
    /// fallback when mmap is unavailable.
    Owned(std::sync::Arc<[u8]>),
    /// File-backed mmap. Read-only; pages are demand-loaded by the
    /// kernel.
    Mmapped(mmapio::Mmap),
}
1282
#[cfg(feature = "std")]
impl FontBytes {
    /// Returns the font data as a byte slice, regardless of whether it is
    /// heap-owned or memory-mapped.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        match self {
            Self::Owned(heap) => &heap[..],
            Self::Mmapped(mapping) => &mapping[..],
        }
    }
}
1294
/// Lets a `FontBytes` be used wherever a `&[u8]` is expected via
/// auto-deref; forwards to [`FontBytes::as_slice`].
#[cfg(feature = "std")]
impl core::ops::Deref for FontBytes {
    type Target = [u8];
    #[inline]
    fn deref(&self) -> &[u8] {
        self.as_slice()
    }
}
1303
/// Lets a `FontBytes` be passed to APIs that are generic over
/// `AsRef<[u8]>`; forwards to [`FontBytes::as_slice`].
#[cfg(feature = "std")]
impl AsRef<[u8]> for FontBytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}
1311
#[cfg(feature = "std")]
impl core::fmt::Debug for FontBytes {
    /// Prints `FontBytes::<Variant>(<len> bytes)` — the backing kind and the
    /// length only, never the font data itself.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let (variant, len) = match self {
            Self::Owned(heap) => ("Owned", heap.len()),
            Self::Mmapped(mapping) => ("Mmapped", mapping.len()),
        };
        write!(f, "FontBytes::{}({} bytes)", variant, len)
    }
}
1322
/// Load the font file at `path` as shared [`FontBytes`].
///
/// On non-wasm targets the file is memory-mapped first
/// (`FontBytes::Mmapped`). If the file cannot be opened or mapped — or the
/// target is wasm — the whole file is read onto the heap instead
/// (`FontBytes::Owned`). Returns `None` only when that final read fails too.
#[cfg(feature = "std")]
fn open_font_bytes_mmap(path: &str) -> Option<std::sync::Arc<FontBytes>> {
    use std::sync::Arc;

    #[cfg(not(target_family = "wasm"))]
    {
        let mapped = std::fs::File::open(path).ok().and_then(|file| {
            // SAFETY: mapping requires that the file is not mutated while
            // mapped. For system fonts that's the overwhelming common case;
            // if a user replaces the file we accept reading the snapshot we
            // mapped earlier.
            let mapping = unsafe { mmapio::MmapOptions::new().map(&file) };
            mapping.ok()
        });
        if let Some(mmap) = mapped {
            return Some(Arc::new(FontBytes::Mmapped(mmap)));
        }
    }

    // Fallback path: plain heap read.
    std::fs::read(path)
        .ok()
        .map(|bytes| Arc::new(FontBytes::Owned(Arc::from(bytes))))
}
1346
/// A font supplied from memory, paired with a human-readable name.
/// This is the primary way to hand custom fonts to the font cache.
#[derive(Debug, Clone)]
pub struct NamedFont {
    /// Human-readable name for this font (e.g., "My Custom Font")
    pub name: String,
    /// The raw font file bytes (TTF, OTF, WOFF, WOFF2, TTC)
    pub bytes: Vec<u8>,
}

impl NamedFont {
    /// Builds a `NamedFont` from anything convertible into a `String` plus
    /// the font file's bytes. The bytes are stored as given.
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        let name: String = name.into();
        NamedFont { name, bytes }
    }
}
1366
/// Font cache, initialized at startup
///
/// The maps are keyed so that a font can be found by pattern (`patterns`),
/// by id (`disk_fonts`, `memory_fonts`, `metadata`) or by name token
/// (`token_index`, `font_tokens`). The two `std`-only fields are
/// mutex-guarded caches; a cloned cache starts with both of them empty.
#[derive(Debug)]
pub struct FcFontCache {
    // Pattern to FontId mapping (query index)
    pub(crate) patterns: BTreeMap<FcPattern, FontId>,
    // On-disk font paths
    pub(crate) disk_fonts: BTreeMap<FontId, FcFontPath>,
    // In-memory fonts
    pub(crate) memory_fonts: BTreeMap<FontId, FcFont>,
    // Metadata cache (patterns stored by ID for quick lookup)
    pub(crate) metadata: BTreeMap<FontId, FcPattern>,
    // Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds
    // This enables fast fuzzy search by intersecting token sets
    pub(crate) token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
    // Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>
    // Avoids re-tokenization during fuzzy search
    pub(crate) font_tokens: BTreeMap<FontId, Vec<String>>,
    // Font fallback chain cache, keyed by `FontChainCacheKey`
    // (CSS stack + weight/style + optional scripts-hint hash -> resolved chain)
    #[cfg(feature = "std")]
    pub(crate) chain_cache: std::sync::Mutex<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
    /// Shared file-bytes cache: content-hash → weak [`FontBytes`].
    ///
    /// [`FcFontCache::get_font_bytes`] populates this so that multiple
    /// FontIds backed by the same file (e.g. every face of a `.ttc`)
    /// return the same `Arc<FontBytes>` — and therefore the same mmap
    /// — instead of each allocating their own buffer. We hold `Weak`
    /// references so the mmap is unmapped as soon as no parsed font
    /// keeps it alive.
    #[cfg(feature = "std")]
    pub(crate) shared_bytes: std::sync::Mutex<std::collections::HashMap<u64, std::sync::Weak<FontBytes>>>,
}
1398
1399impl Clone for FcFontCache {
1400    fn clone(&self) -> Self {
1401        Self {
1402            patterns: self.patterns.clone(),
1403            disk_fonts: self.disk_fonts.clone(),
1404            memory_fonts: self.memory_fonts.clone(),
1405            metadata: self.metadata.clone(),
1406            token_index: self.token_index.clone(),
1407            font_tokens: self.font_tokens.clone(),
1408            #[cfg(feature = "std")]
1409            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()), // Empty cache for cloned instance
1410            #[cfg(feature = "std")]
1411            shared_bytes: std::sync::Mutex::new(std::collections::HashMap::new()), // Weak refs don't survive clones
1412        }
1413    }
1414}
1415
1416impl Default for FcFontCache {
1417    fn default() -> Self {
1418        Self {
1419            patterns: BTreeMap::new(),
1420            disk_fonts: BTreeMap::new(),
1421            memory_fonts: BTreeMap::new(),
1422            metadata: BTreeMap::new(),
1423            token_index: BTreeMap::new(),
1424            font_tokens: BTreeMap::new(),
1425            #[cfg(feature = "std")]
1426            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
1427            #[cfg(feature = "std")]
1428            shared_bytes: std::sync::Mutex::new(std::collections::HashMap::new()),
1429        }
1430    }
1431}
1432
1433impl FcFontCache {
1434    /// Helper method to add a font pattern to the token index
1435    pub(crate) fn index_pattern_tokens(&mut self, pattern: &FcPattern, id: FontId) {
1436        // Extract tokens from both name and family
1437        let mut all_tokens = Vec::new();
1438        
1439        if let Some(name) = &pattern.name {
1440            all_tokens.extend(Self::extract_font_name_tokens(name));
1441        }
1442        
1443        if let Some(family) = &pattern.family {
1444            all_tokens.extend(Self::extract_font_name_tokens(family));
1445        }
1446        
1447        // Convert tokens to lowercase and store them
1448        let tokens_lower: Vec<String> = all_tokens.iter().map(|t| t.to_lowercase()).collect();
1449        
1450        // Add each token (lowercase) to the index
1451        for token_lower in &tokens_lower {
1452            self.token_index
1453                .entry(token_lower.clone())
1454                .or_insert_with(alloc::collections::BTreeSet::new)
1455                .insert(id);
1456        }
1457        
1458        // Store pre-tokenized font name for fast lookup (no re-tokenization needed)
1459        self.font_tokens.insert(id, tokens_lower);
1460    }
1461
1462    /// Adds in-memory font files
1463    pub fn with_memory_fonts(&mut self, fonts: Vec<(FcPattern, FcFont)>) -> &mut Self {
1464        for (pattern, font) in fonts {
1465            let id = FontId::new();
1466            self.patterns.insert(pattern.clone(), id);
1467            self.metadata.insert(id, pattern.clone());
1468            self.memory_fonts.insert(id, font);
1469            self.index_pattern_tokens(&pattern, id);
1470        }
1471        self
1472    }
1473
1474    /// Adds a memory font with a specific ID (for testing)
1475    pub fn with_memory_font_with_id(
1476        &mut self,
1477        id: FontId,
1478        pattern: FcPattern,
1479        font: FcFont,
1480    ) -> &mut Self {
1481        self.patterns.insert(pattern.clone(), id);
1482        self.metadata.insert(id, pattern.clone());
1483        self.memory_fonts.insert(id, font);
1484        self.index_pattern_tokens(&pattern, id);
1485        self
1486    }
1487
1488    /// Get font data for a given font ID
1489    pub fn get_font_by_id<'a>(&'a self, id: &FontId) -> Option<FontSource<'a>> {
1490        // Check memory fonts first
1491        if let Some(font) = self.memory_fonts.get(id) {
1492            return Some(FontSource::Memory(font));
1493        }
1494        // Then check disk fonts
1495        if let Some(path) = self.disk_fonts.get(id) {
1496            return Some(FontSource::Disk(path));
1497        }
1498        None
1499    }
1500
    /// Get metadata directly from an ID
    ///
    /// Returns the [`FcPattern`] stored for `id` in the metadata table, or
    /// `None` if the id is not known to this cache.
    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<&FcPattern> {
        self.metadata.get(id)
    }
1505
1506    /// Get the font bytes for `id` as a shared [`FontBytes`].
1507    ///
1508    /// On disk the returned `Arc<FontBytes>` wraps an mmap of the file
1509    /// (`FontBytes::Mmapped`). Untouched pages of the file never count
1510    /// toward the process's RSS — for a font where layout shapes only
1511    /// a handful of glyphs, this is the difference between paying for
1512    /// the whole 4 MiB `.ttc` and paying for the cmap + a few glyf
1513    /// pages.
1514    ///
1515    /// In-memory fonts (`FontSource::Memory`) come back as
1516    /// `FontBytes::Owned`, since the bytes are already on the heap.
1517    ///
1518    /// Multiple `FontId`s backed by the same file content (every face
1519    /// of a `.ttc`, or two paths with identical bytes) return the
1520    /// *same* `Arc<FontBytes>` thanks to a content-hash → `Weak`
1521    /// cache. Bytes get unmapped automatically when the last consumer
1522    /// drops the Arc.
1523    ///
1524    /// `FontBytes` derefs to `[u8]`, so callers that only need
1525    /// `&[u8]` (allsorts, ttf-parser, …) can pass it through without
1526    /// thinking about the backing.
1527    ///
1528    /// Failure modes: returns `None` if the path is unknown, or the
1529    /// file no longer exists / cannot be opened, or the mmap call
1530    /// fails. Callers may retry with a fresh `get_font_bytes` if they
1531    /// suspect the file was replaced underneath them; the next call
1532    /// re-opens cleanly.
1533    #[cfg(feature = "std")]
1534    pub fn get_font_bytes(&self, id: &FontId) -> Option<std::sync::Arc<FontBytes>> {
1535        use std::sync::Arc;
1536        match self.get_font_by_id(id)? {
1537            FontSource::Memory(font) => Some(Arc::new(FontBytes::Owned(
1538                Arc::from(font.bytes.as_slice()),
1539            ))),
1540            FontSource::Disk(path) => {
1541                let hash = path.bytes_hash;
1542                if hash != 0 {
1543                    if let Ok(guard) = self.shared_bytes.lock() {
1544                        if let Some(weak) = guard.get(&hash) {
1545                            if let Some(arc) = weak.upgrade() {
1546                                return Some(arc);
1547                            }
1548                        }
1549                    }
1550                }
1551
1552                let arc = open_font_bytes_mmap(&path.path)?;
1553                if hash != 0 {
1554                    if let Ok(mut guard) = self.shared_bytes.lock() {
1555                        // Overwrite any stale weak ref that failed to upgrade.
1556                        guard.insert(hash, Arc::downgrade(&arc));
1557                    }
1558                }
1559                Some(arc)
1560            }
1561        }
1562    }
1563
    /// Returns an empty font cache (no_std / no filesystem).
    ///
    /// Selected when the `std` feature is disabled; identical to
    /// `Self::default()`.
    #[cfg(not(feature = "std"))]
    pub fn build() -> Self { Self::default() }

    /// Scans system font directories using filename heuristics (no allsorts).
    ///
    /// Selected when `std` is enabled but `parsing` is not; forwards to
    /// `build_from_filenames`.
    #[cfg(all(feature = "std", not(feature = "parsing")))]
    pub fn build() -> Self { Self::build_from_filenames() }

    /// Scans and parses all system fonts via allsorts for full metadata.
    ///
    /// Selected when both `std` and `parsing` are enabled; forwards to
    /// `build_inner` with no family filter.
    #[cfg(all(feature = "std", feature = "parsing"))]
    pub fn build() -> Self { Self::build_inner(None) }
1575
1576    /// Filename-only scan: discovers fonts on disk, guesses metadata from
1577    /// the filename using [`config::tokenize_font_stem`].
1578    #[cfg(all(feature = "std", not(feature = "parsing")))]
1579    fn build_from_filenames() -> Self {
1580        let mut cache = Self::default();
1581        for dir in crate::config::font_directories(OperatingSystem::current()) {
1582            for path in FcCollectFontFilesRecursive(dir) {
1583                let pattern = match pattern_from_filename(&path) {
1584                    Some(p) => p,
1585                    None => continue,
1586                };
1587                let id = FontId::new();
1588                cache.disk_fonts.insert(id, FcFontPath {
1589                    path: path.to_string_lossy().to_string(),
1590                    font_index: 0,
1591                    // Filename-only scan — we never read the bytes,
1592                    // so there's no dedup key. Leave as 0.
1593                    bytes_hash: 0,
1594                });
1595                cache.index_pattern_tokens(&pattern, id);
1596                cache.metadata.insert(id, pattern.clone());
1597                cache.patterns.insert(pattern, id);
1598            }
1599        }
1600        cache
1601    }
1602    
1603    /// Builds a font cache with only specific font families (and their fallbacks).
1604    /// 
1605    /// This is a performance optimization for applications that know ahead of time
1606    /// which fonts they need. Instead of scanning all system fonts (which can be slow
1607    /// on systems with many fonts), only fonts matching the specified families are loaded.
1608    /// 
1609    /// Generic family names like "sans-serif", "serif", "monospace" are expanded
1610    /// to OS-specific font names (e.g., "sans-serif" on macOS becomes "Helvetica Neue", 
1611    /// "San Francisco", etc.).
1612    /// 
1613    /// **Note**: This will NOT automatically load fallback fonts for scripts not covered
1614    /// by the requested families. If you need Arabic, CJK, or emoji support, either:
1615    /// - Add those families explicitly to the filter
1616    /// - Use `with_memory_fonts()` to add bundled fonts
1617    /// - Use `build()` to load all system fonts
1618    /// 
1619    /// # Arguments
1620    /// * `families` - Font family names to load (e.g., ["Arial", "sans-serif"])
1621    /// 
1622    /// # Example
1623    /// ```ignore
1624    /// // Only load Arial and sans-serif fallback fonts
1625    /// let cache = FcFontCache::build_with_families(&["Arial", "sans-serif"]);
1626    /// ```
1627    #[cfg(all(feature = "std", feature = "parsing"))]
1628    pub fn build_with_families(families: &[impl AsRef<str>]) -> Self {
1629        // Expand generic families to OS-specific names
1630        let os = OperatingSystem::current();
1631        let mut target_families: Vec<String> = Vec::new();
1632        
1633        for family in families {
1634            let family_str = family.as_ref();
1635            let expanded = os.expand_generic_family(family_str, &[]);
1636            if expanded.is_empty() || (expanded.len() == 1 && expanded[0] == family_str) {
1637                target_families.push(family_str.to_string());
1638            } else {
1639                target_families.extend(expanded);
1640            }
1641        }
1642        
1643        Self::build_inner(Some(&target_families))
1644    }
1645    
1646    /// Inner build function that handles both filtered and unfiltered font loading.
1647    /// 
1648    /// # Arguments
1649    /// * `family_filter` - If Some, only load fonts matching these family names.
1650    ///                     If None, load all fonts.
1651    #[cfg(all(feature = "std", feature = "parsing"))]
1652    fn build_inner(family_filter: Option<&[String]>) -> Self {
1653        let mut cache = FcFontCache::default();
1654        
1655        // Normalize filter families for matching
1656        let filter_normalized: Option<Vec<String>> = family_filter.map(|families| {
1657            families
1658                .iter()
1659                .map(|f| crate::utils::normalize_family_name(f))
1660                .collect()
1661        });
1662
1663        // Helper closure to check if a pattern matches the filter
1664        let matches_filter = |pattern: &FcPattern| -> bool {
1665            match &filter_normalized {
1666                None => true, // No filter = accept all
1667                Some(targets) => {
1668                    pattern.name.as_ref().map_or(false, |name| {
1669                        let name_norm = crate::utils::normalize_family_name(name);
1670                        targets.iter().any(|target| name_norm.contains(target))
1671                    }) || pattern.family.as_ref().map_or(false, |family| {
1672                        let family_norm = crate::utils::normalize_family_name(family);
1673                        targets.iter().any(|target| family_norm.contains(target))
1674                    })
1675                }
1676            }
1677        };
1678
1679        #[cfg(target_os = "linux")]
1680        {
1681            if let Some((font_entries, render_configs)) = FcScanDirectories() {
1682                for (mut pattern, path) in font_entries {
1683                    if matches_filter(&pattern) {
1684                        // Apply per-font render config if a matching family rule exists
1685                        if let Some(family) = pattern.name.as_ref().or(pattern.family.as_ref()) {
1686                            if let Some(rc) = render_configs.get(family) {
1687                                pattern.render_config = rc.clone();
1688                            }
1689                        }
1690                        let id = FontId::new();
1691                        cache.patterns.insert(pattern.clone(), id);
1692                        cache.metadata.insert(id, pattern.clone());
1693                        cache.disk_fonts.insert(id, path);
1694                        cache.index_pattern_tokens(&pattern, id);
1695                    }
1696                }
1697            }
1698        }
1699
1700        #[cfg(target_os = "windows")]
1701        {
1702            let system_root = std::env::var("SystemRoot")
1703                .or_else(|_| std::env::var("WINDIR"))
1704                .unwrap_or_else(|_| "C:\\Windows".to_string());
1705            
1706            let user_profile = std::env::var("USERPROFILE")
1707                .unwrap_or_else(|_| "C:\\Users\\Default".to_string());
1708            
1709            let font_dirs = vec![
1710                (None, format!("{}\\Fonts\\", system_root)),
1711                (None, format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile)),
1712            ];
1713
1714            let font_entries = FcScanDirectoriesInner(&font_dirs);
1715            for (pattern, path) in font_entries {
1716                if matches_filter(&pattern) {
1717                    let id = FontId::new();
1718                    cache.patterns.insert(pattern.clone(), id);
1719                    cache.metadata.insert(id, pattern.clone());
1720                    cache.disk_fonts.insert(id, path);
1721                    cache.index_pattern_tokens(&pattern, id);
1722                }
1723            }
1724        }
1725
1726        #[cfg(target_os = "macos")]
1727        {
1728            let font_dirs = vec![
1729                (None, "~/Library/Fonts".to_owned()),
1730                (None, "/System/Library/Fonts".to_owned()),
1731                (None, "/Library/Fonts".to_owned()),
1732                (None, "/System/Library/AssetsV2".to_owned()),
1733            ];
1734
1735            let font_entries = FcScanDirectoriesInner(&font_dirs);
1736            for (pattern, path) in font_entries {
1737                if matches_filter(&pattern) {
1738                    let id = FontId::new();
1739                    cache.patterns.insert(pattern.clone(), id);
1740                    cache.metadata.insert(id, pattern.clone());
1741                    cache.disk_fonts.insert(id, path);
1742                    cache.index_pattern_tokens(&pattern, id);
1743                }
1744            }
1745        }
1746
1747        cache
1748    }
1749    
1750    /// Check if a font ID is a memory font (preferred over disk fonts)
1751    pub fn is_memory_font(&self, id: &FontId) -> bool {
1752        self.memory_fonts.contains_key(id)
1753    }
1754
1755    /// Returns the list of fonts and font patterns
1756    pub fn list(&self) -> Vec<(&FcPattern, FontId)> {
1757        self.patterns
1758            .iter()
1759            .map(|(pattern, id)| (pattern, *id))
1760            .collect()
1761    }
1762
1763    /// Returns true if the cache contains no font patterns
1764    pub fn is_empty(&self) -> bool {
1765        self.patterns.is_empty()
1766    }
1767
1768    /// Returns the number of font patterns in the cache
1769    pub fn len(&self) -> usize {
1770        self.patterns.len()
1771    }
1772
1773    /// Queries a font from the in-memory cache, returns the first found font (early return)
1774    /// Memory fonts are always preferred over disk fonts with the same match quality.
1775    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
1776        let mut matches = Vec::new();
1777
1778        for (stored_pattern, id) in &self.patterns {
1779            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1780                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1781                
1782                // Calculate Unicode compatibility score
1783                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1784                    // No specific Unicode requirements, use general coverage
1785                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1786                } else {
1787                    // Calculate how well this font covers the requested Unicode ranges
1788                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1789                };
1790                
1791                let style_score = Self::calculate_style_score(pattern, metadata);
1792                
1793                // Memory fonts get a bonus to prefer them over disk fonts
1794                let is_memory = self.memory_fonts.contains_key(id);
1795                
1796                matches.push((*id, unicode_compatibility, style_score, metadata.clone(), is_memory));
1797            }
1798        }
1799
1800        // Sort by: 1. Memory font (preferred), 2. Unicode compatibility, 3. Style score
1801        matches.sort_by(|a, b| {
1802            // Memory fonts first
1803            b.4.cmp(&a.4)
1804                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1805                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
1806        });
1807
1808        matches.first().map(|(id, _, _, metadata, _)| {
1809            FontMatch {
1810                id: *id,
1811                unicode_ranges: metadata.unicode_ranges.clone(),
1812                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1813            }
1814        })
1815    }
1816
1817    /// Queries all fonts matching a pattern (internal use only)
1818    /// 
1819    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
1820    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
1821    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
1822        let mut matches = Vec::new();
1823
1824        for (stored_pattern, id) in &self.patterns {
1825            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1826                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1827                
1828                // Calculate Unicode compatibility score
1829                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1830                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1831                } else {
1832                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1833                };
1834                
1835                let style_score = Self::calculate_style_score(pattern, metadata);
1836                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
1837            }
1838        }
1839
1840        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
1841        // Style matching (weight, italic, etc.) is now the primary criterion
1842        // Deterministic tiebreaker: prefer non-italic, then alphabetical by name
1843        matches.sort_by(|a, b| {
1844            a.2.cmp(&b.2) // Style score (lower is better)
1845                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1846                .then_with(|| a.3.italic.cmp(&b.3.italic)) // Prefer non-italic
1847                .then_with(|| a.3.name.cmp(&b.3.name)) // Alphabetical tiebreaker
1848        });
1849
1850        matches
1851            .into_iter()
1852            .map(|(id, _, _, metadata)| {
1853                FontMatch {
1854                    id,
1855                    unicode_ranges: metadata.unicode_ranges.clone(),
1856                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1857                }
1858            })
1859            .collect()
1860    }
1861
1862    /// Compute fallback fonts for a given font
1863    /// This is a lazy operation that can be expensive - only call when actually needed
1864    /// (e.g., for FFI or debugging, not needed for resolve_char)
1865    pub fn compute_fallbacks(
1866        &self,
1867        font_id: &FontId,
1868        trace: &mut Vec<TraceMsg>,
1869    ) -> Vec<FontMatchNoFallback> {
1870        // Get the pattern for this font
1871        let pattern = match self.metadata.get(font_id) {
1872            Some(p) => p,
1873            None => return Vec::new(),
1874        };
1875        
1876        self.compute_fallbacks_for_pattern(pattern, Some(font_id), trace)
1877    }
1878    
1879    fn compute_fallbacks_for_pattern(
1880        &self,
1881        pattern: &FcPattern,
1882        exclude_id: Option<&FontId>,
1883        _trace: &mut Vec<TraceMsg>,
1884    ) -> Vec<FontMatchNoFallback> {
1885        let mut candidates = Vec::new();
1886
1887        // Collect all potential fallbacks (excluding original pattern)
1888        for (stored_pattern, id) in &self.patterns {
1889            // Skip if this is the original font
1890            if exclude_id.is_some() && exclude_id.unwrap() == id {
1891                continue;
1892            }
1893
1894            // Check if this font supports any of the unicode ranges
1895            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
1896                // Calculate Unicode compatibility
1897                let unicode_compatibility = Self::calculate_unicode_compatibility(
1898                    &pattern.unicode_ranges,
1899                    &stored_pattern.unicode_ranges
1900                );
1901                
1902                // Only include if there's actual overlap
1903                if unicode_compatibility > 0 {
1904                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
1905                    candidates.push((
1906                        FontMatchNoFallback {
1907                            id: *id,
1908                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
1909                        },
1910                        unicode_compatibility,
1911                        style_score,
1912                        stored_pattern.clone(),
1913                    ));
1914                }
1915            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
1916                // No specific Unicode requirements, use general coverage
1917                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
1918                let style_score = Self::calculate_style_score(pattern, stored_pattern);
1919                candidates.push((
1920                    FontMatchNoFallback {
1921                        id: *id,
1922                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
1923                    },
1924                    coverage,
1925                    style_score,
1926                    stored_pattern.clone(),
1927                ));
1928            }
1929        }
1930
1931        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
1932        candidates.sort_by(|a, b| {
1933            b.1.cmp(&a.1)
1934                .then_with(|| a.2.cmp(&b.2))
1935        });
1936
1937        // Deduplicate by keeping only the best match per unique unicode range
1938        let mut seen_ranges = Vec::new();
1939        let mut deduplicated = Vec::new();
1940
1941        for (id, _, _, pattern) in candidates {
1942            let mut is_new_range = false;
1943
1944            for range in &pattern.unicode_ranges {
1945                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
1946                    seen_ranges.push(*range);
1947                    is_new_range = true;
1948                }
1949            }
1950
1951            if is_new_range {
1952                deduplicated.push(id);
1953            }
1954        }
1955
1956        deduplicated
1957    }
1958
1959    /// Get in-memory font data
1960    pub fn get_memory_font(&self, id: &FontId) -> Option<&FcFont> {
1961        self.memory_fonts.get(id)
1962    }
1963
1964    /// Check if a pattern matches the query, with detailed tracing
1965    fn trace_path(k: &FcPattern) -> String {
1966        k.name.as_ref().cloned().unwrap_or_else(|| "<unknown>".to_string())
1967    }
1968
1969    pub fn query_matches_internal(
1970        k: &FcPattern,
1971        pattern: &FcPattern,
1972        trace: &mut Vec<TraceMsg>,
1973    ) -> bool {
1974        // Check name - substring match
1975        if let Some(ref name) = pattern.name {
1976            if !k.name.as_ref().map_or(false, |kn| kn.contains(name)) {
1977                trace.push(TraceMsg {
1978                    level: TraceLevel::Info,
1979                    path: Self::trace_path(k),
1980                    reason: MatchReason::NameMismatch {
1981                        requested: pattern.name.clone(),
1982                        found: k.name.clone(),
1983                    },
1984                });
1985                return false;
1986            }
1987        }
1988
1989        // Check family - substring match
1990        if let Some(ref family) = pattern.family {
1991            if !k.family.as_ref().map_or(false, |kf| kf.contains(family)) {
1992                trace.push(TraceMsg {
1993                    level: TraceLevel::Info,
1994                    path: Self::trace_path(k),
1995                    reason: MatchReason::FamilyMismatch {
1996                        requested: pattern.family.clone(),
1997                        found: k.family.clone(),
1998                    },
1999                });
2000                return false;
2001            }
2002        }
2003
2004        // Check style properties
2005        let style_properties = [
2006            (
2007                "italic",
2008                pattern.italic.needs_to_match(),
2009                pattern.italic.matches(&k.italic),
2010            ),
2011            (
2012                "oblique",
2013                pattern.oblique.needs_to_match(),
2014                pattern.oblique.matches(&k.oblique),
2015            ),
2016            (
2017                "bold",
2018                pattern.bold.needs_to_match(),
2019                pattern.bold.matches(&k.bold),
2020            ),
2021            (
2022                "monospace",
2023                pattern.monospace.needs_to_match(),
2024                pattern.monospace.matches(&k.monospace),
2025            ),
2026            (
2027                "condensed",
2028                pattern.condensed.needs_to_match(),
2029                pattern.condensed.matches(&k.condensed),
2030            ),
2031        ];
2032
2033        for (property_name, needs_to_match, matches) in style_properties {
2034            if needs_to_match && !matches {
2035                let (requested, found) = match property_name {
2036                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
2037                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
2038                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
2039                    "monospace" => (
2040                        format!("{:?}", pattern.monospace),
2041                        format!("{:?}", k.monospace),
2042                    ),
2043                    "condensed" => (
2044                        format!("{:?}", pattern.condensed),
2045                        format!("{:?}", k.condensed),
2046                    ),
2047                    _ => (String::new(), String::new()),
2048                };
2049
2050                trace.push(TraceMsg {
2051                    level: TraceLevel::Info,
2052                    path: Self::trace_path(k),
2053                    reason: MatchReason::StyleMismatch {
2054                        property: property_name,
2055                        requested,
2056                        found,
2057                    },
2058                });
2059                return false;
2060            }
2061        }
2062
2063        // Check weight - hard filter if non-normal weight is requested
2064        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
2065            trace.push(TraceMsg {
2066                level: TraceLevel::Info,
2067                path: Self::trace_path(k),
2068                reason: MatchReason::WeightMismatch {
2069                    requested: pattern.weight,
2070                    found: k.weight,
2071                },
2072            });
2073            return false;
2074        }
2075
2076        // Check stretch - hard filter if non-normal stretch is requested
2077        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
2078            trace.push(TraceMsg {
2079                level: TraceLevel::Info,
2080                path: Self::trace_path(k),
2081                reason: MatchReason::StretchMismatch {
2082                    requested: pattern.stretch,
2083                    found: k.stretch,
2084                },
2085            });
2086            return false;
2087        }
2088
2089        // Check unicode ranges if specified
2090        if !pattern.unicode_ranges.is_empty() {
2091            let mut has_overlap = false;
2092
2093            for p_range in &pattern.unicode_ranges {
2094                for k_range in &k.unicode_ranges {
2095                    if p_range.overlaps(k_range) {
2096                        has_overlap = true;
2097                        break;
2098                    }
2099                }
2100                if has_overlap {
2101                    break;
2102                }
2103            }
2104
2105            if !has_overlap {
2106                trace.push(TraceMsg {
2107                    level: TraceLevel::Info,
2108                    path: Self::trace_path(k),
2109                    reason: MatchReason::UnicodeRangeMismatch {
2110                        character: '\0', // No specific character to report
2111                        ranges: k.unicode_ranges.clone(),
2112                    },
2113                });
2114                return false;
2115            }
2116        }
2117
2118        true
2119    }
2120    
2121    /// Resolve a complete font fallback chain for a CSS font-family stack
2122    /// This is the main entry point for font resolution with caching
2123    /// Automatically expands generic CSS families (serif, sans-serif, monospace) to OS-specific fonts
2124    /// 
2125    /// # Arguments
2126    /// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
2127    /// * `text` - The text to render (used to extract Unicode ranges)
2128    /// * `weight` - Font weight
2129    /// * `italic` - Italic style requirement
2130    /// * `oblique` - Oblique style requirement
2131    /// * `trace` - Debug trace messages
2132    /// 
2133    /// # Returns
2134    /// A complete font fallback chain with CSS fallbacks and Unicode fallbacks
2135    /// 
2136    /// # Example
2137    /// ```no_run
2138    /// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
2139    /// let cache = FcFontCache::build();
2140    /// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
2141    /// let chain = cache.resolve_font_chain(&families, FcWeight::Normal, 
2142    ///                                       PatternMatch::DontCare, PatternMatch::DontCare, 
2143    ///                                       &mut Vec::new());
2144    /// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
2145    /// ```
2146    #[cfg(feature = "std")]
2147    pub fn resolve_font_chain(
2148        &self,
2149        font_families: &[String],
2150        weight: FcWeight,
2151        italic: PatternMatch,
2152        oblique: PatternMatch,
2153        trace: &mut Vec<TraceMsg>,
2154    ) -> FontFallbackChain {
2155        self.resolve_font_chain_with_os(font_families, weight, italic, oblique, trace, OperatingSystem::current())
2156    }
2157    
2158    /// Resolve font chain with explicit OS specification (useful for testing)
2159    #[cfg(feature = "std")]
2160    pub fn resolve_font_chain_with_os(
2161        &self,
2162        font_families: &[String],
2163        weight: FcWeight,
2164        italic: PatternMatch,
2165        oblique: PatternMatch,
2166        trace: &mut Vec<TraceMsg>,
2167        os: OperatingSystem,
2168    ) -> FontFallbackChain {
2169        self.resolve_font_chain_impl(font_families, weight, italic, oblique, None, trace, os)
2170    }
2171
2172    /// Resolve a font fallback chain, restricting Unicode fallbacks to the
2173    /// caller-supplied set of scripts (usually derived from the actual
2174    /// text content of the document).
2175    ///
2176    /// - `scripts_hint: None` → back-compat behaviour, equivalent to
2177    ///   [`FcFontCache::resolve_font_chain`]: pulls in fallback fonts for
2178    ///   the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`] set.
2179    /// - `scripts_hint: Some(&[])` → no Unicode fallbacks attached. For
2180    ///   an ASCII-only page this avoids pulling Arial Unicode MS,
2181    ///   CJK fonts, etc. into memory when they're not needed.
2182    /// - `scripts_hint: Some(&[CJK])` → only CJK fallback attached.
2183    ///
2184    /// The chain cache is keyed so an ASCII-only resolution cannot be
2185    /// served from a slot populated by a default/all-scripts resolution.
2186    #[cfg(feature = "std")]
2187    pub fn resolve_font_chain_with_scripts(
2188        &self,
2189        font_families: &[String],
2190        weight: FcWeight,
2191        italic: PatternMatch,
2192        oblique: PatternMatch,
2193        scripts_hint: Option<&[UnicodeRange]>,
2194        trace: &mut Vec<TraceMsg>,
2195    ) -> FontFallbackChain {
2196        self.resolve_font_chain_impl(
2197            font_families, weight, italic, oblique, scripts_hint,
2198            trace, OperatingSystem::current(),
2199        )
2200    }
2201
2202    /// Shared entry used by [`resolve_font_chain_with_os`] and
2203    /// [`resolve_font_chain_with_scripts`]. Handles the cache lookup,
2204    /// generic-family expansion, and delegation to the uncached builder.
2205    #[cfg(feature = "std")]
2206    fn resolve_font_chain_impl(
2207        &self,
2208        font_families: &[String],
2209        weight: FcWeight,
2210        italic: PatternMatch,
2211        oblique: PatternMatch,
2212        scripts_hint: Option<&[UnicodeRange]>,
2213        trace: &mut Vec<TraceMsg>,
2214        os: OperatingSystem,
2215    ) -> FontFallbackChain {
2216        // Check cache FIRST - key uses original (unexpanded) families
2217        // plus a hash over the scripts_hint so ASCII-only callers don't
2218        // consume a slot filled by a default-scripts caller.
2219        let scripts_hint_hash = scripts_hint.map(hash_scripts_hint);
2220        let cache_key = FontChainCacheKey {
2221            font_families: font_families.to_vec(),
2222            weight,
2223            italic,
2224            oblique,
2225            scripts_hint_hash,
2226        };
2227
2228        if let Some(cached) = self.chain_cache.lock().ok().and_then(|c| c.get(&cache_key).cloned()) {
2229            return cached;
2230        }
2231
2232        // Expand generic CSS families to OS-specific fonts
2233        let expanded_families = expand_font_families(font_families, os, &[]);
2234
2235        // Build the chain
2236        let chain = self.resolve_font_chain_uncached(
2237            &expanded_families,
2238            weight,
2239            italic,
2240            oblique,
2241            scripts_hint,
2242            trace,
2243        );
2244
2245        // Cache the result
2246        if let Ok(mut cache) = self.chain_cache.lock() {
2247            cache.insert(cache_key, chain.clone());
2248        }
2249
2250        chain
2251    }
2252    
2253    /// Internal implementation without caching.
2254    ///
2255    /// `scripts_hint`:
2256    /// - `None` pulls in the full [`DEFAULT_UNICODE_FALLBACK_SCRIPTS`]
2257    ///   set (the original, back-compat behaviour).
2258    /// - `Some(&[])` attaches no Unicode fallbacks.
2259    /// - `Some(ranges)` attaches fallbacks only for those ranges.
2260    #[cfg(feature = "std")]
2261    fn resolve_font_chain_uncached(
2262        &self,
2263        font_families: &[String],
2264        weight: FcWeight,
2265        italic: PatternMatch,
2266        oblique: PatternMatch,
2267        scripts_hint: Option<&[UnicodeRange]>,
2268        trace: &mut Vec<TraceMsg>,
2269    ) -> FontFallbackChain {
2270        let mut css_fallbacks = Vec::new();
2271        
2272        // Resolve each CSS font-family to its system fallbacks
2273        for (_i, family) in font_families.iter().enumerate() {
2274            // Check if this is a generic font family
2275            let (pattern, is_generic) = if config::is_generic_family(family) {
2276                let monospace = if family.eq_ignore_ascii_case("monospace") {
2277                    PatternMatch::True
2278                } else {
2279                    PatternMatch::False
2280                };
2281                let pattern = FcPattern {
2282                    name: None,
2283                    weight,
2284                    italic,
2285                    oblique,
2286                    monospace,
2287                    unicode_ranges: Vec::new(),
2288                    ..Default::default()
2289                };
2290                (pattern, true)
2291            } else {
2292                // Specific font family name
2293                let pattern = FcPattern {
2294                    name: Some(family.clone()),
2295                    weight,
2296                    italic,
2297                    oblique,
2298                    unicode_ranges: Vec::new(),
2299                    ..Default::default()
2300                };
2301                (pattern, false)
2302            };
2303            
2304            // Use fuzzy matching for specific fonts (fast token-based lookup)
2305            // For generic families, use query (slower but necessary for property matching)
2306            let mut matches = if is_generic {
2307                // Generic families need full pattern matching
2308                self.query_internal(&pattern, trace)
2309            } else {
2310                // Specific font names: use fast token-based fuzzy matching
2311                self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
2312            };
2313            
2314            // For generic families, limit to top 5 fonts to avoid too many matches
2315            if is_generic && matches.len() > 5 {
2316                matches.truncate(5);
2317            }
2318            
2319            // Always add the CSS fallback group to preserve CSS ordering
2320            // even if no fonts were found for this family
2321            css_fallbacks.push(CssFallbackGroup {
2322                css_name: family.clone(),
2323                fonts: matches,
2324            });
2325        }
2326        
2327        // Populate unicode_fallbacks. CSS fallback fonts may falsely claim
2328        // coverage of a script via the OS/2 unicode-range bits without
2329        // actually having glyphs, so we supplement the CSS chain with an
2330        // explicit lookup for each requested script block. resolve_char()
2331        // prefers CSS fallbacks first (earlier in the chain wins).
2332        //
2333        // The set of script blocks to cover is caller-controlled via
2334        // `scripts_hint`: `None` keeps the back-compat DEFAULT_UNICODE_FALLBACK_SCRIPTS
2335        // behaviour (7 scripts) so existing `resolve_font_chain` consumers
2336        // stay unchanged; `Some(&[])` opts into "no unicode fallbacks at all"
2337        // for ASCII-only documents, eliminating the big CJK / Arabic fonts
2338        // from the resolved chain (and therefore from eager downstream parses).
2339        let important_ranges: &[UnicodeRange] =
2340            scripts_hint.unwrap_or(DEFAULT_UNICODE_FALLBACK_SCRIPTS);
2341        let unicode_fallbacks = if important_ranges.is_empty() {
2342            Vec::new()
2343        } else {
2344            let all_uncovered = vec![false; important_ranges.len()];
2345            self.find_unicode_fallbacks(
2346                important_ranges,
2347                &all_uncovered,
2348                &css_fallbacks,
2349                weight,
2350                italic,
2351                oblique,
2352                trace,
2353            )
2354        };
2355
2356        FontFallbackChain {
2357            css_fallbacks,
2358            unicode_fallbacks,
2359            original_stack: font_families.to_vec(),
2360        }
2361    }
2362    
2363    /// Extract Unicode ranges from text
2364    #[allow(dead_code)]
2365    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
2366        let mut chars: Vec<char> = text.chars().collect();
2367        chars.sort_unstable();
2368        chars.dedup();
2369        
2370        if chars.is_empty() {
2371            return Vec::new();
2372        }
2373        
2374        let mut ranges = Vec::new();
2375        let mut range_start = chars[0] as u32;
2376        let mut range_end = range_start;
2377        
2378        for &c in &chars[1..] {
2379            let codepoint = c as u32;
2380            if codepoint == range_end + 1 {
2381                range_end = codepoint;
2382            } else {
2383                ranges.push(UnicodeRange { start: range_start, end: range_end });
2384                range_start = codepoint;
2385                range_end = codepoint;
2386            }
2387        }
2388        
2389        ranges.push(UnicodeRange { start: range_start, end: range_end });
2390        ranges
2391    }
2392    
2393    /// Fuzzy query for fonts by name when exact match fails
2394    /// Uses intelligent token-based matching with inverted index for speed:
2395    /// 1. Break name into tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2396    /// 2. Use token_index to find candidate fonts via BTreeSet intersection
2397    /// 3. Score only the candidate fonts (instead of all 800+ patterns)
2398    /// 4. Prioritize fonts matching more tokens + Unicode coverage
2399    #[cfg(feature = "std")]
2400    fn fuzzy_query_by_name(
2401        &self,
2402        requested_name: &str,
2403        weight: FcWeight,
2404        italic: PatternMatch,
2405        oblique: PatternMatch,
2406        unicode_ranges: &[UnicodeRange],
2407        _trace: &mut Vec<TraceMsg>,
2408    ) -> Vec<FontMatch> {
2409        // Extract tokens from the requested name (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2410        let tokens = Self::extract_font_name_tokens(requested_name);
2411        
2412        if tokens.is_empty() {
2413            return Vec::new();
2414        }
2415        
2416        // Convert tokens to lowercase for case-insensitive lookup
2417        let tokens_lower: Vec<String> = tokens.iter().map(|t| t.to_lowercase()).collect();
2418        
2419        // Progressive token matching strategy:
2420        // Start with first token, then progressively narrow down with each additional token
2421        // If adding a token results in 0 matches, use the previous (broader) set
2422        // Example: ["Noto"] -> 10 fonts, ["Noto","Sans"] -> 2 fonts, ["Noto","Sans","JP"] -> 0 fonts => use 2 fonts
2423        
2424        // Start with the first token
2425        let first_token = &tokens_lower[0];
2426        let mut candidate_ids = match self.token_index.get(first_token) {
2427            Some(ids) if !ids.is_empty() => ids.clone(),
2428            _ => {
2429                // First token not found - no fonts match, quit immediately
2430                return Vec::new();
2431            }
2432        };
2433        
2434        // Progressively narrow down with each additional token
2435        for token in &tokens_lower[1..] {
2436            if let Some(token_ids) = self.token_index.get(token) {
2437                // Calculate intersection
2438                let intersection: alloc::collections::BTreeSet<FontId> = 
2439                    candidate_ids.intersection(token_ids).copied().collect();
2440                
2441                if intersection.is_empty() {
2442                    // Adding this token results in 0 matches - keep previous set and stop
2443                    break;
2444                } else {
2445                    // Successfully narrowed down - use intersection
2446                    candidate_ids = intersection;
2447                }
2448            } else {
2449                // Token not in index - keep current set and stop
2450                break;
2451            }
2452        }
2453        
2454        // Now score only the candidate fonts (HUGE speedup!)
2455        let mut candidates = Vec::new();
2456        
2457        for id in candidate_ids {
2458            let pattern = match self.metadata.get(&id) {
2459                Some(p) => p,
2460                None => continue,
2461            };
2462            
2463            // Get pre-tokenized font name (already lowercase)
2464            let font_tokens_lower = match self.font_tokens.get(&id) {
2465                Some(tokens) => tokens,
2466                None => continue,
2467            };
2468            
2469            if font_tokens_lower.is_empty() {
2470                continue;
2471            }
2472            
2473            // Calculate token match score (how many requested tokens appear in font name)
2474            // Both tokens_lower and font_tokens_lower are already lowercase, so direct comparison
2475            let token_matches = tokens_lower.iter()
2476                .filter(|req_token| {
2477                    font_tokens_lower.iter().any(|font_token| {
2478                        // Both already lowercase — exact token match (index guarantees candidates)
2479                        font_token == *req_token
2480                    })
2481                })
2482                .count();
2483            
2484            // Skip if no tokens match (shouldn't happen due to index, but safety check)
2485            if token_matches == 0 {
2486                continue;
2487            }
2488            
2489            // Calculate token similarity score (0-100)
2490            let token_similarity = (token_matches * 100 / tokens.len()) as i32;
2491            
2492            // Calculate Unicode range similarity
2493            let unicode_similarity = if !unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2494                Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
2495            } else {
2496                0
2497            };
2498            
2499            // CRITICAL: If we have Unicode requirements, ONLY accept fonts that cover them
2500            // A font with great name match but no Unicode coverage is useless
2501            if !unicode_ranges.is_empty() && unicode_similarity == 0 {
2502                continue;
2503            }
2504            
2505            let style_score = Self::calculate_style_score(&FcPattern {
2506                weight,
2507                italic,
2508                oblique,
2509                ..Default::default()
2510            }, pattern);
2511            
2512            candidates.push((
2513                id,
2514                token_similarity,
2515                unicode_similarity,
2516                style_score,
2517                pattern.clone(),
2518            ));
2519        }
2520        
2521        // Sort by:
2522        // 1. Token matches (more matches = better)
2523        // 2. Unicode compatibility (if ranges provided)
2524        // 3. Style score (lower is better)
2525        // 4. Deterministic tiebreaker: prefer non-italic, then by font name
2526        candidates.sort_by(|a, b| {
2527            if !unicode_ranges.is_empty() {
2528                // When we have Unicode requirements, prioritize coverage
2529                b.1.cmp(&a.1) // Token similarity (higher is better) - PRIMARY
2530                    .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better) - SECONDARY
2531                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better) - TERTIARY
2532                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2533                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2534            } else {
2535                // No Unicode requirements, token similarity is primary
2536                b.1.cmp(&a.1) // Token similarity (higher is better)
2537                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
2538                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2539                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2540            }
2541        });
2542        
2543        // Take top 5 matches
2544        candidates.truncate(5);
2545        
2546        // Convert to FontMatch
2547        candidates
2548            .into_iter()
2549            .map(|(id, _token_sim, _unicode_sim, _style, pattern)| {
2550                FontMatch {
2551                    id,
2552                    unicode_ranges: pattern.unicode_ranges.clone(),
2553                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2554                }
2555            })
2556            .collect()
2557    }
2558    
2559    /// Extract tokens from a font name
2560    /// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
2561    /// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
2562    pub fn extract_font_name_tokens(name: &str) -> Vec<String> {
2563        let mut tokens = Vec::new();
2564        let mut current_token = String::new();
2565        let mut last_was_lower = false;
2566        
2567        for c in name.chars() {
2568            if c.is_whitespace() || c == '-' || c == '_' {
2569                // Word separator
2570                if !current_token.is_empty() {
2571                    tokens.push(current_token.clone());
2572                    current_token.clear();
2573                }
2574                last_was_lower = false;
2575            } else if c.is_uppercase() && last_was_lower && !current_token.is_empty() {
2576                // CamelCase boundary (e.g., "Noto" | "Sans")
2577                tokens.push(current_token.clone());
2578                current_token.clear();
2579                current_token.push(c);
2580                last_was_lower = false;
2581            } else {
2582                current_token.push(c);
2583                last_was_lower = c.is_lowercase();
2584            }
2585        }
2586        
2587        if !current_token.is_empty() {
2588            tokens.push(current_token);
2589        }
2590        
2591        tokens
2592    }
2593    
2594    /// Find fonts to cover missing Unicode ranges
2595    /// Uses intelligent matching: prefers fonts with similar names to existing ones
2596    /// Early quits once all Unicode ranges are covered for performance
2597    fn find_unicode_fallbacks(
2598        &self,
2599        unicode_ranges: &[UnicodeRange],
2600        covered_chars: &[bool],
2601        existing_groups: &[CssFallbackGroup],
2602        _weight: FcWeight,
2603        _italic: PatternMatch,
2604        _oblique: PatternMatch,
2605        trace: &mut Vec<TraceMsg>,
2606    ) -> Vec<FontMatch> {
2607        // Extract uncovered ranges
2608        let mut uncovered_ranges = Vec::new();
2609        for (i, &covered) in covered_chars.iter().enumerate() {
2610            if !covered && i < unicode_ranges.len() {
2611                uncovered_ranges.push(unicode_ranges[i].clone());
2612            }
2613        }
2614        
2615        if uncovered_ranges.is_empty() {
2616            return Vec::new();
2617        }
2618
2619        // Query for fonts that cover these ranges.
2620        // Use DontCare for weight/italic/oblique — we want ANY font that covers
2621        // the missing characters, regardless of style. The similarity sort below
2622        // will prefer fonts matching the existing chain's style anyway.
2623        let pattern = FcPattern {
2624            name: None,
2625            weight: FcWeight::Normal, // Normal weight is not filtered by query_matches_internal (line 1836)
2626            italic: PatternMatch::DontCare,
2627            oblique: PatternMatch::DontCare,
2628            unicode_ranges: uncovered_ranges.clone(),
2629            ..Default::default()
2630        };
2631        
2632        let mut candidates = self.query_internal(&pattern, trace);
2633
2634        // Intelligent sorting: prefer fonts with similar names to existing ones
2635        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
2636        let existing_prefixes: Vec<String> = existing_groups
2637            .iter()
2638            .flat_map(|group| {
2639                group.fonts.iter().filter_map(|font| {
2640                    self.get_metadata_by_id(&font.id)
2641                        .and_then(|meta| meta.family.clone())
2642                        .and_then(|family| {
2643                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
2644                            family.split_whitespace()
2645                                .take(2)
2646                                .collect::<Vec<_>>()
2647                                .join(" ")
2648                                .into()
2649                        })
2650                })
2651            })
2652            .collect();
2653        
2654        // Sort candidates by:
2655        // 1. Name similarity to existing fonts (highest priority)
2656        // 2. Unicode coverage (secondary)
2657        candidates.sort_by(|a, b| {
2658            let a_meta = self.get_metadata_by_id(&a.id);
2659            let b_meta = self.get_metadata_by_id(&b.id);
2660            
2661            let a_score = Self::calculate_font_similarity_score(a_meta, &existing_prefixes);
2662            let b_score = Self::calculate_font_similarity_score(b_meta, &existing_prefixes);
2663            
2664            b_score.cmp(&a_score) // Higher score = better match
2665                .then_with(|| {
2666                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
2667                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
2668                    b_coverage.cmp(&a_coverage)
2669                })
2670        });
2671        
2672        // Early quit optimization: only take fonts until all ranges are covered
2673        let mut result = Vec::new();
2674        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
2675        
2676        for candidate in candidates {
2677            // Check which ranges this font covers
2678            let mut covers_new_range = false;
2679            
2680            for (i, range) in uncovered_ranges.iter().enumerate() {
2681                if remaining_uncovered[i] {
2682                    // Check if this font covers this range
2683                    for font_range in &candidate.unicode_ranges {
2684                        if font_range.overlaps(range) {
2685                            remaining_uncovered[i] = false;
2686                            covers_new_range = true;
2687                            break;
2688                        }
2689                    }
2690                }
2691            }
2692            
2693            // Only add fonts that cover at least one new range
2694            if covers_new_range {
2695                result.push(candidate);
2696                
2697                // Early quit: if all ranges are covered, stop
2698                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
2699                    break;
2700                }
2701            }
2702        }
2703        
2704        result
2705    }
2706    
2707    /// Calculate similarity score between a font and existing font prefixes
2708    /// Higher score = more similar
2709    fn calculate_font_similarity_score(
2710        font_meta: Option<&FcPattern>,
2711        existing_prefixes: &[String],
2712    ) -> i32 {
2713        let Some(meta) = font_meta else { return 0; };
2714        let Some(family) = &meta.family else { return 0; };
2715        
2716        // Check if this font's family matches any existing prefix
2717        for prefix in existing_prefixes {
2718            if family.starts_with(prefix) {
2719                return 100; // Strong match
2720            }
2721            if family.contains(prefix) {
2722                return 50; // Partial match
2723            }
2724        }
2725        
2726        0 // No match
2727    }
2728    
2729    /// Find fallback fonts for a given pattern
2730    // Helper to calculate total unicode coverage
2731    pub fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
2732        ranges
2733            .iter()
2734            .map(|range| (range.end - range.start + 1) as u64)
2735            .sum()
2736    }
2737
2738    /// Calculate how well a font's Unicode ranges cover the requested ranges
2739    /// Returns a compatibility score (higher is better, 0 means no overlap)
2740    pub fn calculate_unicode_compatibility(
2741        requested: &[UnicodeRange],
2742        available: &[UnicodeRange],
2743    ) -> i32 {
2744        if requested.is_empty() {
2745            // No specific requirements, return total coverage
2746            return Self::calculate_unicode_coverage(available) as i32;
2747        }
2748        
2749        let mut total_coverage = 0u32;
2750        
2751        for req_range in requested {
2752            for avail_range in available {
2753                // Calculate overlap between requested and available ranges
2754                let overlap_start = req_range.start.max(avail_range.start);
2755                let overlap_end = req_range.end.min(avail_range.end);
2756                
2757                if overlap_start <= overlap_end {
2758                    // There is overlap
2759                    let overlap_size = overlap_end - overlap_start + 1;
2760                    total_coverage += overlap_size;
2761                }
2762            }
2763        }
2764        
2765        total_coverage as i32
2766    }
2767
2768    pub fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {
2769
2770        let mut score = 0_i32;
2771
2772        // Weight calculation with special handling for bold property
2773        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
2774            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
2775        {
2776            // No weight penalty when bold is requested and font has Bold weight
2777            // No weight penalty when non-bold is requested and font has non-Bold weight
2778        } else {
2779            // Apply normal weight difference penalty
2780            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
2781            score += weight_diff as i32;
2782        }
2783
2784        // Exact weight match bonus: reward fonts whose weight matches the request exactly,
2785        // with an extra bonus when both are Normal (the most common case for body text)
2786        if original.weight == candidate.weight {
2787            score -= 15;
2788            if original.weight == FcWeight::Normal {
2789                score -= 10; // Extra bonus for Normal-Normal match
2790            }
2791        }
2792
2793        // Stretch calculation with special handling for condensed property
2794        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
2795            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
2796        {
2797            // No stretch penalty when condensed is requested and font has condensed stretch
2798            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
2799        } else {
2800            // Apply normal stretch difference penalty
2801            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
2802            score += (stretch_diff * 100) as i32;
2803        }
2804
2805        // Handle style properties with standard penalties and bonuses
2806        let style_props = [
2807            (original.italic, candidate.italic, 300, 150),
2808            (original.oblique, candidate.oblique, 200, 100),
2809            (original.bold, candidate.bold, 300, 150),
2810            (original.monospace, candidate.monospace, 100, 50),
2811            (original.condensed, candidate.condensed, 100, 50),
2812        ];
2813
2814        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
2815            if orig.needs_to_match() {
2816                if orig == PatternMatch::False && cand == PatternMatch::DontCare {
2817                    // Requesting non-italic but font doesn't declare: small penalty
2818                    // (less than a full mismatch but more than a perfect match)
2819                    score += dontcare_penalty / 2;
2820                } else if !orig.matches(&cand) {
2821                    if cand == PatternMatch::DontCare {
2822                        score += dontcare_penalty;
2823                    } else {
2824                        score += mismatch_penalty;
2825                    }
2826                } else if orig == PatternMatch::True && cand == PatternMatch::True {
2827                    // Give bonus for exact True match
2828                    score -= 20;
2829                } else if orig == PatternMatch::False && cand == PatternMatch::False {
2830                    // Give bonus for exact False match (prefer explicitly non-italic
2831                    // over fonts with unknown/DontCare italic status)
2832                    score -= 20;
2833                }
2834            } else {
2835                // orig == DontCare: prefer "normal" fonts over styled ones.
2836                // When the caller doesn't specify italic/bold/etc., a font
2837                // that IS italic/bold should score slightly worse than one
2838                // that isn't, so Regular is chosen over Italic by default.
2839                if cand == PatternMatch::True {
2840                    score += dontcare_penalty / 3;
2841                }
2842            }
2843        }
2844
2845        // ── Name-based "base font" detection ──
2846        // The shorter the font name relative to its family, the more "basic" the
2847        // variant.  E.g. "System Font" (the base) should score better than
2848        // "System Font Regular Italic" (a variant) when the user hasn't
2849        // explicitly requested italic.
2850        if let (Some(name), Some(family)) = (&candidate.name, &candidate.family) {
2851            let name_lower = name.to_lowercase();
2852            let family_lower = family.to_lowercase();
2853
2854            // Strip the family prefix from the name to get the "extra" part
2855            let extra = if name_lower.starts_with(&family_lower) {
2856                name_lower[family_lower.len()..].to_string()
2857            } else {
2858                String::new()
2859            };
2860
2861            // Strip common neutral descriptors that don't indicate a style variant
2862            let stripped = extra
2863                .replace("regular", "")
2864                .replace("normal", "")
2865                .replace("book", "")
2866                .replace("roman", "");
2867            let stripped = stripped.trim();
2868
2869            if stripped.is_empty() {
2870                // This is a "base font" – name is just the family (± "Regular")
2871                score -= 50;
2872            } else {
2873                // Name has extra style descriptors – add a penalty per extra word
2874                let extra_words = stripped.split_whitespace().count();
2875                score += (extra_words as i32) * 25;
2876            }
2877        }
2878
2879        // ── Subfamily "Regular" bonus ──
2880        // Fonts whose OpenType subfamily is exactly "Regular" are the canonical
2881        // base variant and should be strongly preferred.
2882        if let Some(ref subfamily) = candidate.metadata.font_subfamily {
2883            let sf_lower = subfamily.to_lowercase();
2884            if sf_lower == "regular" {
2885                score -= 30;
2886            }
2887        }
2888
2889        score
2890    }
2891}
2892
/// Walks the system fontconfig configuration, starting at
/// `/etc/fonts/fonts.conf`, and scans the font directories it declares.
///
/// Work items are `(prefix, path)` pairs resolved through `process_path`:
/// - a file is parsed with `ParseFontsConf` (queuing its `<include>`s and
///   collecting its `<dir>`s) and, if that succeeds, with
///   `ParseFontsConfRenderConfig`;
/// - a directory contributes every regular file (symlinks are followed) whose
///   name starts with an ASCII digit and ends with `.conf`.
///
/// Each resolved path is processed at most once, so configuration files that
/// include each other cannot cause an endless loop and a file included twice
/// does not register its directories twice.
///
/// Returns `None` if the base configuration file does not exist or no font
/// directory was found; unreadable or unparsable entries are skipped.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn FcScanDirectories() -> Option<(Vec<(FcPattern, FcFontPath)>, BTreeMap<String, FcFontRenderConfig>)> {
    use std::collections::BTreeSet;
    use std::fs;
    use std::path::Path;

    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";

    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
        return None;
    }

    let mut font_paths = Vec::with_capacity(32);
    let mut paths_to_visit = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
    let mut render_configs: BTreeMap<String, FcFontRenderConfig> = BTreeMap::new();
    // Resolved paths that were already handled (guards against include cycles).
    let mut visited = BTreeSet::new();

    while let Some((prefix, path_to_visit)) = paths_to_visit.pop() {
        let Some(path) = process_path(&prefix, path_to_visit, true) else {
            continue;
        };

        if !visited.insert(path.clone()) {
            continue;
        }

        let Ok(metadata) = fs::metadata(&path) else {
            continue;
        };

        if metadata.is_file() {
            let Ok(xml_utf8) = fs::read_to_string(&path) else {
                continue;
            };

            if ParseFontsConf(&xml_utf8, &mut paths_to_visit, &mut font_paths).is_none() {
                continue;
            }

            // Also parse render config blocks from this file
            ParseFontsConfRenderConfig(&xml_utf8, &mut render_configs);
        } else if metadata.is_dir() {
            let Ok(dir_entries) = fs::read_dir(&path) else {
                continue;
            };

            for entry in dir_entries.flatten() {
                let entry_path = entry.path();

                // `fs::metadata` traverses symbolic links
                let Ok(entry_metadata) = fs::metadata(&entry_path) else {
                    continue;
                };

                if !entry_metadata.is_file() {
                    continue;
                }

                let Some(file_name) = entry_path.file_name() else {
                    continue;
                };

                // Only numbered snippets such as "10-hinting.conf" are queued.
                let file_name_str = file_name.to_string_lossy();
                if file_name_str.starts_with(|c: char| c.is_ascii_digit())
                    && file_name_str.ends_with(".conf")
                {
                    paths_to_visit.push((None, entry_path));
                }
            }
        }
    }

    if font_paths.is_empty() {
        return None;
    }

    Some((FcScanDirectoriesInner(&font_paths), render_configs))
}
2976
/// Extracts the `<include>` and `<dir>` entries from the XML text of a
/// fontconfig configuration file.
///
/// - `<include>path</include>` pushes `(prefix, path)` onto `paths_to_visit`;
/// - `<dir>path</dir>` pushes `(prefix, path)` onto `font_paths`;
///
/// where `prefix` is the value of the element's `prefix` attribute, if present,
/// and `path` is the trimmed element text. Elements without text, including
/// the self-closing forms `<include/>` and `<dir/>`, contribute nothing.
///
/// Returns `None` on a tokenizer error or when another element starts inside
/// an `<include>` / `<dir>`. Entries pushed before the failure stay in the
/// output vectors.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConf(
    input: &str,
    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
    font_paths: &mut Vec<(Option<String>, String)>,
) -> Option<()> {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    const TAG_INCLUDE: &str = "include";
    const TAG_DIR: &str = "dir";
    const ATTRIBUTE_PREFIX: &str = "prefix";

    let mut current_prefix: Option<&str> = None;
    let mut current_path: Option<&str> = None;
    let mut is_in_include = false;
    let mut is_in_dir = false;

    for token_result in Tokenizer::from(input) {
        let token = token_result.ok()?;

        match token {
            ElementStart { local, .. } => {
                if is_in_include || is_in_dir {
                    return None; /* error: nested tags */
                }

                match local.as_str() {
                    TAG_INCLUDE => {
                        is_in_include = true;
                    }
                    TAG_DIR => {
                        is_in_dir = true;
                    }
                    _ => continue,
                }

                current_path = None;
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                if text.is_empty() {
                    continue;
                }
                if is_in_include || is_in_dir {
                    current_path = Some(text);
                }
            }
            Attribute { local, value, .. } => {
                if !is_in_include && !is_in_dir {
                    continue;
                }
                // attribute on <include> or <dir> node
                if local.as_str() == ATTRIBUTE_PREFIX {
                    current_prefix = Some(value.as_str());
                }
            }
            ElementEnd { end, .. } => {
                match end {
                    xmlparser::ElementEnd::Close(_, end_tag) => match end_tag.as_str() {
                        TAG_INCLUDE => {
                            if !is_in_include {
                                continue;
                            }

                            if let Some(path) = current_path {
                                paths_to_visit.push((
                                    current_prefix.map(ToOwned::to_owned),
                                    PathBuf::from(path),
                                ));
                            }
                        }
                        TAG_DIR => {
                            if !is_in_dir {
                                continue;
                            }

                            if let Some(path) = current_path {
                                font_paths.push((
                                    current_prefix.map(ToOwned::to_owned),
                                    path.to_owned(),
                                ));
                            }
                        }
                        _ => continue,
                    },
                    // Self-closing `<include/>` / `<dir/>`: there is no path to
                    // record, but the element is finished and the state below
                    // must be cleared. Otherwise the next element start would
                    // be rejected as a nested tag and the whole file dropped.
                    // No other element can be open while one of the flags is
                    // set, so this `/>` belongs to the include/dir itself.
                    xmlparser::ElementEnd::Empty => {
                        if !is_in_include && !is_in_dir {
                            continue;
                        }
                    }
                    // `>` terminating a start tag: the element content follows.
                    _ => continue,
                }

                is_in_include = false;
                is_in_dir = false;
                current_path = None;
                current_prefix = None;
            }
            _ => {}
        }
    }

    Some(())
}
3083
/// Parses `<match target="font">` blocks from fonts.conf XML and records
/// per-family rendering configuration into `configs`.
///
/// Example fonts.conf snippet that this handles:
/// ```xml
/// <match target="font">
///   <test name="family"><string>Inconsolata</string></test>
///   <edit name="antialias" mode="assign"><bool>true</bool></edit>
///   <edit name="hintstyle" mode="assign"><const>hintslight</const></edit>
/// </match>
/// ```
///
/// # Arguments
/// * `input` - The XML text to scan.
/// * `configs` - Output map keyed by family name. An entry is inserted when a
///   `</match>` is reached, a family value was collected from a
///   `<test name="family">`, and the accumulated config differs from
///   `FcFontRenderConfig::default()`. Inserting an existing key replaces it.
///
/// Tokens the tokenizer reports as errors are skipped; the function itself
/// never reports failure.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConfRenderConfig(
    input: &str,
    configs: &mut BTreeMap<String, FcFontRenderConfig>,
) {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    // Parser state machine
    // NOTE(review): `InValue` is declared but never assigned anywhere in this
    // function; value elements are tracked through `value_tag` instead.
    #[derive(Clone, Copy, PartialEq)]
    enum State {
        /// Outside any relevant block
        Idle,
        /// Inside <match target="font">
        InMatchFont,
        /// Inside <test name="family"> within a match block
        InTestFamily,
        /// Inside <edit name="..."> within a match block
        InEdit,
        /// Inside a value element (<bool>, <double>, <const>, <string>) within <edit> or <test>
        InValue,
    }

    let mut state = State::Idle;
    // Set while reading the attributes of an opening tag; consumed when that tag's `>` is seen.
    let mut match_is_font_target = false;
    // Family name collected from the most recent `<test name="family">` of the current block.
    let mut current_family: Option<String> = None;
    // `name` attribute of the `<edit>` currently being read.
    let mut current_edit_name: Option<String> = None;
    // Last non-empty text seen inside the current `<test>` / `<edit>`.
    let mut current_value: Option<String> = None;
    // Tag name of the value element (`bool`, `const`, ...) that wraps `current_value`.
    let mut value_tag: Option<String> = None;
    // Config accumulated for the current match block.
    let mut config = FcFontRenderConfig::default();
    // True between `<test` and the end of its opening tag, while attributes are read.
    let mut in_test = false;
    // `name` attribute of the `<test>` whose opening tag is being read.
    let mut test_name: Option<String> = None;

    for token_result in Tokenizer::from(input) {
        let token = match token_result {
            Ok(token) => token,
            // Best effort: ignore tokens that failed to parse.
            Err(_) => continue,
        };

        match token {
            ElementStart { local, .. } => {
                let tag = local.as_str();
                match tag {
                    "match" => {
                        // Reset state for a new match block
                        // (`state` itself is only changed once the opening tag ends).
                        match_is_font_target = false;
                        current_family = None;
                        config = FcFontRenderConfig::default();
                    }
                    "test" if state == State::InMatchFont => {
                        in_test = true;
                        test_name = None;
                    }
                    "edit" if state == State::InMatchFont => {
                        current_edit_name = None;
                    }
                    "bool" | "double" | "const" | "string" | "int" => {
                        // Only value elements directly relevant to a family test or an edit are tracked.
                        if state == State::InTestFamily || state == State::InEdit {
                            value_tag = Some(tag.to_owned());
                            current_value = None;
                        }
                    }
                    _ => {}
                }
            }
            Attribute { local, value, .. } => {
                let attr_name = local.as_str();
                let attr_value = value.as_str();

                match attr_name {
                    "target" => {
                        // NOTE(review): this does not check which element carries the
                        // attribute, so `target="font"` on any tag seen while Idle
                        // will open a font-match block — confirm that is intended.
                        if attr_value == "font" {
                            match_is_font_target = true;
                        }
                    }
                    "name" => {
                        // Inside a font match, `name` belongs either to the `<test>`
                        // being opened or, otherwise, is taken as an `<edit>` name.
                        if in_test && state == State::InMatchFont {
                            test_name = Some(attr_value.to_owned());
                        } else if state == State::InMatchFont {
                            current_edit_name = Some(attr_value.to_owned());
                        }
                    }
                    _ => {}
                }
            }
            Text { text, .. } => {
                // Whitespace-only text is ignored; otherwise the latest text wins.
                let text = text.as_str().trim();
                if !text.is_empty() && (state == State::InTestFamily || state == State::InEdit) {
                    current_value = Some(text.to_owned());
                }
            }
            ElementEnd { end, .. } => {
                match end {
                    xmlparser::ElementEnd::Open => {
                        // Tag just opened (after attributes processed)
                        // The branch order matters: match-open, then test-open, then edit-open.
                        if match_is_font_target && state == State::Idle {
                            state = State::InMatchFont;
                            match_is_font_target = false;
                        } else if in_test {
                            // Only family tests are interesting; other tests leave the state untouched.
                            if test_name.as_deref() == Some("family") {
                                state = State::InTestFamily;
                            }
                            in_test = false;
                        } else if current_edit_name.is_some() && state == State::InMatchFont {
                            state = State::InEdit;
                        }
                    }
                    xmlparser::ElementEnd::Close(_, local) => {
                        let tag = local.as_str();
                        match tag {
                            "match" => {
                                // End of match block: store config if we have a family
                                // and at least one setting differs from the default.
                                if let Some(family) = current_family.take() {
                                    let empty = FcFontRenderConfig::default();
                                    if config != empty {
                                        configs.insert(family, config.clone());
                                    }
                                }
                                state = State::Idle;
                                config = FcFontRenderConfig::default();
                            }
                            "test" => {
                                if state == State::InTestFamily {
                                    // Extract the family name from the value we collected
                                    if let Some(ref val) = current_value {
                                        current_family = Some(val.clone());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_value = None;
                                value_tag = None;
                            }
                            "edit" => {
                                if state == State::InEdit {
                                    // Apply the collected value to the config
                                    if let (Some(ref name), Some(ref val)) = (&current_edit_name, &current_value) {
                                        apply_edit_value(&mut config, name, val, value_tag.as_deref());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_edit_name = None;
                                current_value = None;
                                value_tag = None;
                            }
                            "bool" | "double" | "const" | "string" | "int" => {
                                // value_tag and current_value already set by Text handler
                            }
                            _ => {}
                        }
                    }
                    xmlparser::ElementEnd::Empty => {
                        // Self-closing tags: nothing to do
                        // NOTE(review): a self-closing `<test .../>` inside a font match
                        // leaves `in_test` set until the next opening tag ends — verify
                        // whether such input needs handling.
                    }
                }
            }
            _ => {}
        }
    }
}
3254
/// Store one parsed `<edit>` value in the matching field of `config`.
///
/// * `config` - Render configuration being accumulated for the current match block.
/// * `edit_name` - The `name` attribute of the `<edit>` element (e.g. `"antialias"`).
/// * `value` - Text content of the value element inside the `<edit>`.
/// * `value_tag` - Tag name of that value element; currently not consulted.
///
/// Boolean and constant properties are overwritten with whatever the matching
/// `parse_*` helper returns (including `None` for unrecognised text), while
/// `dpi` / `scale` are only touched when `value` parses as `f64`. Unknown
/// property names are ignored.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn apply_edit_value(
    config: &mut FcFontRenderConfig,
    edit_name: &str,
    value: &str,
    value_tag: Option<&str>,
) {
    // All boolean properties share one code path: select the destination first.
    let bool_slot = match edit_name {
        "antialias" => Some(&mut config.antialias),
        "hinting" => Some(&mut config.hinting),
        "autohint" => Some(&mut config.autohint),
        "embeddedbitmap" => Some(&mut config.embeddedbitmap),
        "embolden" => Some(&mut config.embolden),
        "minspace" => Some(&mut config.minspace),
        _ => None,
    };
    if let Some(slot) = bool_slot {
        *slot = parse_bool_value(value);
        return;
    }

    match edit_name {
        // Named constants.
        "hintstyle" => config.hintstyle = parse_hintstyle_const(value),
        "rgba" => config.rgba = parse_rgba_const(value),
        "lcdfilter" => config.lcdfilter = parse_lcdfilter_const(value),
        // Numeric properties keep their previous value when the text is not a number.
        "dpi" | "scale" => {
            if let Ok(number) = value.parse::<f64>() {
                let slot = if edit_name == "dpi" {
                    &mut config.dpi
                } else {
                    &mut config.scale
                };
                *slot = Some(number);
            }
        }
        // Unknown edit property, ignore
        _ => {}
    }
}
3306
3307#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
/// Parse the text of a fonts.conf `<bool>` value.
///
/// Besides the canonical `true` / `false`, fontconfig itself also accepts
/// `yes`/`no`, `on`/`off`, `1`/`0` and the single letters `t`/`f`/`y`/`n`,
/// all case-insensitively, so those spellings are recognised here as well.
/// Surrounding whitespace is ignored.
///
/// Returns `None` when the text is not one of the recognised spellings.
fn parse_bool_value(value: &str) -> Option<bool> {
    const TRUTHY: [&str; 6] = ["true", "t", "yes", "y", "on", "1"];
    const FALSY: [&str; 6] = ["false", "f", "no", "n", "off", "0"];

    let word = value.trim();
    if TRUTHY.iter().any(|candidate| word.eq_ignore_ascii_case(candidate)) {
        Some(true)
    } else if FALSY.iter().any(|candidate| word.eq_ignore_ascii_case(candidate)) {
        Some(false)
    } else {
        None
    }
}
3315
/// Map a fonts.conf `hintstyle` constant name to its [`FcHintStyle`] variant.
///
/// Matching is exact; any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_hintstyle_const(value: &str) -> Option<FcHintStyle> {
    let style = match value {
        "hintnone" => FcHintStyle::None,
        "hintslight" => FcHintStyle::Slight,
        "hintmedium" => FcHintStyle::Medium,
        "hintfull" => FcHintStyle::Full,
        _ => return None,
    };
    Some(style)
}
3326
/// Map a fonts.conf `rgba` (subpixel order) constant name to its [`FcRgba`] variant.
///
/// Matching is exact; any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_rgba_const(value: &str) -> Option<FcRgba> {
    let order = match value {
        "unknown" => FcRgba::Unknown,
        "rgb" => FcRgba::Rgb,
        "bgr" => FcRgba::Bgr,
        "vrgb" => FcRgba::Vrgb,
        "vbgr" => FcRgba::Vbgr,
        "none" => FcRgba::None,
        _ => return None,
    };
    Some(order)
}
3339
/// Map a fonts.conf `lcdfilter` constant name to its [`FcLcdFilter`] variant.
///
/// Matching is exact; any other text yields `None`.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn parse_lcdfilter_const(value: &str) -> Option<FcLcdFilter> {
    let filter = match value {
        "lcdnone" => FcLcdFilter::None,
        "lcddefault" => FcLcdFilter::Default,
        "lcdlight" => FcLcdFilter::Light,
        "lcdlegacy" => FcLcdFilter::Legacy,
        _ => return None,
    };
    Some(filter)
}
3350
3351// Unicode range bit positions to actual ranges (full table from OpenType spec).
3352// Based on: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
3353#[cfg(all(feature = "std", feature = "parsing"))]
const UNICODE_RANGE_MAPPINGS: &[(usize, u32, u32)] = &[
    // Each entry is (OS/2 ulUnicodeRange bit, first code point, last code point).
    // A bit may appear several times: the OS/2 table assigns more than one
    // Unicode block to some bits, and every block is listed separately.
    // Entries are sorted by bit number. Bits 123-127 are reserved and therefore absent.
    //
    // ulUnicodeRange1 (bits 0-31)
    (0, 0x0000, 0x007F), // Basic Latin
    (1, 0x0080, 0x00FF), // Latin-1 Supplement
    (2, 0x0100, 0x017F), // Latin Extended-A
    (3, 0x0180, 0x024F), // Latin Extended-B
    (4, 0x0250, 0x02AF), // IPA Extensions
    (4, 0x1D00, 0x1D7F), // Phonetic Extensions
    (4, 0x1D80, 0x1DBF), // Phonetic Extensions Supplement
    (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
    (5, 0xA700, 0xA71F), // Modifier Tone Letters
    (6, 0x0300, 0x036F), // Combining Diacritical Marks
    (6, 0x1DC0, 0x1DFF), // Combining Diacritical Marks Supplement
    (7, 0x0370, 0x03FF), // Greek and Coptic
    (8, 0x2C80, 0x2CFF), // Coptic
    (9, 0x0400, 0x04FF), // Cyrillic
    (9, 0x0500, 0x052F), // Cyrillic Supplement
    (9, 0x2DE0, 0x2DFF), // Cyrillic Extended-A
    (9, 0xA640, 0xA69F), // Cyrillic Extended-B
    (10, 0x0530, 0x058F), // Armenian
    (11, 0x0590, 0x05FF), // Hebrew
    (12, 0xA500, 0xA63F), // Vai
    (13, 0x0600, 0x06FF), // Arabic
    (13, 0x0750, 0x077F), // Arabic Supplement
    (14, 0x07C0, 0x07FF), // NKo
    (15, 0x0900, 0x097F), // Devanagari
    (16, 0x0980, 0x09FF), // Bengali
    (17, 0x0A00, 0x0A7F), // Gurmukhi
    (18, 0x0A80, 0x0AFF), // Gujarati
    (19, 0x0B00, 0x0B7F), // Oriya
    (20, 0x0B80, 0x0BFF), // Tamil
    (21, 0x0C00, 0x0C7F), // Telugu
    (22, 0x0C80, 0x0CFF), // Kannada
    (23, 0x0D00, 0x0D7F), // Malayalam
    (24, 0x0E00, 0x0E7F), // Thai
    (25, 0x0E80, 0x0EFF), // Lao
    (26, 0x10A0, 0x10FF), // Georgian
    (26, 0x2D00, 0x2D2F), // Georgian Supplement
    (27, 0x1B00, 0x1B7F), // Balinese
    (28, 0x1100, 0x11FF), // Hangul Jamo
    (29, 0x1E00, 0x1EFF), // Latin Extended Additional
    (29, 0x2C60, 0x2C7F), // Latin Extended-C
    (29, 0xA720, 0xA7FF), // Latin Extended-D
    (30, 0x1F00, 0x1FFF), // Greek Extended
    (31, 0x2000, 0x206F), // General Punctuation
    (31, 0x2E00, 0x2E7F), // Supplemental Punctuation
    // ulUnicodeRange2 (bits 32-63)
    (32, 0x2070, 0x209F), // Superscripts And Subscripts
    (33, 0x20A0, 0x20CF), // Currency Symbols
    (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
    (35, 0x2100, 0x214F), // Letterlike Symbols
    (36, 0x2150, 0x218F), // Number Forms
    (37, 0x2190, 0x21FF), // Arrows
    (37, 0x27F0, 0x27FF), // Supplemental Arrows-A
    (37, 0x2900, 0x297F), // Supplemental Arrows-B
    (37, 0x2B00, 0x2BFF), // Miscellaneous Symbols and Arrows
    (38, 0x2200, 0x22FF), // Mathematical Operators
    (38, 0x27C0, 0x27EF), // Miscellaneous Mathematical Symbols-A
    (38, 0x2980, 0x29FF), // Miscellaneous Mathematical Symbols-B
    (38, 0x2A00, 0x2AFF), // Supplemental Mathematical Operators
    (39, 0x2300, 0x23FF), // Miscellaneous Technical
    (40, 0x2400, 0x243F), // Control Pictures
    (41, 0x2440, 0x245F), // Optical Character Recognition
    (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
    (43, 0x2500, 0x257F), // Box Drawing
    (44, 0x2580, 0x259F), // Block Elements
    (45, 0x25A0, 0x25FF), // Geometric Shapes
    (46, 0x2600, 0x26FF), // Miscellaneous Symbols
    (47, 0x2700, 0x27BF), // Dingbats
    (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
    (49, 0x3040, 0x309F), // Hiragana
    (50, 0x30A0, 0x30FF), // Katakana
    (50, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
    (51, 0x3100, 0x312F), // Bopomofo
    (51, 0x31A0, 0x31BF), // Bopomofo Extended
    (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
    (53, 0xA840, 0xA87F), // Phags-pa
    (54, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
    (55, 0x3300, 0x33FF), // CJK Compatibility
    (56, 0xAC00, 0xD7AF), // Hangul Syllables
    // Bit 57 only signals "has at least one supplementary-plane character"; the
    // spec lists it with the surrogate block, which contains no real characters.
    (57, 0xD800, 0xDFFF), // Non-Plane 0 (surrogates, not directly usable)
    (58, 0x10900, 0x1091F), // Phoenician
    (59, 0x2E80, 0x2EFF), // CJK Radicals Supplement
    (59, 0x2F00, 0x2FDF), // Kangxi Radicals
    (59, 0x2FF0, 0x2FFF), // Ideographic Description Characters
    (59, 0x3190, 0x319F), // Kanbun
    (59, 0x3400, 0x4DBF), // CJK Unified Ideographs Extension A
    (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
    (59, 0x20000, 0x2A6DF), // CJK Unified Ideographs Extension B
    (60, 0xE000, 0xF8FF), // Private Use Area (plane 0)
    (61, 0x31C0, 0x31EF), // CJK Strokes
    (61, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
    (61, 0x2F800, 0x2FA1F), // CJK Compatibility Ideographs Supplement
    (62, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
    (63, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
    // ulUnicodeRange3 (bits 64-95)
    (64, 0xFE20, 0xFE2F), // Combining Half Marks
    (65, 0xFE10, 0xFE1F), // Vertical Forms
    (65, 0xFE30, 0xFE4F), // CJK Compatibility Forms
    (66, 0xFE50, 0xFE6F), // Small Form Variants
    (67, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
    (68, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
    (69, 0xFFF0, 0xFFFF), // Specials
    (70, 0x0F00, 0x0FFF), // Tibetan
    (71, 0x0700, 0x074F), // Syriac
    (72, 0x0780, 0x07BF), // Thaana
    (73, 0x0D80, 0x0DFF), // Sinhala
    (74, 0x1000, 0x109F), // Myanmar
    (75, 0x1200, 0x137F), // Ethiopic
    (75, 0x1380, 0x139F), // Ethiopic Supplement
    (75, 0x2D80, 0x2DDF), // Ethiopic Extended
    (76, 0x13A0, 0x13FF), // Cherokee
    (77, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
    (78, 0x1680, 0x169F), // Ogham
    (79, 0x16A0, 0x16FF), // Runic
    (80, 0x1780, 0x17FF), // Khmer
    (80, 0x19E0, 0x19FF), // Khmer Symbols
    (81, 0x1800, 0x18AF), // Mongolian
    (82, 0x2800, 0x28FF), // Braille Patterns
    (83, 0xA000, 0xA48F), // Yi Syllables
    (83, 0xA490, 0xA4CF), // Yi Radicals
    (84, 0x1700, 0x171F), // Tagalog
    (84, 0x1720, 0x173F), // Hanunoo
    (84, 0x1740, 0x175F), // Buhid
    (84, 0x1760, 0x177F), // Tagbanwa
    (85, 0x10300, 0x1032F), // Old Italic
    (86, 0x10330, 0x1034F), // Gothic
    (87, 0x10400, 0x1044F), // Deseret
    (88, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
    (88, 0x1D100, 0x1D1FF), // Musical Symbols
    (88, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
    (89, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
    (90, 0xF0000, 0xFFFFD), // Private Use (plane 15)
    (90, 0x100000, 0x10FFFD), // Private Use (plane 16)
    (91, 0xFE00, 0xFE0F), // Variation Selectors
    (91, 0xE0100, 0xE01EF), // Variation Selectors Supplement
    (92, 0xE0000, 0xE007F), // Tags
    (93, 0x1900, 0x194F), // Limbu
    (94, 0x1950, 0x197F), // Tai Le
    (95, 0x1980, 0x19DF), // New Tai Lue
    // ulUnicodeRange4 (bits 96-127)
    (96, 0x1A00, 0x1A1F), // Buginese
    (97, 0x2C00, 0x2C5F), // Glagolitic
    (98, 0x2D30, 0x2D7F), // Tifinagh
    (99, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
    (100, 0xA800, 0xA82F), // Syloti Nagri
    (101, 0x10000, 0x1007F), // Linear B Syllabary
    (101, 0x10080, 0x100FF), // Linear B Ideograms
    (101, 0x10100, 0x1013F), // Aegean Numbers
    (102, 0x10140, 0x1018F), // Ancient Greek Numbers
    (103, 0x10380, 0x1039F), // Ugaritic
    (104, 0x103A0, 0x103DF), // Old Persian
    (105, 0x10450, 0x1047F), // Shavian
    (106, 0x10480, 0x104AF), // Osmanya
    (107, 0x10800, 0x1083F), // Cypriot Syllabary
    (108, 0x10A00, 0x10A5F), // Kharoshthi
    (109, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
    (110, 0x12000, 0x123FF), // Cuneiform
    (110, 0x12400, 0x1247F), // Cuneiform Numbers and Punctuation
    (111, 0x1D360, 0x1D37F), // Counting Rod Numerals
    (112, 0x1B80, 0x1BBF), // Sundanese
    (113, 0x1C00, 0x1C4F), // Lepcha
    (114, 0x1C50, 0x1C7F), // Ol Chiki
    (115, 0xA880, 0xA8DF), // Saurashtra
    (116, 0xA900, 0xA92F), // Kayah Li
    (117, 0xA930, 0xA95F), // Rejang
    (118, 0xAA00, 0xAA5F), // Cham
    (119, 0x10190, 0x101CF), // Ancient Symbols
    (120, 0x101D0, 0x101FF), // Phaistos Disc
    (121, 0x10280, 0x1029F), // Lycian
    (121, 0x102A0, 0x102DF), // Carian
    (121, 0x10920, 0x1093F), // Lydian
    (122, 0x1F000, 0x1F02F), // Mahjong Tiles
    (122, 0x1F030, 0x1F09F), // Domino Tiles
];
3488
/// Intermediate parsed data from a single font face within a font file.
/// Used to share parsing logic between `FcParseFont` and `FcParseFontBytesInner`,
/// which wrap it into `FcFontPath` / `FcFont` entries respectively.
#[cfg(all(feature = "std", feature = "parsing"))]
struct ParsedFontFace {
    /// Pattern (names, style flags, unicode ranges, metadata) extracted for this face.
    pattern: FcPattern,
    /// Index of the face inside the font file (the value passed to `table_provider`).
    font_index: usize,
}
3496
/// Parse all font table data from every face in `font_bytes` and return the extracted patterns.
///
/// This is the shared core of `FcParseFont` and `FcParseFontBytesInner`:
/// TTC detection, font table parsing, OS/2/head/post reading, unicode range extraction,
/// CMAP verification, monospace detection, metadata extraction, and pattern creation.
///
/// # Arguments
/// * `font_bytes` - Raw contents of a font file (single font or `ttcf` collection).
///
/// # Returns
/// `Some` with one entry per distinct pattern found, or `None` when the data
/// cannot be read as font data or no face produced a pattern.
///
/// Each face is parsed independently: a face that is missing a required table
/// (`head`, `post`, `OS/2`, `name`) or fails to parse is skipped instead of
/// discarding the faces of the collection that did parse.
#[cfg(all(feature = "std", feature = "parsing"))]
fn parse_font_faces(font_bytes: &[u8]) -> Option<Vec<ParsedFontFace>> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        get_name::fontcode_get_name,
        post::PostTable,
        tables::{
            os2::Os2, HeadTable, NameTable,
        },
        tag,
    };
    use std::collections::BTreeSet;

    const FONT_SPECIFIER_NAME_ID: u16 = 4;
    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;

    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
        // Read numFonts from TTC header (offset 8, 4 bytes)
        let num_fonts =
            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
        // Cap at a reasonable maximum as a safety measure
        std::cmp::min(num_fonts as usize, 100)
    } else {
        // Not a collection, just one font
        1
    };

    let scope = ReadScope::new(font_bytes);
    let font_file = scope.read::<FontData<'_>>().ok()?;

    // Parses one face. Every `?` in here only abandons *this* face, so one
    // malformed member of a collection cannot hide the remaining faces.
    let parse_face = |font_index: usize| -> Option<Vec<ParsedFontFace>> {
        let provider = font_file.table_provider(font_index).ok()?;
        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;

        let is_bold = head_table.is_bold();
        let is_italic = head_table.is_italic();
        let mut detected_monospace = None;

        let post_data = provider.table_data(tag::POST).ok()??;
        if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
            // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
            detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
        }

        // Get font properties from OS/2 table
        let os2_data = provider.table_data(tag::OS_2).ok()??;
        let os2_table = ReadScope::new(&os2_data)
            .read_dep::<Os2>(os2_data.len())
            .ok()?;

        // Extract additional style information
        let is_oblique = os2_table
            .fs_selection
            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
        let weight = FcWeight::from_u16(os2_table.us_weight_class);
        let stretch = FcStretch::from_u16(os2_table.us_width_class);

        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
        // These are hints about what the font *should* support
        // For actual glyph coverage verification, query the font file directly
        let mut unicode_ranges = Vec::new();

        // Process the 4 Unicode range bitfields from OS/2 table
        let os2_ranges = [
            os2_table.ul_unicode_range1,
            os2_table.ul_unicode_range2,
            os2_table.ul_unicode_range3,
            os2_table.ul_unicode_range4,
        ];

        for &(bit, start, end) in UNICODE_RANGE_MAPPINGS {
            // Bit N lives in word N / 32 at position N % 32.
            let range_idx = bit / 32;
            let bit_pos = bit % 32;
            if range_idx < 4 && (os2_ranges[range_idx] & (1 << bit_pos)) != 0 {
                unicode_ranges.push(UnicodeRange { start, end });
            }
        }

        // Verify OS/2 reported ranges against actual CMAP support
        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
        // for ranges they don't actually have glyphs for
        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);

        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
        if unicode_ranges.is_empty() {
            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
                unicode_ranges = cmap_ranges;
            }
        }

        // Use the shared detect_monospace helper for PANOSE + hmtx fallback
        let is_monospace = detect_monospace(&provider, &os2_table, detected_monospace)
            .unwrap_or(false);

        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;

        // Extract metadata from name table
        let mut metadata = FcFontMetadata::default();

        const NAME_ID_COPYRIGHT: u16 = 0;
        const NAME_ID_FAMILY: u16 = 1;
        const NAME_ID_SUBFAMILY: u16 = 2;
        const NAME_ID_UNIQUE_ID: u16 = 3;
        const NAME_ID_FULL_NAME: u16 = 4;
        const NAME_ID_VERSION: u16 = 5;
        const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
        const NAME_ID_TRADEMARK: u16 = 7;
        const NAME_ID_MANUFACTURER: u16 = 8;
        const NAME_ID_DESIGNER: u16 = 9;
        const NAME_ID_DESCRIPTION: u16 = 10;
        const NAME_ID_VENDOR_URL: u16 = 11;
        const NAME_ID_DESIGNER_URL: u16 = 12;
        const NAME_ID_LICENSE: u16 = 13;
        const NAME_ID_LICENSE_URL: u16 = 14;
        const NAME_ID_PREFERRED_FAMILY: u16 = 16;
        const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;

        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
        metadata.font_family = get_name_string(&name_data, NAME_ID_FAMILY);
        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
        metadata.full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);
        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
        metadata.postscript_name = get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
        metadata.preferred_family = get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
        metadata.preferred_subfamily = get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);

        // One font can support multiple patterns
        let mut f_family = None;

        // Walk the name records in table order: a family record (ID 1) must be
        // seen before a full-name record (ID 4) can produce a pattern. The
        // BTreeSet collapses the identical patterns produced by repeated records.
        let patterns = name_table
            .name_records
            .iter()
            .filter_map(|name_record| {
                let name_id = name_record.name_id;
                if name_id == FONT_SPECIFIER_FAMILY_ID {
                    if let Ok(Some(family)) =
                        fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID)
                    {
                        f_family = Some(family);
                    }
                    None
                } else if name_id == FONT_SPECIFIER_NAME_ID {
                    let family = f_family.as_ref()?;
                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
                    if name.to_bytes().is_empty() {
                        None
                    } else {
                        let mut name_str =
                            String::from_utf8_lossy(name.to_bytes()).to_string();
                        let mut family_str =
                            String::from_utf8_lossy(family.as_bytes()).to_string();
                        // Drop a single leading '.' from both names.
                        if name_str.starts_with('.') {
                            name_str = name_str[1..].to_string();
                        }
                        if family_str.starts_with('.') {
                            family_str = family_str[1..].to_string();
                        }
                        Some((
                            FcPattern {
                                name: Some(name_str),
                                family: Some(family_str),
                                bold: if is_bold {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                italic: if is_italic {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                oblique: if is_oblique {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                monospace: if is_monospace {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                condensed: if stretch <= FcStretch::Condensed {
                                    PatternMatch::True
                                } else {
                                    PatternMatch::False
                                },
                                weight,
                                stretch,
                                unicode_ranges: unicode_ranges.clone(),
                                metadata: metadata.clone(),
                                render_config: FcFontRenderConfig::default(),
                            },
                            font_index,
                        ))
                    }
                } else {
                    None
                }
            })
            .collect::<BTreeSet<_>>();

        Some(
            patterns
                .into_iter()
                .map(|(pat, idx)| ParsedFontFace {
                    pattern: pat,
                    font_index: idx,
                })
                .collect(),
        )
    };

    // Handle collections properly by iterating through all fonts
    let mut results = Vec::new();
    for font_index in 0..max_fonts {
        if let Some(faces) = parse_face(font_index) {
            results.extend(faces);
        }
    }

    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}
3730
// Remaining implementation for font scanning, parsing, etc.

/// Parse the font file at `filepath` and pair every extracted pattern with
/// the on-disk location (`FcFontPath`) of the face it came from.
///
/// Returns `None` when the file cannot be opened or read, or when
/// `parse_font_faces` yields nothing for its contents.
#[cfg(all(feature = "std", feature = "parsing"))]
pub(crate) fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    use mmapio::MmapOptions;
    use std::fs::File;

    // Opening the file up front doubles as the "does it exist / is it readable" check.
    let file = File::open(filepath).ok()?;

    // SAFETY (review note): the mapping is only read, and only within this
    // function. This presumably relies on the file not being truncated or
    // rewritten while it is mapped — confirm that assumption holds for callers.
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    let font_bytes = unsafe { MmapOptions::new().map(&file).ok()? };

    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
    let font_bytes = std::fs::read(filepath).ok()?;

    let parsed = parse_font_faces(&font_bytes[..])?;
    let path = filepath.to_string_lossy().to_string();
    // Hash once per file — every face of a .ttc shares this value, so it can
    // serve as a common dedup key for all of them (per the original note this
    // is the cheap, sampled hash variant).
    let bytes_hash = crate::utils::content_dedup_hash_u64(&font_bytes[..]);

    let mut entries = Vec::with_capacity(parsed.len());
    for ParsedFontFace { pattern, font_index } in parsed {
        entries.push((
            pattern,
            FcFontPath {
                path: path.clone(),
                font_index,
                bytes_hash,
            },
        ));
    }
    Some(entries)
}
3771
/// Parse font bytes and extract font patterns for in-memory fonts.
///
/// This is the public API for parsing in-memory font data to create
/// `(FcPattern, FcFont)` tuples that can be added to an `FcFontCache`
/// via `with_memory_fonts()`. It is a thin public entry point that forwards
/// both arguments unchanged to `FcParseFontBytesInner`.
///
/// # Arguments
/// * `font_bytes` - The raw bytes of a TrueType/OpenType font file
/// * `font_id` - An identifier string for this font (used internally);
///   it is stored as the `id` of every returned `FcFont`
///
/// # Returns
/// A vector of `(FcPattern, FcFont)` tuples, one for each pattern extracted
/// from the font faces in the file. Each returned `FcFont` owns its own copy
/// of `font_bytes`.
/// Returns `None` if the font could not be parsed.
///
/// # Example
/// ```ignore
/// use rust_fontconfig::{FcFontCache, FcParseFontBytes};
///
/// let font_bytes = include_bytes!("path/to/font.ttf");
/// let mut cache = FcFontCache::default();
///
/// if let Some(fonts) = FcParseFontBytes(font_bytes, "MyFont") {
///     cache.with_memory_fonts(fonts);
/// }
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontBytes(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    FcParseFontBytesInner(font_bytes, font_id)
}
3802
/// Worker behind `FcParseFontBytes`.
///
/// Runs the shared `parse_font_faces` logic on `font_bytes` and wraps every
/// resulting face as an in-memory `FcFont` that carries its own copy of the
/// bytes plus the caller-supplied `font_id`. Returns `None` when parsing
/// produced nothing.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontBytesInner(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    let parsed = parse_font_faces(font_bytes)?;
    let owned_id = font_id.to_string();
    let owned_bytes = font_bytes.to_vec();

    let mut fonts = Vec::with_capacity(parsed.len());
    for ParsedFontFace { pattern, font_index } in parsed {
        fonts.push((
            pattern,
            FcFont {
                bytes: owned_bytes.clone(),
                font_index,
                id: owned_id.clone(),
            },
        ));
    }
    Some(fonts)
}
3827
/// Scan all configured font directories and parse the font files found in them.
///
/// Every element of `paths` is a `(prefix, path)` pair. Each pair is resolved
/// through `process_path` (as a non-include path); pairs that cannot be resolved
/// are skipped. Every resolved directory is handed to
/// `FcScanSingleDirectoryRecursive` and all results are concatenated.
///
/// With the `multithreading` feature enabled the pairs are processed in
/// parallel via rayon, otherwise sequentially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
    // Per-entry work shared by the parallel and the sequential code path.
    fn scan_entry(entry: &(Option<String>, String)) -> Option<Vec<(FcPattern, FcFontPath)>> {
        let (prefix, raw_path) = entry;
        let resolved = process_path(prefix, PathBuf::from(raw_path), false)?;
        Some(FcScanSingleDirectoryRecursive(resolved))
    }

    #[cfg(feature = "multithreading")]
    {
        use rayon::prelude::*;

        // scan directories in parallel
        paths.par_iter().filter_map(scan_entry).flatten().collect()
    }
    #[cfg(not(feature = "multithreading"))]
    {
        paths.iter().filter_map(scan_entry).flatten().collect()
    }
}
3854
/// Recursively collect all files from a directory (no parsing, no allsorts).
///
/// The tree below `dir` is walked breadth-first. Every entry for which
/// `Path::is_dir` is false is returned, regardless of its extension;
/// directories are queued and visited later. Directories that cannot be read
/// (including a non-existent `dir`) and entries that fail to enumerate are
/// silently skipped.
///
/// Note that `Path::is_dir` follows symbolic links.
#[cfg(feature = "std")]
fn FcCollectFontFilesRecursive(dir: PathBuf) -> Vec<PathBuf> {
    let mut collected = Vec::new();

    // FIFO queue of directories still to be listed.
    let mut pending = std::collections::VecDeque::new();
    pending.push_back(dir);

    while let Some(current) = pending.pop_front() {
        if let Ok(listing) = std::fs::read_dir(&current) {
            for path in listing.flatten().map(|entry| entry.path()) {
                if path.is_dir() {
                    pending.push_back(path);
                } else {
                    collected.push(path);
                }
            }
        }
    }

    collected
}
3885
/// Collect every file below `dir` with `FcCollectFontFilesRecursive` and parse
/// the collected files with `FcParseFontFiles`.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
    FcParseFontFiles(&FcCollectFontFilesRecursive(dir))
}
3891
/// Parse every file in `files_to_parse` with `FcParseFont` and concatenate the
/// results in input order.
///
/// Files for which `FcParseFont` returns `None` contribute nothing. With the
/// `multithreading` feature the files are parsed in parallel via rayon.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(feature = "multithreading")]
    let per_file: Vec<Vec<_>> = {
        use rayon::prelude::*;

        files_to_parse
            .par_iter()
            .filter_map(|path| FcParseFont(path))
            .collect()
    };
    #[cfg(not(feature = "multithreading"))]
    let per_file: Vec<Vec<_>> = files_to_parse
        .iter()
        .filter_map(|path| FcParseFont(path))
        .collect();

    per_file.into_iter().flatten().collect()
}
3915
/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
///
/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
///
/// Resolution happens in two steps:
///
/// 1. A leading `~` path component is replaced by the value of `$HOME`.
///    If `$HOME` cannot be read, `None` is returned.
/// 2. The `prefix` is applied:
///    * `None` - the path is returned as is.
///    * `"cwd"` / `"default"` - the path is joined onto `.`.
///    * `"xdg"` - the path is joined onto `$XDG_CONFIG_HOME` when
///      `is_include_path` is true, otherwise onto `$XDG_DATA_HOME`. If the
///      variable is unset **or empty**, the default below `$HOME`
///      (`.config` / `.local/share`) is used, as required by the XDG Base
///      Directory Specification. `None` if neither is available.
///    * anything else - unsupported, `None`.
#[cfg(all(feature = "std", feature = "parsing"))]
fn process_path(
    prefix: &Option<String>,
    mut path: PathBuf,
    is_include_path: bool,
) -> Option<PathBuf> {
    use std::env::var;

    const HOME_SHORTCUT: &str = "~";
    const CWD_PATH: &str = ".";

    const HOME_ENV_VAR: &str = "HOME";
    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";

    const PREFIX_CWD: &str = "cwd";
    const PREFIX_DEFAULT: &str = "default";
    const PREFIX_XDG: &str = "xdg";

    // These could, in theory, be cached, but the work required to do so outweighs the minor benefits
    fn get_home_value() -> Option<PathBuf> {
        var(HOME_ENV_VAR).ok().map(PathBuf::from)
    }

    // Reads an XDG base directory variable. Per the XDG Base Directory
    // Specification an empty value must be treated exactly like an unset one,
    // so both cases fall back to `$HOME/<default_suffix>`.
    fn get_xdg_dir(env_var: &str, default_suffix: &str) -> Option<PathBuf> {
        var(env_var)
            .ok()
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .or_else(|| get_home_value().map(|home_path| home_path.join(default_suffix)))
    }

    // Resolve the tilde character in the path, if present.
    // `strip_prefix` works on whole path components, so `~user/...` is left alone.
    if let Ok(rest) = path.strip_prefix(HOME_SHORTCUT) {
        let expanded = get_home_value()?.join(rest);
        path = expanded;
    }

    // Resolve prefix values
    match prefix.as_deref() {
        None => Some(path),
        Some(PREFIX_CWD) | Some(PREFIX_DEFAULT) => Some(PathBuf::from(CWD_PATH).join(path)),
        Some(PREFIX_XDG) => {
            let base_dir = if is_include_path {
                get_xdg_dir(XDG_CONFIG_HOME_ENV_VAR, XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX)
            } else {
                get_xdg_dir(XDG_DATA_HOME_ENV_VAR, XDG_DATA_HOME_DEFAULT_PATH_SUFFIX)
            };
            base_dir.map(|base| base.join(path))
        }
        Some(_) => None, // Unsupported prefix
    }
}
3997
/// Look up `name_id` in the raw name table data via `fontcode_get_name` and
/// return the entry as an owned `String` (lossy UTF-8 conversion).
///
/// Returns `None` when the lookup fails or yields no entry.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
    let raw_name = fontcode_get_name(name_data, name_id).ok()??;
    Some(String::from_utf8_lossy(raw_name.to_bytes()).into_owned())
}
4006
/// Representative test codepoints for each Unicode block.
/// These are carefully chosen to be actual script characters (not punctuation/symbols)
/// that a font claiming to support this script should definitely have.
///
/// The hand-picked list is selected by `start` alone (the first codepoint of a
/// well-known Unicode block); `end` is only used for blocks without a
/// hand-picked list. For those, the range is sampled at regular intervals:
/// four samples when the range spans more than 20 codepoints, otherwise the
/// first and the middle codepoint.
///
/// An inverted range (`end < start`) is treated as having size zero instead of
/// underflowing, so the result is always non-empty.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
    match start {
        // Basic Latin - test uppercase and lowercase letters
        0x0000 => vec!['A' as u32, 'M' as u32, 'Z' as u32, 'a' as u32, 'm' as u32, 'z' as u32],
        // Latin-1 Supplement - common accented letters
        0x0080 => vec![0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1], // À É Ñ à é ñ
        // Latin Extended-A
        0x0100 => vec![0x0100, 0x0110, 0x0141, 0x0152, 0x0160], // Ā Đ Ł Œ Š
        // Latin Extended-B
        0x0180 => vec![0x0180, 0x01A0, 0x01B0, 0x01CD], // ƀ Ơ ư Ǎ
        // IPA Extensions
        0x0250 => vec![0x0250, 0x0259, 0x026A, 0x0279], // ɐ ə ɪ ɹ
        // Greek and Coptic
        0x0370 => vec![0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9], // Α Β Γ α β ω
        // Cyrillic
        0x0400 => vec![0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F], // А Б В а б Я
        // Armenian
        0x0530 => vec![0x0531, 0x0532, 0x0533, 0x0561, 0x0562], // Ա Բ Գ ա բ
        // Hebrew
        0x0590 => vec![0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA], // א ב ג ש ת
        // Arabic
        0x0600 => vec![0x0627, 0x0628, 0x062A, 0x062C, 0x0645], // ا ب ت ج م
        // Syriac
        0x0700 => vec![0x0710, 0x0712, 0x0713, 0x0715], // ܐ ܒ ܓ ܕ
        // Devanagari
        0x0900 => vec![0x0905, 0x0906, 0x0915, 0x0916, 0x0939], // अ आ क ख ह
        // Bengali
        0x0980 => vec![0x0985, 0x0986, 0x0995, 0x0996], // অ আ ক খ
        // Gurmukhi
        0x0A00 => vec![0x0A05, 0x0A06, 0x0A15, 0x0A16], // ਅ ਆ ਕ ਖ
        // Gujarati
        0x0A80 => vec![0x0A85, 0x0A86, 0x0A95, 0x0A96], // અ આ ક ખ
        // Oriya
        0x0B00 => vec![0x0B05, 0x0B06, 0x0B15, 0x0B16], // ଅ ଆ କ ଖ
        // Tamil
        0x0B80 => vec![0x0B85, 0x0B86, 0x0B95, 0x0BA4], // அ ஆ க த
        // Telugu
        0x0C00 => vec![0x0C05, 0x0C06, 0x0C15, 0x0C16], // అ ఆ క ఖ
        // Kannada
        0x0C80 => vec![0x0C85, 0x0C86, 0x0C95, 0x0C96], // ಅ ಆ ಕ ಖ
        // Malayalam
        0x0D00 => vec![0x0D05, 0x0D06, 0x0D15, 0x0D16], // അ ആ ക ഖ
        // Thai
        0x0E00 => vec![0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40], // ก ข ค ง เ
        // Lao
        0x0E80 => vec![0x0E81, 0x0E82, 0x0E84, 0x0E87], // ກ ຂ ຄ ງ
        // Myanmar
        0x1000 => vec![0x1000, 0x1001, 0x1002, 0x1010, 0x1019], // က ခ ဂ တ မ
        // Georgian
        0x10A0 => vec![0x10D0, 0x10D1, 0x10D2, 0x10D3], // ა ბ გ დ
        // Hangul Jamo
        0x1100 => vec![0x1100, 0x1102, 0x1103, 0x1161, 0x1162], // ᄀ ᄂ ᄃ ᅡ ᅢ
        // Ethiopic
        0x1200 => vec![0x1200, 0x1208, 0x1210, 0x1218], // ሀ ለ ሐ መ
        // Cherokee
        0x13A0 => vec![0x13A0, 0x13A1, 0x13A2, 0x13A3], // Ꭰ Ꭱ Ꭲ Ꭳ
        // Khmer
        0x1780 => vec![0x1780, 0x1781, 0x1782, 0x1783], // ក ខ គ ឃ
        // Mongolian
        0x1800 => vec![0x1820, 0x1821, 0x1822, 0x1823], // ᠠ ᠡ ᠢ ᠣ
        // Hiragana
        0x3040 => vec![0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093], // あ い う か き ん
        // Katakana
        0x30A0 => vec![0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3], // ア イ ウ カ キ ン
        // Bopomofo
        0x3100 => vec![0x3105, 0x3106, 0x3107, 0x3108], // ㄅ ㄆ ㄇ ㄈ
        // CJK Unified Ideographs - common characters
        0x4E00 => vec![0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708], // 一 中 人 大 日 月
        // Hangul Syllables
        0xAC00 => vec![0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4], // 가 각 간 나 다
        // CJK Compatibility Ideographs
        0xF900 => vec![0xF900, 0xF901, 0xF902], // 豈 更 車
        // Arabic Presentation Forms-A
        0xFB50 => vec![0xFB50, 0xFB51, 0xFB52, 0xFB56], // ﭐ ﭑ ﭒ ﭖ
        // Arabic Presentation Forms-B
        0xFE70 => vec![0xFE70, 0xFE72, 0xFE74, 0xFE76], // ﹰ ﹲ ﹴ ﹶ
        // Halfwidth and Fullwidth Forms
        0xFF00 => vec![0xFF01, 0xFF21, 0xFF41, 0xFF61], // ！ Ａ ａ ｡
        // Default: sample at regular intervals
        _ => {
            // Saturating: an inverted range must not underflow (panic in debug
            // builds, wrap-around in release builds).
            let range_size = end.saturating_sub(start);
            if range_size > 20 {
                vec![
                    start + range_size / 5,
                    start + 2 * range_size / 5,
                    start + 3 * range_size / 5,
                    start + 4 * range_size / 5,
                ]
            } else {
                vec![start, start + range_size / 2]
            }
        }
    }
}
4105
/// Find the best Unicode CMAP subtable of a font's `cmap` table.
///
/// Platform/encoding combinations are tried in this priority order and the
/// first one present wins:
/// Unicode/3, Unicode/4, Windows/1, Windows/10, Unicode/0, Unicode/1.
/// Returns `None` if none of them exists.
#[cfg(all(feature = "std", feature = "parsing"))]
fn find_best_cmap_subtable<'a>(
    cmap: &allsorts::tables::cmap::Cmap<'a>,
) -> Option<allsorts::tables::cmap::EncodingRecord> {
    use allsorts::tables::cmap::{PlatformId, EncodingId};

    let unicode = |encoding: u16| cmap.find_subtable(PlatformId::UNICODE, EncodingId(encoding));
    let windows = |encoding: u16| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(encoding));

    unicode(3)
        .or_else(|| unicode(4))
        .or_else(|| windows(1))
        .or_else(|| windows(10))
        .or_else(|| unicode(0))
        .or_else(|| unicode(1))
}
4121
/// Verify OS/2 reported Unicode ranges against actual CMAP support.
/// Returns only ranges that are actually supported by the font's CMAP table.
///
/// A range is kept when at least half (rounded up) of the test codepoints
/// returned by `get_verification_codepoints` map to a non-zero glyph id. Test
/// codepoints outside the range never count as a hit.
///
/// When verification is impossible (no `cmap` table, unreadable table, no
/// suitable subtable) the OS/2 ranges are returned unchanged.
#[cfg(all(feature = "std", feature = "parsing"))]
fn verify_unicode_ranges_with_cmap(
    provider: &impl FontTableProvider,
    os2_ranges: Vec<UnicodeRange>,
) -> Vec<UnicodeRange> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    if os2_ranges.is_empty() {
        return Vec::new();
    }

    // Every failure below means "can't verify", in which case OS/2 is trusted.
    let cmap_data = match provider.table_data(tag::CMAP) {
        Ok(Some(raw)) => raw,
        _ => return os2_ranges,
    };
    let cmap = match ReadScope::new(&cmap_data).read::<Cmap<'_>>() {
        Ok(table) => table,
        Err(_) => return os2_ranges,
    };
    let record = match find_best_cmap_subtable(&cmap) {
        Some(found) => found,
        None => return os2_ranges,
    };
    let subtable = match ReadScope::new(&cmap_data)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
    {
        Ok(parsed) => parsed,
        Err(_) => return os2_ranges,
    };

    os2_ranges
        .into_iter()
        .filter(|range| {
            let samples = get_verification_codepoints(range.start, range.end);
            // ceil(len / 2): at least 50% of the samples need a real glyph
            let needed = (samples.len() + 1) / 2;

            let found = samples
                .into_iter()
                .filter(|cp| (range.start..=range.end).contains(cp))
                .filter(|&cp| matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                // stop probing as soon as the threshold is reached
                .take(needed)
                .count();

            found >= needed
        })
        .collect()
}
4190
/// Analyze CMAP table to discover font coverage when OS/2 provides no info.
/// This is the fallback when OS/2 ulUnicodeRange bits are all zero.
///
/// A fixed list of Unicode blocks is probed. A block counts as covered when at
/// least half (rounded up) of its test codepoints from
/// `get_verification_codepoints` map to a non-zero glyph id.
///
/// Returns `None` if the `cmap` table or a suitable subtable cannot be read,
/// or if no probed block is covered.
#[cfg(all(feature = "std", feature = "parsing"))]
fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    // Standard Unicode blocks to probe
    const BLOCKS_TO_CHECK: &[(u32, u32)] = &[
        (0x0000, 0x007F), // Basic Latin
        (0x0080, 0x00FF), // Latin-1 Supplement
        (0x0100, 0x017F), // Latin Extended-A
        (0x0180, 0x024F), // Latin Extended-B
        (0x0250, 0x02AF), // IPA Extensions
        (0x0300, 0x036F), // Combining Diacritical Marks
        (0x0370, 0x03FF), // Greek and Coptic
        (0x0400, 0x04FF), // Cyrillic
        (0x0500, 0x052F), // Cyrillic Supplement
        (0x0530, 0x058F), // Armenian
        (0x0590, 0x05FF), // Hebrew
        (0x0600, 0x06FF), // Arabic
        (0x0700, 0x074F), // Syriac
        (0x0900, 0x097F), // Devanagari
        (0x0980, 0x09FF), // Bengali
        (0x0A00, 0x0A7F), // Gurmukhi
        (0x0A80, 0x0AFF), // Gujarati
        (0x0B00, 0x0B7F), // Oriya
        (0x0B80, 0x0BFF), // Tamil
        (0x0C00, 0x0C7F), // Telugu
        (0x0C80, 0x0CFF), // Kannada
        (0x0D00, 0x0D7F), // Malayalam
        (0x0E00, 0x0E7F), // Thai
        (0x0E80, 0x0EFF), // Lao
        (0x1000, 0x109F), // Myanmar
        (0x10A0, 0x10FF), // Georgian
        (0x1100, 0x11FF), // Hangul Jamo
        (0x1200, 0x137F), // Ethiopic
        (0x13A0, 0x13FF), // Cherokee
        (0x1780, 0x17FF), // Khmer
        (0x1800, 0x18AF), // Mongolian
        (0x2000, 0x206F), // General Punctuation
        (0x20A0, 0x20CF), // Currency Symbols
        (0x2100, 0x214F), // Letterlike Symbols
        (0x2190, 0x21FF), // Arrows
        (0x2200, 0x22FF), // Mathematical Operators
        (0x2500, 0x257F), // Box Drawing
        (0x25A0, 0x25FF), // Geometric Shapes
        (0x2600, 0x26FF), // Miscellaneous Symbols
        (0x3000, 0x303F), // CJK Symbols and Punctuation
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0x3100, 0x312F), // Bopomofo
        (0x3130, 0x318F), // Hangul Compatibility Jamo
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0xAC00, 0xD7AF), // Hangul Syllables
        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
    ];

    let cmap_data = provider.table_data(tag::CMAP).ok()??;
    let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;
    let record = find_best_cmap_subtable(&cmap)?;
    let subtable = ReadScope::new(&cmap_data)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    // True when the subtable maps `cp` to a real (non-.notdef) glyph.
    let has_glyph = |cp: u32| matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0);

    let covered: Vec<UnicodeRange> = BLOCKS_TO_CHECK
        .iter()
        .filter(|&&(start, end)| {
            let samples = get_verification_codepoints(start, end);
            // ceil(len / 2)
            let needed = (samples.len() + 1) / 2;
            let found = samples
                .into_iter()
                .filter(|&cp| has_glyph(cp))
                // stop probing as soon as the threshold is reached
                .take(needed)
                .count();
            found >= needed
        })
        .map(|&(start, end)| UnicodeRange { start, end })
        .collect();

    Some(covered).filter(|ranges| !ranges.is_empty())
}
4290
/// Translate the four OS/2 `ulUnicodeRange` words into `UnicodeRange`s
/// (unused, kept for reference).
///
/// For every `(bit, start, end)` entry of `UNICODE_RANGE_MAPPINGS` whose bit
/// is set in the 128-bit field formed by the four words, `start..end` is
/// emitted. Entries whose bit index lies beyond the four words are ignored.
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(dead_code)]
fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
    let words = [
        os2_table.ul_unicode_range1,
        os2_table.ul_unicode_range2,
        os2_table.ul_unicode_range3,
        os2_table.ul_unicode_range4,
    ];

    UNICODE_RANGE_MAPPINGS
        .iter()
        .filter(|&&(bit, _, _)| {
            // word index = bit / 32, position inside the word = bit % 32
            words
                .get(bit / 32)
                .map_or(false, |word| *word & (1 << (bit % 32)) != 0)
        })
        .map(|&(_, start, end)| UnicodeRange { start, end })
        .collect()
}
4314
/// Detect whether a font is monospaced.
///
/// The checks are applied in this order:
/// 1. If `detected_monospace` already holds a value, it is returned as is.
/// 2. If the PANOSE family kind is 2 (Latin Text), the result is whether the
///    PANOSE proportion equals 9 (Monospaced).
/// 3. Otherwise the advance widths of all `hmtx` long metrics are compared;
///    the font is monospaced when they are all equal.
///
/// Returns `None` when the `hhea`, `maxp` or `hmtx` table (or one of its
/// metrics) cannot be read.
#[cfg(all(feature = "std", feature = "parsing"))]
fn detect_monospace(
    provider: &impl FontTableProvider,
    os2_table: &Os2,
    detected_monospace: Option<bool>,
) -> Option<bool> {
    if detected_monospace.is_some() {
        return detected_monospace;
    }

    // Try using PANOSE classification
    let panose = &os2_table.panose;
    if panose[0] == 2 {
        // family kind 2 = Latin Text, proportion 9 = Monospaced
        return Some(panose[3] == 9);
    }

    // Check glyph widths in hmtx table
    let hhea_data = provider.table_data(tag::HHEA).ok()??;
    let hhea = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
    let maxp_data = provider.table_data(tag::MAXP).ok()??;
    let maxp = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
    let hmtx_data = provider.table_data(tag::HMTX).ok()??;

    let metric_count = usize::from(hhea.num_h_metrics);
    let hmtx = ReadScope::new(&hmtx_data)
        .read_dep::<HmtxTable<'_>>((usize::from(maxp.num_glyphs), metric_count))
        .ok()?;

    // Every advance width has to match the first one.
    let mut first_advance = None;
    for index in 0..metric_count {
        let advance = hmtx.h_metrics.read_item(index).ok()?.advance_width;
        match first_advance {
            None => first_advance = Some(advance),
            Some(expected) if expected != advance => return Some(false),
            Some(_) => {}
        }
    }

    Some(true)
}
4360
/// Guess font metadata from a filename, without reading the file.
///
/// Only paths with a `ttf`, `otf`, `ttc`, `woff` or `woff2` extension (any
/// case) are accepted; everything else yields `None`.
///
/// Style hints are detected by looking for keyword tokens (`bold`, `heavy`,
/// `italic`, `oblique`, `mono`, `monospace`, `condensed`) among the tokens
/// produced by [`config::tokenize_lowercase`] for the file stem. The family
/// name is the output of [`config::tokenize_font_stem`] joined with spaces;
/// `None` is returned when that output is empty.
#[cfg(feature = "std")]
fn pattern_from_filename(path: &std::path::Path) -> Option<FcPattern> {
    let extension = path.extension()?.to_str()?.to_lowercase();
    if !["ttf", "otf", "ttc", "woff", "woff2"].contains(&extension.as_str()) {
        return None;
    }

    let stem = path.file_stem()?.to_str()?;

    // Style detection: check if any token matches a known style keyword
    let stem_tokens = crate::config::tokenize_lowercase(stem);
    let mentions = |kw: &str| stem_tokens.iter().any(|t| t == kw);
    let bold = mentions("bold") || mentions("heavy");
    let italic = mentions("italic");
    let oblique = mentions("oblique");
    let mono = mentions("mono") || mentions("monospace");
    let condensed = mentions("condensed");

    // Family = tokens returned by the stem tokenizer, joined
    let family_tokens = crate::config::tokenize_font_stem(stem);
    if family_tokens.is_empty() {
        return None;
    }
    let family = family_tokens.join(" ");

    // `True` when the keyword was found, otherwise the given fallback.
    let flag = |found: bool, fallback: PatternMatch| {
        if found {
            PatternMatch::True
        } else {
            fallback
        }
    };

    Some(FcPattern {
        name: Some(stem.to_string()),
        family: Some(family),
        bold: flag(bold, PatternMatch::False),
        italic: flag(italic, PatternMatch::False),
        oblique: flag(oblique, PatternMatch::DontCare),
        monospace: flag(mono, PatternMatch::DontCare),
        condensed: flag(condensed, PatternMatch::DontCare),
        weight: if bold { FcWeight::Bold } else { FcWeight::Normal },
        stretch: if condensed { FcStretch::Condensed } else { FcStretch::Normal },
        unicode_ranges: Vec::new(),
        metadata: FcFontMetadata::default(),
        render_config: FcFontRenderConfig::default(),
    })
}