Skip to main content

rust_fontconfig/
lib.rs

//! # rust-fontconfig
//!
//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies) - using allsorts as a font parser to support `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf`
//!
//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
//!
//! ## Usage
//!
//! ### Basic Font Query
//!
//! ```rust,no_run
//! use rust_fontconfig::{FcFontCache, FcPattern};
//!
//! fn main() {
//!     // Build the font cache
//!     let cache = FcFontCache::build();
//!
//!     // Query a font by name
//!     let results = cache.query(
//!         &FcPattern {
//!             name: Some(String::from("Arial")),
//!             ..Default::default()
//!         },
//!         &mut Vec::new() // Trace messages container
//!     );
//!
//!     if let Some(font_match) = results {
//!         println!("Font match ID: {:?}", font_match.id);
//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
//!     } else {
//!         println!("No matching font found");
//!     }
//! }
//! ```
//!
//! ### Resolve Font Chain and Query for Text
//!
//! ```rust,no_run
//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
//!
//! fn main() {
//!     let cache = FcFontCache::build();
//!
//!     // Build font fallback chain (without text parameter)
//!     let font_chain = cache.resolve_font_chain(
//!         &["Arial".to_string(), "sans-serif".to_string()],
//!         FcWeight::Normal,
//!         PatternMatch::DontCare,
//!         PatternMatch::DontCare,
//!         &mut Vec::new(),
//!     );
//!
//!     // Query which fonts to use for specific text
//!     let text = "Hello 你好 Здравствуйте";
//!     let font_runs = font_chain.query_for_text(&cache, text);
//!
//!     println!("Text split into {} font runs:", font_runs.len());
//!     for run in font_runs {
//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
//!     }
//! }
//! ```
63
64#![allow(non_snake_case)]
65#![cfg_attr(not(feature = "std"), no_std)]
66
67extern crate alloc;
68
69#[cfg(all(feature = "std", feature = "parsing"))]
70use alloc::borrow::ToOwned;
71use alloc::collections::btree_map::BTreeMap;
72use alloc::string::{String, ToString};
73use alloc::vec::Vec;
74use alloc::{format, vec};
75#[cfg(feature = "parsing")]
76use allsorts::binary::read::ReadScope;
77#[cfg(all(feature = "std", feature = "parsing"))]
78use allsorts::get_name::fontcode_get_name;
79#[cfg(feature = "parsing")]
80use allsorts::tables::os2::Os2;
81#[cfg(feature = "parsing")]
82use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
83#[cfg(feature = "parsing")]
84use allsorts::tag;
85#[cfg(all(feature = "std", feature = "parsing"))]
86use std::path::PathBuf;
87
88#[cfg(feature = "ffi")]
89pub mod ffi;
90
91#[cfg(feature = "async-registry")]
92pub mod registry;
93
94/// Operating system type for generic font family resolution
95#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
96pub enum OperatingSystem {
97    Windows,
98    Linux,
99    MacOS,
100    Wasm,
101}
102
103impl OperatingSystem {
104    /// Detect the current operating system at compile time
105    pub fn current() -> Self {
106        #[cfg(target_os = "windows")]
107        return OperatingSystem::Windows;
108        
109        #[cfg(target_os = "linux")]
110        return OperatingSystem::Linux;
111        
112        #[cfg(target_os = "macos")]
113        return OperatingSystem::MacOS;
114        
115        #[cfg(target_family = "wasm")]
116        return OperatingSystem::Wasm;
117        
118        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_family = "wasm")))]
119        return OperatingSystem::Linux; // Default fallback
120    }
121    
122    /// Get system-specific fonts for the "serif" generic family
123    /// Prioritizes fonts based on Unicode range coverage
124    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
125        let has_cjk = unicode_ranges.iter().any(|r| {
126            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
127            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
128            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
129            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
130        });
131        
132        let has_arabic = unicode_ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF);
133        let _has_cyrillic = unicode_ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF);
134        
135        match self {
136            OperatingSystem::Windows => {
137                let mut fonts = Vec::new();
138                if has_cjk {
139                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
140                }
141                if has_arabic {
142                    fonts.push("Traditional Arabic");
143                }
144                fonts.push("Times New Roman");
145                fonts.iter().map(|s| s.to_string()).collect()
146            }
147            OperatingSystem::Linux => {
148                let mut fonts = Vec::new();
149                if has_cjk {
150                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
151                }
152                if has_arabic {
153                    fonts.push("Noto Serif Arabic");
154                }
155                fonts.extend_from_slice(&[
156                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
157                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
158                ]);
159                fonts.iter().map(|s| s.to_string()).collect()
160            }
161            OperatingSystem::MacOS => {
162                let mut fonts = Vec::new();
163                if has_cjk {
164                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
165                }
166                if has_arabic {
167                    fonts.push("Geeza Pro");
168                }
169                fonts.extend_from_slice(&["Times", "New York", "Palatino"]);
170                fonts.iter().map(|s| s.to_string()).collect()
171            }
172            OperatingSystem::Wasm => Vec::new(),
173        }
174    }
175    
176    /// Get system-specific fonts for the "sans-serif" generic family
177    /// Prioritizes fonts based on Unicode range coverage
178    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
179        let has_cjk = unicode_ranges.iter().any(|r| {
180            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
181            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
182            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
183            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
184        });
185        
186        let has_arabic = unicode_ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF);
187        let _has_cyrillic = unicode_ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF);
188        let has_hebrew = unicode_ranges.iter().any(|r| r.start >= 0x0590 && r.start <= 0x05FF);
189        let has_thai = unicode_ranges.iter().any(|r| r.start >= 0x0E00 && r.start <= 0x0E7F);
190        
191        match self {
192            OperatingSystem::Windows => {
193                let mut fonts = Vec::new();
194                if has_cjk {
195                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
196                }
197                if has_arabic {
198                    fonts.push("Segoe UI Arabic");
199                }
200                if has_hebrew {
201                    fonts.push("Segoe UI Hebrew");
202                }
203                if has_thai {
204                    fonts.push("Leelawadee UI");
205                }
206                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
207                fonts.iter().map(|s| s.to_string()).collect()
208            }
209            OperatingSystem::Linux => {
210                let mut fonts = Vec::new();
211                if has_cjk {
212                    fonts.extend_from_slice(&[
213                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
214                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
215                    ]);
216                }
217                if has_arabic {
218                    fonts.push("Noto Sans Arabic");
219                }
220                if has_hebrew {
221                    fonts.push("Noto Sans Hebrew");
222                }
223                if has_thai {
224                    fonts.push("Noto Sans Thai");
225                }
226                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
227                fonts.iter().map(|s| s.to_string()).collect()
228            }
229            OperatingSystem::MacOS => {
230                let mut fonts = Vec::new();
231                if has_cjk {
232                    fonts.extend_from_slice(&[
233                        "Hiragino Sans", "Hiragino Kaku Gothic ProN", 
234                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
235                    ]);
236                }
237                if has_arabic {
238                    fonts.push("Geeza Pro");
239                }
240                if has_hebrew {
241                    fonts.push("Arial Hebrew");
242                }
243                if has_thai {
244                    fonts.push("Thonburi");
245                }
246                fonts.extend_from_slice(&["San Francisco", "Helvetica Neue", "Lucida Grande"]);
247                fonts.iter().map(|s| s.to_string()).collect()
248            }
249            OperatingSystem::Wasm => Vec::new(),
250        }
251    }
252    
253    /// Get system-specific fonts for the "monospace" generic family
254    /// Prioritizes fonts based on Unicode range coverage
255    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
256        let has_cjk = unicode_ranges.iter().any(|r| {
257            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
258            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
259            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
260            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
261        });
262        
263        match self {
264            OperatingSystem::Windows => {
265                let mut fonts = Vec::new();
266                if has_cjk {
267                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
268                }
269                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
270                fonts.iter().map(|s| s.to_string()).collect()
271            }
272            OperatingSystem::Linux => {
273                let mut fonts = Vec::new();
274                if has_cjk {
275                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
276                }
277                fonts.extend_from_slice(&[
278                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
279                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
280                ]);
281                fonts.iter().map(|s| s.to_string()).collect()
282            }
283            OperatingSystem::MacOS => {
284                let mut fonts = Vec::new();
285                if has_cjk {
286                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
287                }
288                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
289                fonts.iter().map(|s| s.to_string()).collect()
290            }
291            OperatingSystem::Wasm => Vec::new(),
292        }
293    }
294    
295    /// Expand a generic CSS font family to system-specific font names
296    /// Returns the original name if not a generic family
297    /// Prioritizes fonts based on Unicode range coverage
298    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
299        match family.to_lowercase().as_str() {
300            "serif" => self.get_serif_fonts(unicode_ranges),
301            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
302            "monospace" => self.get_monospace_fonts(unicode_ranges),
303            "cursive" | "fantasy" | "system-ui" => {
304                // Use sans-serif as fallback for these
305                self.get_sans_serif_fonts(unicode_ranges)
306            }
307            _ => vec![family.to_string()],
308        }
309    }
310}
311
312/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
313/// Prioritizes fonts based on Unicode range coverage
314/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
315pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
316    let mut expanded = Vec::new();
317    
318    for family in families {
319        expanded.extend(os.expand_generic_family(family, unicode_ranges));
320    }
321    
322    expanded
323}
324
/// Identifier for a single font, rendered in UUID-style hex groups
/// (collections are broken up into separate fonts, each with its own id).
#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FontId(pub u128);

impl core::fmt::Debug for FontId {
    /// Debug output is identical to the `Display` output.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <Self as core::fmt::Display>::fmt(self, f)
    }
}

impl core::fmt::Display for FontId {
    /// Writes the id as five lowercase, zero-padded hex groups of
    /// 8-4-4-4-12 digits separated by `-`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let FontId(raw) = *self;

        // Slice the 128-bit value from the most significant bits downwards.
        let group_a = (raw >> 96) & 0xFFFFFFFF;
        let group_b = (raw >> 80) & 0xFFFF;
        let group_c = (raw >> 64) & 0xFFFF;
        let group_d = (raw >> 48) & 0xFFFF;
        let group_e = raw & 0xFFFFFFFFFFFF;

        write!(
            f,
            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
            group_a, group_b, group_c, group_d, group_e
        )
    }
}

impl FontId {
    /// Generate a new unique FontId using an atomic counter.
    ///
    /// The counter is process-wide, starts at 1 and is incremented on every
    /// call, so ids are unique within a process but not across runs.
    pub fn new() -> Self {
        use core::sync::atomic::{AtomicU64, Ordering};

        static COUNTER: AtomicU64 = AtomicU64::new(1);

        let next = COUNTER.fetch_add(1, Ordering::Relaxed);
        FontId(next as u128)
    }
}
360
/// Tri-state requirement on a boolean font property (yes / no / don't care)
#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum PatternMatch {
    /// Default: the property places no constraint on the selected font
    #[default]
    DontCare,
    /// The property must be true for the selected font
    True,
    /// The property must be false for the selected font
    False,
}

impl PatternMatch {
    /// `true` when this value expresses an actual constraint, i.e. it is
    /// anything other than `DontCare`.
    fn needs_to_match(&self) -> bool {
        !matches!(self, PatternMatch::DontCare)
    }

    /// `true` when the two values are compatible: either side is `DontCare`,
    /// or both carry the same value.
    fn matches(&self, other: &PatternMatch) -> bool {
        let unconstrained =
            *self == PatternMatch::DontCare || *other == PatternMatch::DontCare;
        unconstrained || self == other
    }
}
388
/// Font weight classes, using the numeric values of the CSS specification
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcWeight {
    Thin = 100,
    ExtraLight = 200,
    Light = 300,
    Normal = 400,
    Medium = 500,
    SemiBold = 600,
    Bold = 700,
    ExtraBold = 800,
    Black = 900,
}

impl FcWeight {
    /// Map a numeric weight onto the nearest weight class.
    ///
    /// Values are rounded to the nearest multiple of 100 (x50 rounds up);
    /// anything below 150 becomes `Thin`, anything from 850 upwards `Black`.
    pub fn from_u16(weight: u16) -> Self {
        // Adding 50 before the integer division rounds to the nearest hundred;
        // saturating keeps values near `u16::MAX` in the `Black` bucket.
        match weight.saturating_add(50) / 100 {
            0 | 1 => FcWeight::Thin,
            2 => FcWeight::ExtraLight,
            3 => FcWeight::Light,
            4 => FcWeight::Normal,
            5 => FcWeight::Medium,
            6 => FcWeight::SemiBold,
            7 => FcWeight::Bold,
            8 => FcWeight::ExtraBold,
            _ => FcWeight::Black,
        }
    }

    /// Pick the entry of `available` that best substitutes for `self`.
    ///
    /// Returns `None` only when `available` is empty. An exact match always
    /// wins. Otherwise the search order is:
    ///
    /// * `Normal`: `Medium`, then lighter weights (closest first), then
    ///   heavier weights (closest first);
    /// * `Medium` and everything lighter than `Normal`: lighter weights
    ///   (closest first), then heavier weights (closest first);
    /// * `SemiBold` and heavier: heavier weights (closest first), then
    ///   lighter weights (closest first).
    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
        let wanted = *self;

        // Exact match
        if available.contains(&wanted) {
            return Some(wanted);
        }

        // The derived ordering follows the numeric weight, so the closest
        // lighter weight is the maximum below `wanted` and the closest
        // heavier weight is the minimum above it.
        let nearest_lighter = || available.iter().copied().filter(|w| *w < wanted).max();
        let nearest_heavier = || available.iter().copied().filter(|w| *w > wanted).min();

        match wanted {
            FcWeight::Normal if available.contains(&FcWeight::Medium) => Some(FcWeight::Medium),
            FcWeight::Thin
            | FcWeight::ExtraLight
            | FcWeight::Light
            | FcWeight::Normal
            | FcWeight::Medium => nearest_lighter().or_else(nearest_heavier),
            FcWeight::SemiBold | FcWeight::Bold | FcWeight::ExtraBold | FcWeight::Black => {
                nearest_heavier().or_else(nearest_lighter)
            }
        }
    }
}

impl Default for FcWeight {
    /// The default weight is `Normal` (400).
    fn default() -> Self {
        Self::Normal
    }
}
570
/// CSS font-stretch keywords, numbered 1 (narrowest) to 9 (widest)
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcStretch {
    UltraCondensed = 1,
    ExtraCondensed = 2,
    Condensed = 3,
    SemiCondensed = 4,
    Normal = 5,
    SemiExpanded = 6,
    Expanded = 7,
    ExtraExpanded = 8,
    UltraExpanded = 9,
}

impl FcStretch {
    /// `true` for the four values narrower than `Normal`.
    pub fn is_condensed(&self) -> bool {
        matches!(
            self,
            FcStretch::UltraCondensed
                | FcStretch::ExtraCondensed
                | FcStretch::Condensed
                | FcStretch::SemiCondensed
        )
    }

    /// Convert a numeric width class (1..=9) into a stretch value.
    ///
    /// Any value outside 1..=9 maps to `Normal`.
    pub fn from_u16(width_class: u16) -> Self {
        // Indexed by `width_class - 1`.
        const BY_CLASS: [FcStretch; 9] = [
            FcStretch::UltraCondensed,
            FcStretch::ExtraCondensed,
            FcStretch::Condensed,
            FcStretch::SemiCondensed,
            FcStretch::Normal,
            FcStretch::SemiExpanded,
            FcStretch::Expanded,
            FcStretch::ExtraExpanded,
            FcStretch::UltraExpanded,
        ];

        width_class
            .checked_sub(1)
            .and_then(|index| BY_CLASS.get(usize::from(index)))
            .copied()
            .unwrap_or(FcStretch::Normal)
    }

    /// Pick the entry of `available` that best substitutes for `self`,
    /// following the CSS stretch matching order.
    ///
    /// Returns `None` only when `available` is empty. An exact match always
    /// wins. For `Normal` and condensed values the closest narrower width is
    /// preferred, then the closest wider one; for expanded values the order
    /// is reversed.
    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
        let wanted = *self;

        if available.contains(&wanted) {
            return Some(wanted);
        }

        // Closest narrower = largest value below `wanted`;
        // closest wider = smallest value above `wanted`.
        let closest_narrower = || available.iter().copied().filter(|s| *s < wanted).max();
        let closest_wider = || available.iter().copied().filter(|s| *s > wanted).min();

        if wanted <= FcStretch::Normal {
            closest_narrower().or_else(closest_wider)
        } else {
            closest_wider().or_else(closest_narrower)
        }
    }
}

impl Default for FcStretch {
    /// The default stretch is `Normal`.
    fn default() -> Self {
        Self::Normal
    }
}
689
/// Inclusive range of Unicode code points, used for font matching
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeRange {
    /// First code point of the range (inclusive)
    pub start: u32,
    /// Last code point of the range (inclusive)
    pub end: u32,
}

impl UnicodeRange {
    /// `true` if the code point of `c` lies within `start..=end`.
    pub fn contains(&self, c: char) -> bool {
        let code_point = u32::from(c);
        (self.start..=self.end).contains(&code_point)
    }

    /// `true` if the two ranges share at least one code point.
    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
        // Two inclusive ranges are disjoint exactly when one begins after the
        // other has ended.
        let disjoint = self.start > other.end || other.start > self.end;
        !disjoint
    }

    /// `true` if this range lies entirely within `other`.
    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
        other.start <= self.start && self.end <= other.end
    }
}
712
/// Severity of a trace message.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Debug < Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum TraceLevel {
    /// Lowest severity
    Debug,
    /// Informational message
    Info,
    /// Warning
    Warning,
    /// Highest severity
    Error,
}
721
722/// Reason for font matching failure or success
723#[derive(Debug, Clone, PartialEq, Eq, Hash)]
724pub enum MatchReason {
725    NameMismatch {
726        requested: Option<String>,
727        found: Option<String>,
728    },
729    FamilyMismatch {
730        requested: Option<String>,
731        found: Option<String>,
732    },
733    StyleMismatch {
734        property: &'static str,
735        requested: String,
736        found: String,
737    },
738    WeightMismatch {
739        requested: FcWeight,
740        found: FcWeight,
741    },
742    StretchMismatch {
743        requested: FcStretch,
744        found: FcStretch,
745    },
746    UnicodeRangeMismatch {
747        character: char,
748        ranges: Vec<UnicodeRange>,
749    },
750    Success,
751}
752
753/// Trace message for debugging font matching
754#[derive(Debug, Clone, PartialEq, Eq)]
755pub struct TraceMsg {
756    pub level: TraceLevel,
757    pub path: String,
758    pub reason: MatchReason,
759}
760
761/// Font pattern for matching
762#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
763#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
764#[repr(C)]
765pub struct FcPattern {
766    // font name
767    pub name: Option<String>,
768    // family name
769    pub family: Option<String>,
770    // "italic" property
771    pub italic: PatternMatch,
772    // "oblique" property
773    pub oblique: PatternMatch,
774    // "bold" property
775    pub bold: PatternMatch,
776    // "monospace" property
777    pub monospace: PatternMatch,
778    // "condensed" property
779    pub condensed: PatternMatch,
780    // font weight
781    pub weight: FcWeight,
782    // font stretch
783    pub stretch: FcStretch,
784    // unicode ranges to match
785    pub unicode_ranges: Vec<UnicodeRange>,
786    // extended font metadata
787    pub metadata: FcFontMetadata,
788}
789
790impl core::fmt::Debug for FcPattern {
791    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
792        let mut d = f.debug_struct("FcPattern");
793
794        if let Some(name) = &self.name {
795            d.field("name", name);
796        }
797
798        if let Some(family) = &self.family {
799            d.field("family", family);
800        }
801
802        if self.italic != PatternMatch::DontCare {
803            d.field("italic", &self.italic);
804        }
805
806        if self.oblique != PatternMatch::DontCare {
807            d.field("oblique", &self.oblique);
808        }
809
810        if self.bold != PatternMatch::DontCare {
811            d.field("bold", &self.bold);
812        }
813
814        if self.monospace != PatternMatch::DontCare {
815            d.field("monospace", &self.monospace);
816        }
817
818        if self.condensed != PatternMatch::DontCare {
819            d.field("condensed", &self.condensed);
820        }
821
822        if self.weight != FcWeight::Normal {
823            d.field("weight", &self.weight);
824        }
825
826        if self.stretch != FcStretch::Normal {
827            d.field("stretch", &self.stretch);
828        }
829
830        if !self.unicode_ranges.is_empty() {
831            d.field("unicode_ranges", &self.unicode_ranges);
832        }
833
834        // Only show non-empty metadata fields
835        let empty_metadata = FcFontMetadata::default();
836        if self.metadata != empty_metadata {
837            d.field("metadata", &self.metadata);
838        }
839
840        d.finish()
841    }
842}
843
/// Descriptive font metadata; every field is optional and defaults to `None`.
///
/// NOTE(review): the previous comment attributed these fields to the OS/2
/// table, but the field set looks like the records of the OpenType `name`
/// table — confirm against the parsing code.
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontMetadata {
    /// Copyright notice
    pub copyright: Option<String>,
    /// Designer name
    pub designer: Option<String>,
    /// URL of the designer
    pub designer_url: Option<String>,
    /// Font family name
    pub font_family: Option<String>,
    /// Font subfamily name
    pub font_subfamily: Option<String>,
    /// Full font name
    pub full_name: Option<String>,
    /// Description text
    pub id_description: Option<String>,
    /// License text
    pub license: Option<String>,
    /// URL of the license
    pub license_url: Option<String>,
    /// Manufacturer name
    pub manufacturer: Option<String>,
    /// URL of the manufacturer
    pub manufacturer_url: Option<String>,
    /// PostScript name
    pub postscript_name: Option<String>,
    /// Preferred family name
    pub preferred_family: Option<String>,
    /// Preferred subfamily name
    pub preferred_subfamily: Option<String>,
    /// Trademark notice
    pub trademark: Option<String>,
    /// Unique identifier string
    pub unique_id: Option<String>,
    /// Version string
    pub version: Option<String>,
}
866
867impl FcPattern {
868    /// Check if this pattern would match the given character
869    pub fn contains_char(&self, c: char) -> bool {
870        if self.unicode_ranges.is_empty() {
871            return true; // No ranges specified means match all characters
872        }
873
874        for range in &self.unicode_ranges {
875            if range.contains(c) {
876                return true;
877            }
878        }
879
880        false
881    }
882}
883
884/// Font match result with UUID
885#[derive(Debug, Clone, PartialEq, Eq)]
886pub struct FontMatch {
887    pub id: FontId,
888    pub unicode_ranges: Vec<UnicodeRange>,
889    pub fallbacks: Vec<FontMatchNoFallback>,
890}
891
892/// Font match result with UUID (without fallback)
893#[derive(Debug, Clone, PartialEq, Eq)]
894pub struct FontMatchNoFallback {
895    pub id: FontId,
896    pub unicode_ranges: Vec<UnicodeRange>,
897}
898
899/// A run of text that uses the same font
900/// Returned by FontFallbackChain::query_for_text()
901#[derive(Debug, Clone, PartialEq, Eq)]
902pub struct ResolvedFontRun {
903    /// The text content of this run
904    pub text: String,
905    /// Start byte index in the original text
906    pub start_byte: usize,
907    /// End byte index in the original text (exclusive)
908    pub end_byte: usize,
909    /// The font to use for this run (None if no font found)
910    pub font_id: Option<FontId>,
911    /// Which CSS font-family this came from
912    pub css_source: String,
913}
914
915/// Resolved font fallback chain for a CSS font-family stack
916/// This represents the complete chain of fonts to use for rendering text
917#[derive(Debug, Clone, PartialEq, Eq)]
918pub struct FontFallbackChain {
919    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
920    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
921    pub css_fallbacks: Vec<CssFallbackGroup>,
922    
923    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
924    /// Only populated if css_fallbacks don't cover all requested characters
925    pub unicode_fallbacks: Vec<FontMatch>,
926    
927    /// The original CSS font-family stack that was requested
928    pub original_stack: Vec<String>,
929}
930
931impl FontFallbackChain {
932    /// Resolve which font should be used for a specific character
933    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
934    /// Returns None if no font in the chain can render this character
935    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
936        let codepoint = ch as u32;
937        
938        // First check CSS fallbacks in order
939        for group in &self.css_fallbacks {
940            for font in &group.fonts {
941                if let Some(meta) = cache.get_metadata_by_id(&font.id) {
942                    // Check if this font's unicode ranges cover the character
943                    if meta.unicode_ranges.is_empty() {
944                        // Font has no unicode range info - skip it, don't assume it covers everything
945                        // This is important because fonts that don't properly declare their ranges
946                        // should not be used as a catch-all
947                        continue;
948                    } else {
949                        // Check if character is in any of the font's ranges
950                        for range in &meta.unicode_ranges {
951                            if codepoint >= range.start && codepoint <= range.end {
952                                return Some((font.id, group.css_name.clone()));
953                            }
954                        }
955                        // Character not in any range - continue to next font
956                    }
957                }
958            }
959        }
960        
961        // If not found in CSS fallbacks, check Unicode fallbacks
962        for font in &self.unicode_fallbacks {
963            if let Some(meta) = cache.get_metadata_by_id(&font.id) {
964                // Check if this font's unicode ranges cover the character
965                for range in &meta.unicode_ranges {
966                    if codepoint >= range.start && codepoint <= range.end {
967                        return Some((font.id, "(unicode-fallback)".to_string()));
968                    }
969                }
970            }
971        }
972        
973        None
974    }
975    
976    /// Resolve all characters in a text string to their fonts
977    /// Returns a vector of (character, FontId, css_source) tuples
978    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
979        text.chars()
980            .map(|ch| (ch, self.resolve_char(cache, ch)))
981            .collect()
982    }
983    
984    /// Query which fonts should be used for a text string, grouped by font
985    /// Returns runs of consecutive characters that use the same font
986    /// This is the main API for text shaping - call this to get font runs, then shape each run
987    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
988        if text.is_empty() {
989            return Vec::new();
990        }
991        
992        let mut runs: Vec<ResolvedFontRun> = Vec::new();
993        let mut current_font: Option<FontId> = None;
994        let mut current_css_source: Option<String> = None;
995        let mut current_start_byte: usize = 0;
996        
997        for (byte_idx, ch) in text.char_indices() {
998            let resolved = self.resolve_char(cache, ch);
999            let (font_id, css_source) = match &resolved {
1000                Some((id, source)) => (Some(*id), Some(source.clone())),
1001                None => (None, None),
1002            };
1003            
1004            // Check if we need to start a new run
1005            let font_changed = font_id != current_font;
1006            
1007            if font_changed && byte_idx > 0 {
1008                // Finalize the current run
1009                let run_text = &text[current_start_byte..byte_idx];
1010                runs.push(ResolvedFontRun {
1011                    text: run_text.to_string(),
1012                    start_byte: current_start_byte,
1013                    end_byte: byte_idx,
1014                    font_id: current_font,
1015                    css_source: current_css_source.clone().unwrap_or_default(),
1016                });
1017                current_start_byte = byte_idx;
1018            }
1019            
1020            current_font = font_id;
1021            current_css_source = css_source;
1022        }
1023        
1024        // Finalize the last run
1025        if current_start_byte < text.len() {
1026            let run_text = &text[current_start_byte..];
1027            runs.push(ResolvedFontRun {
1028                text: run_text.to_string(),
1029                start_byte: current_start_byte,
1030                end_byte: text.len(),
1031                font_id: current_font,
1032                css_source: current_css_source.unwrap_or_default(),
1033            });
1034        }
1035        
1036        runs
1037    }
1038}
1039
1040/// A group of fonts that are fallbacks for a single CSS font-family name
1041#[derive(Debug, Clone, PartialEq, Eq)]
1042pub struct CssFallbackGroup {
1043    /// The CSS font name (e.g., "NotoSansJP", "sans-serif")
1044    pub css_name: String,
1045    
1046    /// System fonts that match this CSS name
1047    /// First font in list is the best match
1048    pub fonts: Vec<FontMatch>,
1049}
1050
/// Key under which resolved font fallback chains are cached.
///
/// IMPORTANT: `unicode_ranges` are deliberately NOT part of this key.
/// Chains are cached per CSS properties only, never per text content, so
/// different texts styled with the same CSS font stack share one chain.
#[cfg(feature = "std")]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct FontChainCacheKey {
    /// CSS font stack (expanded to OS-specific fonts).
    pub(crate) font_families: Vec<String>,
    /// Requested font weight.
    pub(crate) weight: FcWeight,
    /// Requested italic style flag.
    pub(crate) italic: PatternMatch,
    /// Requested oblique style flag.
    pub(crate) oblique: PatternMatch,
}
1067
/// Location of a font on disk.
///
/// Derived ordering compares `path` first, then `font_index`.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcFontPath {
    /// Path of the font file.
    pub path: String,
    /// Index of the font inside the file.
    // NOTE(review): presumably the face index within a collection such as `.ttc` - confirm.
    pub font_index: usize,
}
1076
/// A font whose file contents are held in memory.
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct FcFont {
    /// Raw bytes of the font file.
    pub bytes: Vec<u8>,
    /// Index of the font inside `bytes`.
    pub font_index: usize,
    /// Free-form identifier, used for identification in tests.
    pub id: String,
}
1085
1086/// Font source enum to represent either disk or memory fonts
1087#[derive(Debug, Clone)]
1088pub enum FontSource<'a> {
1089    /// Font loaded from memory
1090    Memory(&'a FcFont),
1091    /// Font loaded from disk
1092    Disk(&'a FcFontPath),
1093}
1094
/// A font supplied from memory together with a display name.
/// This is the primary way to hand custom fonts to the application.
#[derive(Debug, Clone)]
pub struct NamedFont {
    /// Human-readable name for this font (e.g., "My Custom Font").
    pub name: String,
    /// The raw font file bytes (TTF, OTF, WOFF, WOFF2, TTC).
    pub bytes: Vec<u8>,
}

impl NamedFont {
    /// Builds a `NamedFont` from anything convertible into a `String` and the
    /// font file's bytes. The bytes are stored as given, without validation.
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        let name = name.into();
        NamedFont { name, bytes }
    }
}
1114
1115/// Font cache, initialized at startup
1116#[derive(Debug)]
1117pub struct FcFontCache {
1118    // Pattern to FontId mapping (query index)
1119    pub(crate) patterns: BTreeMap<FcPattern, FontId>,
1120    // On-disk font paths
1121    pub(crate) disk_fonts: BTreeMap<FontId, FcFontPath>,
1122    // In-memory fonts
1123    pub(crate) memory_fonts: BTreeMap<FontId, FcFont>,
1124    // Metadata cache (patterns stored by ID for quick lookup)
1125    pub(crate) metadata: BTreeMap<FontId, FcPattern>,
1126    // Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds
1127    // This enables fast fuzzy search by intersecting token sets
1128    pub(crate) token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
1129    // Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>
1130    // Avoids re-tokenization during fuzzy search
1131    pub(crate) font_tokens: BTreeMap<FontId, Vec<String>>,
1132    // Font fallback chain cache (CSS stack + unicode -> resolved chain)
1133    #[cfg(feature = "std")]
1134    chain_cache: std::sync::Mutex<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
1135}
1136
1137impl Clone for FcFontCache {
1138    fn clone(&self) -> Self {
1139        Self {
1140            patterns: self.patterns.clone(),
1141            disk_fonts: self.disk_fonts.clone(),
1142            memory_fonts: self.memory_fonts.clone(),
1143            metadata: self.metadata.clone(),
1144            token_index: self.token_index.clone(),
1145            font_tokens: self.font_tokens.clone(),
1146            #[cfg(feature = "std")]
1147            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()), // Empty cache for cloned instance
1148        }
1149    }
1150}
1151
1152impl Default for FcFontCache {
1153    fn default() -> Self {
1154        Self {
1155            patterns: BTreeMap::new(),
1156            disk_fonts: BTreeMap::new(),
1157            memory_fonts: BTreeMap::new(),
1158            metadata: BTreeMap::new(),
1159            token_index: BTreeMap::new(),
1160            font_tokens: BTreeMap::new(),
1161            #[cfg(feature = "std")]
1162            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
1163        }
1164    }
1165}
1166
1167impl FcFontCache {
1168    /// Helper method to add a font pattern to the token index
1169    fn index_pattern_tokens(&mut self, pattern: &FcPattern, id: FontId) {
1170        // Extract tokens from both name and family
1171        let mut all_tokens = Vec::new();
1172        
1173        if let Some(name) = &pattern.name {
1174            all_tokens.extend(Self::extract_font_name_tokens(name));
1175        }
1176        
1177        if let Some(family) = &pattern.family {
1178            all_tokens.extend(Self::extract_font_name_tokens(family));
1179        }
1180        
1181        // Convert tokens to lowercase and store them
1182        let tokens_lower: Vec<String> = all_tokens.iter().map(|t| t.to_lowercase()).collect();
1183        
1184        // Add each token (lowercase) to the index
1185        for token_lower in &tokens_lower {
1186            self.token_index
1187                .entry(token_lower.clone())
1188                .or_insert_with(alloc::collections::BTreeSet::new)
1189                .insert(id);
1190        }
1191        
1192        // Store pre-tokenized font name for fast lookup (no re-tokenization needed)
1193        self.font_tokens.insert(id, tokens_lower);
1194    }
1195
1196    /// Adds in-memory font files
1197    pub fn with_memory_fonts(&mut self, fonts: Vec<(FcPattern, FcFont)>) -> &mut Self {
1198        for (pattern, font) in fonts {
1199            let id = FontId::new();
1200            self.patterns.insert(pattern.clone(), id);
1201            self.metadata.insert(id, pattern.clone());
1202            self.memory_fonts.insert(id, font);
1203            self.index_pattern_tokens(&pattern, id);
1204        }
1205        self
1206    }
1207
1208    /// Adds a memory font with a specific ID (for testing)
1209    pub fn with_memory_font_with_id(
1210        &mut self,
1211        id: FontId,
1212        pattern: FcPattern,
1213        font: FcFont,
1214    ) -> &mut Self {
1215        self.patterns.insert(pattern.clone(), id);
1216        self.metadata.insert(id, pattern.clone());
1217        self.memory_fonts.insert(id, font);
1218        self.index_pattern_tokens(&pattern, id);
1219        self
1220    }
1221
1222    /// Get font data for a given font ID
1223    pub fn get_font_by_id<'a>(&'a self, id: &FontId) -> Option<FontSource<'a>> {
1224        // Check memory fonts first
1225        if let Some(font) = self.memory_fonts.get(id) {
1226            return Some(FontSource::Memory(font));
1227        }
1228        // Then check disk fonts
1229        if let Some(path) = self.disk_fonts.get(id) {
1230            return Some(FontSource::Disk(path));
1231        }
1232        None
1233    }
1234
1235    /// Get metadata directly from an ID
1236    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<&FcPattern> {
1237        self.metadata.get(id)
1238    }
1239
/// Returns the raw bytes of font `id`, from memory or from disk.
///
/// Memory fonts yield a copy of their stored bytes. Disk fonts are read in
/// full with `std::fs::read`; the stored `font_index` is not applied here.
/// Returns `None` when the id is unknown or the file cannot be read.
#[cfg(feature = "std")]
pub fn get_font_bytes(&self, id: &FontId) -> Option<Vec<u8>> {
    self.get_font_by_id(id).and_then(|source| match source {
        FontSource::Memory(font) => Some(font.bytes.clone()),
        // Best effort: an I/O error is reported as "no bytes".
        FontSource::Disk(entry) => std::fs::read(&entry.path).ok(),
    })
}
1252
1253    /// Builds a new font cache
1254    #[cfg(not(all(feature = "std", feature = "parsing")))]
1255    pub fn build() -> Self {
1256        Self::default()
1257    }
1258
/// Builds a new font cache from all fonts discovered on the system.
///
/// Equivalent to the filtered build with no family filter applied.
#[cfg(all(feature = "std", feature = "parsing"))]
pub fn build() -> Self {
    let no_filter = None;
    Self::build_inner(no_filter)
}
1264    
/// Builds a font cache restricted to the given font families.
///
/// This is a performance optimization for applications that know up front
/// which fonts they need: instead of keeping every scanned system font, only
/// fonts matching the requested families are loaded.
///
/// Each requested name is passed through the current operating system's
/// generic-family expansion. When the expansion is empty, or consists of just
/// the requested name itself, the name is used literally; otherwise the
/// expanded names are used in its place.
///
/// **Note**: This will NOT automatically load fallback fonts for scripts not
/// covered by the requested families. If you need Arabic, CJK, or emoji
/// support, either:
/// - Add those families explicitly to the filter
/// - Use `with_memory_fonts()` to add bundled fonts
/// - Use `build()` to load all system fonts
///
/// # Arguments
/// * `families` - Font family names to load (e.g., ["Arial", "sans-serif"])
///
/// # Example
/// ```ignore
/// // Only load Arial and sans-serif fallback fonts
/// let cache = FcFontCache::build_with_families(&["Arial", "sans-serif"]);
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
pub fn build_with_families(families: &[impl AsRef<str>]) -> Self {
    let os = OperatingSystem::current();
    let mut target_families: Vec<String> = Vec::new();

    for family in families {
        let requested = family.as_ref();
        let expanded = os.expand_generic_family(requested, &[]);

        // The expansion left the name as it was (or produced nothing).
        let unchanged = match expanded.len() {
            0 => true,
            1 => expanded[0] == requested,
            _ => false,
        };

        if unchanged {
            target_families.push(requested.to_string());
        } else {
            target_families.extend(expanded);
        }
    }

    Self::build_inner(Some(&target_families))
}
1307    
/// Inner build function shared by the filtered and unfiltered builds.
///
/// Scans the platform's font directories (Linux, Windows and macOS each have
/// their own block; other targets scan nothing) and registers every font
/// whose pattern passes the family filter.
///
/// # Arguments
/// * `family_filter` - If `Some`, only load fonts matching these family names.
///                     If `None`, load all fonts.
///
/// Matching is done on normalized text (lowercase, spaces and dashes
/// removed): a font is accepted when its normalized `name` or `family`
/// contains any normalized filter entry as a substring.
#[cfg(all(feature = "std", feature = "parsing"))]
fn build_inner(family_filter: Option<&[String]>) -> Self {
    /// Lowercases `text` and strips spaces and dashes.
    fn normalize(text: &str) -> String {
        text.to_lowercase().replace(' ', "").replace('-', "")
    }

    /// Registers every scanned `(pattern, path)` entry accepted by `keep`,
    /// giving each a fresh id and updating all indices of `cache`.
    // Only the per-OS blocks below call this, so it is unused on other targets.
    #[allow(dead_code)]
    fn register_disk_fonts<I>(
        cache: &mut FcFontCache,
        entries: I,
        keep: &dyn Fn(&FcPattern) -> bool,
    ) where
        I: IntoIterator<Item = (FcPattern, FcFontPath)>,
    {
        for (pattern, path) in entries {
            if !keep(&pattern) {
                continue;
            }
            let id = FontId::new();
            cache.patterns.insert(pattern.clone(), id);
            cache.metadata.insert(id, pattern.clone());
            cache.disk_fonts.insert(id, path);
            cache.index_pattern_tokens(&pattern, id);
        }
    }

    let mut cache = FcFontCache::default();

    // Normalize the filter once, up front.
    let filter_normalized: Option<Vec<String>> =
        family_filter.map(|families| families.iter().map(|f| normalize(f)).collect());

    // Decides whether a scanned pattern passes the filter.
    let matches_filter = |pattern: &FcPattern| -> bool {
        let targets = match &filter_normalized {
            None => return true, // No filter = accept all
            Some(targets) => targets,
        };
        let field_matches = |field: &Option<String>| -> bool {
            field.as_ref().map_or(false, |value| {
                let value_norm = normalize(value);
                targets.iter().any(|target| value_norm.contains(target.as_str()))
            })
        };
        field_matches(&pattern.name) || field_matches(&pattern.family)
    };

    #[cfg(target_os = "linux")]
    {
        if let Some(font_entries) = FcScanDirectories() {
            register_disk_fonts(&mut cache, font_entries, &matches_filter);
        }
    }

    #[cfg(target_os = "windows")]
    {
        let system_root = std::env::var("SystemRoot")
            .or_else(|_| std::env::var("WINDIR"))
            .unwrap_or_else(|_| "C:\\Windows".to_string());

        let user_profile = std::env::var("USERPROFILE")
            .unwrap_or_else(|_| "C:\\Users\\Default".to_string());

        let font_dirs = vec![
            (None, format!("{}\\Fonts\\", system_root)),
            (None, format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile)),
        ];

        let font_entries = FcScanDirectoriesInner(&font_dirs);
        register_disk_fonts(&mut cache, font_entries, &matches_filter);
    }

    #[cfg(target_os = "macos")]
    {
        // NOTE(review): "~" is passed through literally here; presumably the
        // scanner expands it to the home directory - confirm.
        let font_dirs = vec![
            (None, "~/Library/Fonts".to_owned()),
            (None, "/System/Library/Fonts".to_owned()),
            (None, "/Library/Fonts".to_owned()),
            (None, "/System/Library/AssetsV2".to_owned()),
        ];

        let font_entries = FcScanDirectoriesInner(&font_dirs);
        register_disk_fonts(&mut cache, font_entries, &matches_filter);
    }

    cache
}
1405    
1406    /// Check if a font ID is a memory font (preferred over disk fonts)
1407    pub fn is_memory_font(&self, id: &FontId) -> bool {
1408        self.memory_fonts.contains_key(id)
1409    }
1410
1411    /// Returns the list of fonts and font patterns
1412    pub fn list(&self) -> Vec<(&FcPattern, FontId)> {
1413        self.patterns
1414            .iter()
1415            .map(|(pattern, id)| (pattern, *id))
1416            .collect()
1417    }
1418
1419    /// Returns true if the cache contains no font patterns
1420    pub fn is_empty(&self) -> bool {
1421        self.patterns.is_empty()
1422    }
1423
1424    /// Returns the number of font patterns in the cache
1425    pub fn len(&self) -> usize {
1426        self.patterns.len()
1427    }
1428
1429    /// Queries a font from the in-memory cache, returns the first found font (early return)
1430    /// Memory fonts are always preferred over disk fonts with the same match quality.
1431    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
1432        let mut matches = Vec::new();
1433
1434        for (stored_pattern, id) in &self.patterns {
1435            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1436                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1437                
1438                // Calculate Unicode compatibility score
1439                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1440                    // No specific Unicode requirements, use general coverage
1441                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1442                } else {
1443                    // Calculate how well this font covers the requested Unicode ranges
1444                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1445                };
1446                
1447                let style_score = Self::calculate_style_score(pattern, metadata);
1448                
1449                // Memory fonts get a bonus to prefer them over disk fonts
1450                let is_memory = self.memory_fonts.contains_key(id);
1451                
1452                matches.push((*id, unicode_compatibility, style_score, metadata.clone(), is_memory));
1453            }
1454        }
1455
1456        // Sort by: 1. Memory font (preferred), 2. Unicode compatibility, 3. Style score
1457        matches.sort_by(|a, b| {
1458            // Memory fonts first
1459            b.4.cmp(&a.4)
1460                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1461                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
1462        });
1463
1464        matches.first().map(|(id, _, _, metadata, _)| {
1465            FontMatch {
1466                id: *id,
1467                unicode_ranges: metadata.unicode_ranges.clone(),
1468                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1469            }
1470        })
1471    }
1472
1473    /// Queries all fonts matching a pattern (internal use only)
1474    /// 
1475    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
1476    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
1477    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
1478        let mut matches = Vec::new();
1479
1480        for (stored_pattern, id) in &self.patterns {
1481            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1482                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1483                
1484                // Calculate Unicode compatibility score
1485                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1486                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1487                } else {
1488                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1489                };
1490                
1491                let style_score = Self::calculate_style_score(pattern, metadata);
1492                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
1493            }
1494        }
1495
1496        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
1497        // Style matching (weight, italic, etc.) is now the primary criterion
1498        // Deterministic tiebreaker: prefer non-italic, then alphabetical by name
1499        matches.sort_by(|a, b| {
1500            a.2.cmp(&b.2) // Style score (lower is better)
1501                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1502                .then_with(|| a.3.italic.cmp(&b.3.italic)) // Prefer non-italic
1503                .then_with(|| a.3.name.cmp(&b.3.name)) // Alphabetical tiebreaker
1504        });
1505
1506        matches
1507            .into_iter()
1508            .map(|(id, _, _, metadata)| {
1509                FontMatch {
1510                    id,
1511                    unicode_ranges: metadata.unicode_ranges.clone(),
1512                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1513                }
1514            })
1515            .collect()
1516    }
1517
1518    /// Compute fallback fonts for a given font
1519    /// This is a lazy operation that can be expensive - only call when actually needed
1520    /// (e.g., for FFI or debugging, not needed for resolve_char)
1521    pub fn compute_fallbacks(
1522        &self,
1523        font_id: &FontId,
1524        trace: &mut Vec<TraceMsg>,
1525    ) -> Vec<FontMatchNoFallback> {
1526        // Get the pattern for this font
1527        let pattern = match self.metadata.get(font_id) {
1528            Some(p) => p,
1529            None => return Vec::new(),
1530        };
1531        
1532        self.compute_fallbacks_for_pattern(pattern, Some(font_id), trace)
1533    }
1534    
1535    fn compute_fallbacks_for_pattern(
1536        &self,
1537        pattern: &FcPattern,
1538        exclude_id: Option<&FontId>,
1539        _trace: &mut Vec<TraceMsg>,
1540    ) -> Vec<FontMatchNoFallback> {
1541        let mut candidates = Vec::new();
1542
1543        // Collect all potential fallbacks (excluding original pattern)
1544        for (stored_pattern, id) in &self.patterns {
1545            // Skip if this is the original font
1546            if exclude_id.is_some() && exclude_id.unwrap() == id {
1547                continue;
1548            }
1549
1550            // Check if this font supports any of the unicode ranges
1551            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
1552                // Calculate Unicode compatibility
1553                let unicode_compatibility = Self::calculate_unicode_compatibility(
1554                    &pattern.unicode_ranges,
1555                    &stored_pattern.unicode_ranges
1556                );
1557                
1558                // Only include if there's actual overlap
1559                if unicode_compatibility > 0 {
1560                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
1561                    candidates.push((
1562                        FontMatchNoFallback {
1563                            id: *id,
1564                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
1565                        },
1566                        unicode_compatibility,
1567                        style_score,
1568                        stored_pattern.clone(),
1569                    ));
1570                }
1571            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
1572                // No specific Unicode requirements, use general coverage
1573                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
1574                let style_score = Self::calculate_style_score(pattern, stored_pattern);
1575                candidates.push((
1576                    FontMatchNoFallback {
1577                        id: *id,
1578                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
1579                    },
1580                    coverage,
1581                    style_score,
1582                    stored_pattern.clone(),
1583                ));
1584            }
1585        }
1586
1587        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
1588        candidates.sort_by(|a, b| {
1589            b.1.cmp(&a.1)
1590                .then_with(|| a.2.cmp(&b.2))
1591        });
1592
1593        // Deduplicate by keeping only the best match per unique unicode range
1594        let mut seen_ranges = Vec::new();
1595        let mut deduplicated = Vec::new();
1596
1597        for (id, _, _, pattern) in candidates {
1598            let mut is_new_range = false;
1599
1600            for range in &pattern.unicode_ranges {
1601                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
1602                    seen_ranges.push(*range);
1603                    is_new_range = true;
1604                }
1605            }
1606
1607            if is_new_range {
1608                deduplicated.push(id);
1609            }
1610        }
1611
1612        deduplicated
1613    }
1614
1615    /// Get in-memory font data
1616    pub fn get_memory_font(&self, id: &FontId) -> Option<&FcFont> {
1617        self.memory_fonts.get(id)
1618    }
1619
1620    /// Check if a pattern matches the query, with detailed tracing
1621    pub fn query_matches_internal(
1622        k: &FcPattern,
1623        pattern: &FcPattern,
1624        trace: &mut Vec<TraceMsg>,
1625    ) -> bool {
1626        // Check name - substring match
1627        if let Some(ref name) = pattern.name {
1628            let matches = k
1629                .name
1630                .as_ref()
1631                .map_or(false, |k_name| k_name.contains(name));
1632
1633            if !matches {
1634                trace.push(TraceMsg {
1635                    level: TraceLevel::Info,
1636                    path: k
1637                        .name
1638                        .as_ref()
1639                        .map_or_else(|| "<unknown>".to_string(), Clone::clone),
1640                    reason: MatchReason::NameMismatch {
1641                        requested: pattern.name.clone(),
1642                        found: k.name.clone(),
1643                    },
1644                });
1645                return false;
1646            }
1647        }
1648
1649        // Check family - substring match
1650        if let Some(ref family) = pattern.family {
1651            let matches = k
1652                .family
1653                .as_ref()
1654                .map_or(false, |k_family| k_family.contains(family));
1655
1656            if !matches {
1657                trace.push(TraceMsg {
1658                    level: TraceLevel::Info,
1659                    path: k
1660                        .name
1661                        .as_ref()
1662                        .map_or_else(|| "<unknown>".to_string(), Clone::clone),
1663                    reason: MatchReason::FamilyMismatch {
1664                        requested: pattern.family.clone(),
1665                        found: k.family.clone(),
1666                    },
1667                });
1668                return false;
1669            }
1670        }
1671
1672        // Check style properties
1673        let style_properties = [
1674            (
1675                "italic",
1676                pattern.italic.needs_to_match(),
1677                pattern.italic.matches(&k.italic),
1678            ),
1679            (
1680                "oblique",
1681                pattern.oblique.needs_to_match(),
1682                pattern.oblique.matches(&k.oblique),
1683            ),
1684            (
1685                "bold",
1686                pattern.bold.needs_to_match(),
1687                pattern.bold.matches(&k.bold),
1688            ),
1689            (
1690                "monospace",
1691                pattern.monospace.needs_to_match(),
1692                pattern.monospace.matches(&k.monospace),
1693            ),
1694            (
1695                "condensed",
1696                pattern.condensed.needs_to_match(),
1697                pattern.condensed.matches(&k.condensed),
1698            ),
1699        ];
1700
1701        for (property_name, needs_to_match, matches) in style_properties {
1702            if needs_to_match && !matches {
1703                let (requested, found) = match property_name {
1704                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
1705                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
1706                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
1707                    "monospace" => (
1708                        format!("{:?}", pattern.monospace),
1709                        format!("{:?}", k.monospace),
1710                    ),
1711                    "condensed" => (
1712                        format!("{:?}", pattern.condensed),
1713                        format!("{:?}", k.condensed),
1714                    ),
1715                    _ => (String::new(), String::new()),
1716                };
1717
1718                trace.push(TraceMsg {
1719                    level: TraceLevel::Info,
1720                    path: k
1721                        .name
1722                        .as_ref()
1723                        .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1724                    reason: MatchReason::StyleMismatch {
1725                        property: property_name,
1726                        requested,
1727                        found,
1728                    },
1729                });
1730                return false;
1731            }
1732        }
1733
1734        // Check weight - hard filter if non-normal weight is requested
1735        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
1736            trace.push(TraceMsg {
1737                level: TraceLevel::Info,
1738                path: k
1739                    .name
1740                    .as_ref()
1741                    .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1742                reason: MatchReason::WeightMismatch {
1743                    requested: pattern.weight,
1744                    found: k.weight,
1745                },
1746            });
1747            return false;
1748        }
1749
1750        // Check stretch - hard filter if non-normal stretch is requested
1751        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
1752            trace.push(TraceMsg {
1753                level: TraceLevel::Info,
1754                path: k
1755                    .name
1756                    .as_ref()
1757                    .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1758                reason: MatchReason::StretchMismatch {
1759                    requested: pattern.stretch,
1760                    found: k.stretch,
1761                },
1762            });
1763            return false;
1764        }
1765
1766        // Check unicode ranges if specified
1767        if !pattern.unicode_ranges.is_empty() {
1768            let mut has_overlap = false;
1769
1770            for p_range in &pattern.unicode_ranges {
1771                for k_range in &k.unicode_ranges {
1772                    if p_range.overlaps(k_range) {
1773                        has_overlap = true;
1774                        break;
1775                    }
1776                }
1777                if has_overlap {
1778                    break;
1779                }
1780            }
1781
1782            if !has_overlap {
1783                trace.push(TraceMsg {
1784                    level: TraceLevel::Info,
1785                    path: k
1786                        .name
1787                        .as_ref()
1788                        .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1789                    reason: MatchReason::UnicodeRangeMismatch {
1790                        character: '\0', // No specific character to report
1791                        ranges: k.unicode_ranges.clone(),
1792                    },
1793                });
1794                return false;
1795            }
1796        }
1797
1798        true
1799    }
1800    
/// Resolve a complete font fallback chain for a CSS font-family stack.
///
/// This is the main entry point for font resolution. It forwards to
/// `resolve_font_chain_with_os` with `OperatingSystem::current()`, which
/// caches the result per (families, weight, italic, oblique) and expands
/// generic CSS families (serif, sans-serif, monospace) to OS-specific fonts.
///
/// The text to render is *not* an input here: call `query_for_text()` on the
/// returned chain to find out which fonts to use for a specific string.
///
/// # Arguments
/// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
/// * `weight` - Font weight
/// * `italic` - Italic style requirement
/// * `oblique` - Oblique style requirement
/// * `trace` - Debug trace messages
///
/// # Returns
/// A font fallback chain holding one CSS fallback group per (expanded)
/// family. Its Unicode fallbacks start out empty and are resolved on demand.
///
/// # Example
/// ```no_run
/// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
/// let cache = FcFontCache::build();
/// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
/// let chain = cache.resolve_font_chain(&families, FcWeight::Normal,
///                                       PatternMatch::DontCare, PatternMatch::DontCare,
///                                       &mut Vec::new());
/// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
/// ```
#[cfg(feature = "std")]
pub fn resolve_font_chain(
    &self,
    font_families: &[String],
    weight: FcWeight,
    italic: PatternMatch,
    oblique: PatternMatch,
    trace: &mut Vec<TraceMsg>,
) -> FontFallbackChain {
    self.resolve_font_chain_with_os(
        font_families,
        weight,
        italic,
        oblique,
        trace,
        OperatingSystem::current(),
    )
}
1837    
/// Resolve a font fallback chain for an explicitly given operating system
/// (useful for testing).
///
/// The request is looked up in `chain_cache` first. On a miss the families
/// are expanded for `os` with `expand_font_families`, the chain is built and
/// a copy is stored in the cache before it is returned. When `lock()` on the
/// cache returns `Err`, the lookup or the store is simply skipped.
///
/// NOTE(review): the cache key is built from `font_families`, `weight`,
/// `italic` and `oblique` only. `os` is not part of it, so a chain cached
/// for one `os` value is also returned for a later call with a different
/// `os`. On a cache hit nothing is appended to `trace`.
#[cfg(feature = "std")]
pub fn resolve_font_chain_with_os(
    &self,
    font_families: &[String],
    weight: FcWeight,
    italic: PatternMatch,
    oblique: PatternMatch,
    trace: &mut Vec<TraceMsg>,
    os: OperatingSystem,
) -> FontFallbackChain {
    // The key uses the ORIGINAL (unexpanded) families so that all text nodes
    // with the same CSS properties share one chain.
    let cache_key = FontChainCacheKey {
        font_families: font_families.to_vec(),
        weight,
        italic,
        oblique,
    };

    // Cache lookup comes first; the guard is released as soon as the entry
    // has been copied out.
    let cached_chain = self
        .chain_cache
        .lock()
        .ok()
        .and_then(|guard| guard.get(&cache_key).cloned());
    if let Some(hit) = cached_chain {
        return hit;
    }

    // Miss: expand generic CSS families to OS-specific fonts (no unicode
    // ranges are needed for this) and build the chain.
    let expanded_families = expand_font_families(font_families, os, &[]);
    let chain =
        self.resolve_font_chain_uncached(&expanded_families, weight, italic, oblique, trace);

    // Remember the result for the next identical request.
    match self.chain_cache.lock() {
        Ok(mut guard) => {
            guard.insert(cache_key, chain.clone());
        }
        Err(_) => {}
    }

    chain
}
1883    
/// Internal implementation without caching.
///
/// Produces one `CssFallbackGroup` per entry of `font_families`, in the same
/// order. A group is added even when no font was found, so that the CSS
/// ordering is preserved.
///
/// * Generic CSS families (as decided by `is_generic_family`) carry no usable
///   font name, so they are resolved by properties through `query_internal`
///   and limited to the first 5 matches. `serif` / `sans-serif` require a
///   non-monospace font, `monospace` requires a monospace font, the other
///   generic names add no monospace constraint.
/// * Every other name goes through the token-based `fuzzy_query_by_name`.
///
/// The generic keyword is compared case-insensitively, consistent with
/// `is_generic_family`, so that e.g. "Monospace" gets the same monospace
/// constraint as "monospace".
///
/// Note: This function does not take text/unicode ranges as input. The
/// returned `FontFallbackChain` has an empty `unicode_fallbacks` list; use
/// its `query_for_text()` method to resolve fonts for specific text.
#[cfg(feature = "std")]
fn resolve_font_chain_uncached(
    &self,
    font_families: &[String],
    weight: FcWeight,
    italic: PatternMatch,
    oblique: PatternMatch,
    trace: &mut Vec<TraceMsg>,
) -> FontFallbackChain {
    let mut css_fallbacks = Vec::with_capacity(font_families.len());

    for family in font_families {
        let fonts = if Self::is_generic_family(family) {
            // Property-only pattern shared by all generic families.
            let base = FcPattern {
                name: None,
                weight,
                italic,
                oblique,
                unicode_ranges: Vec::new(),
                ..Default::default()
            };

            // Dispatch on the lowercased keyword: `is_generic_family`
            // already accepted the name regardless of case.
            let pattern = match family.to_lowercase().as_str() {
                "sans-serif" | "serif" => FcPattern {
                    monospace: PatternMatch::False,
                    ..base
                },
                "monospace" => FcPattern {
                    monospace: PatternMatch::True,
                    ..base
                },
                _ => base,
            };

            // Full pattern matching, limited to the top 5 fonts to avoid
            // too many matches.
            let mut matches = self.query_internal(&pattern, trace);
            matches.truncate(5);
            matches
        } else {
            // Specific font names: fast token-based fuzzy matching.
            self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
        };

        css_fallbacks.push(CssFallbackGroup {
            css_name: family.clone(),
            fonts,
        });
    }

    // Unicode fallbacks are resolved lazily in query_for_text(), which avoids
    // the expensive unicode coverage check during chain building.
    FontFallbackChain {
        css_fallbacks,
        unicode_fallbacks: Vec::new(), // Will be populated on-demand
        original_stack: font_families.to_vec(),
    }
}
1987    
1988    /// Extract Unicode ranges from text
1989    #[allow(dead_code)]
1990    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
1991        let mut chars: Vec<char> = text.chars().collect();
1992        chars.sort_unstable();
1993        chars.dedup();
1994        
1995        if chars.is_empty() {
1996            return Vec::new();
1997        }
1998        
1999        let mut ranges = Vec::new();
2000        let mut range_start = chars[0] as u32;
2001        let mut range_end = range_start;
2002        
2003        for &c in &chars[1..] {
2004            let codepoint = c as u32;
2005            if codepoint == range_end + 1 {
2006                range_end = codepoint;
2007            } else {
2008                ranges.push(UnicodeRange { start: range_start, end: range_end });
2009                range_start = codepoint;
2010                range_end = codepoint;
2011            }
2012        }
2013        
2014        ranges.push(UnicodeRange { start: range_start, end: range_end });
2015        ranges
2016    }
2017    
/// Report whether `family` is one of the generic CSS font-family keywords
/// `serif`, `sans-serif`, `monospace`, `cursive`, `fantasy` or `system-ui`.
///
/// The name is lowercased before the comparison, so "Serif" is accepted too.
#[cfg(feature = "std")]
fn is_generic_family(family: &str) -> bool {
    const GENERIC_FAMILIES: [&str; 6] = [
        "serif",
        "sans-serif",
        "monospace",
        "cursive",
        "fantasy",
        "system-ui",
    ];

    let lowered = family.to_lowercase();
    GENERIC_FAMILIES.contains(&lowered.as_str())
}
2026    
/// Fuzzy query for fonts by name when exact match fails.
///
/// Uses token-based matching with an inverted index for speed:
/// 1. Break the name into lowercase tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
/// 2. Use `token_index` to find candidate fonts: start with the fonts of the
///    first token and intersect with each further token; stop narrowing as
///    soon as a token is unknown or would leave no candidate
/// 3. Score only the candidate fonts instead of all patterns
/// 4. Sort and return at most 5 matches
///
/// # Arguments
/// * `requested_name` - Font name to look for
/// * `weight`, `italic`, `oblique` - Style request used for the style score
/// * `unicode_ranges` - If non-empty, fonts with a Unicode compatibility of 0
///   for these ranges are discarded
/// * `_trace` - Currently unused
///
/// # Returns
/// Up to 5 matches ordered by token similarity (higher first), Unicode
/// compatibility (higher first), style score (lower first), then the
/// `italic` field and the font name as deterministic tiebreakers. The name
/// yields no matches when it has no tokens or its first token is not indexed.
#[cfg(feature = "std")]
fn fuzzy_query_by_name(
    &self,
    requested_name: &str,
    weight: FcWeight,
    italic: PatternMatch,
    oblique: PatternMatch,
    unicode_ranges: &[UnicodeRange],
    _trace: &mut Vec<TraceMsg>,
) -> Vec<FontMatch> {
    // Lowercase request tokens for the case-insensitive index lookup.
    let tokens_lower: Vec<String> = Self::extract_font_name_tokens(requested_name)
        .iter()
        .map(|token| token.to_lowercase())
        .collect();

    let Some(first_token) = tokens_lower.first() else {
        return Vec::new();
    };

    // Progressive narrowing, e.g. ["noto"] -> 10 fonts, ["noto","sans"] ->
    // 2 fonts, ["noto","sans","jp"] -> 0 fonts => keep the 2 fonts.
    let mut candidate_ids = match self.token_index.get(first_token) {
        Some(ids) if !ids.is_empty() => ids.clone(),
        // First token not found - no fonts match, quit immediately.
        _ => return Vec::new(),
    };

    for token in &tokens_lower[1..] {
        // Token not in index - keep the current set and stop.
        let Some(token_ids) = self.token_index.get(token) else {
            break;
        };

        let narrowed: alloc::collections::BTreeSet<FontId> =
            candidate_ids.intersection(token_ids).copied().collect();

        // Adding this token would leave nothing - keep the broader set.
        if narrowed.is_empty() {
            break;
        }
        candidate_ids = narrowed;
    }

    // The style request is the same for every candidate, so build it once.
    let style_query = FcPattern {
        weight,
        italic,
        oblique,
        ..Default::default()
    };
    let has_unicode_request = !unicode_ranges.is_empty();

    // (id, token similarity, unicode similarity, style score, metadata).
    // The metadata is borrowed: only a few of its fields are read below, so
    // cloning the whole pattern per candidate is unnecessary.
    let mut candidates = Vec::new();

    for id in candidate_ids {
        let Some(pattern) = self.metadata.get(&id) else {
            continue;
        };

        // Pre-tokenized font name (already lowercase).
        let Some(font_tokens_lower) = self.font_tokens.get(&id) else {
            continue;
        };
        if font_tokens_lower.is_empty() {
            continue;
        }

        // How many requested tokens appear inside some token of the font name.
        let token_matches = tokens_lower
            .iter()
            .filter(|req_token| {
                font_tokens_lower
                    .iter()
                    .any(|font_token| font_token.contains(req_token.as_str()))
            })
            .count();

        // Should not happen because of the index, but stay safe.
        if token_matches == 0 {
            continue;
        }

        // Token similarity score (0-100).
        let token_similarity = (token_matches * 100 / tokens_lower.len()) as i32;

        let unicode_similarity = if has_unicode_request && !pattern.unicode_ranges.is_empty() {
            Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
        } else {
            0
        };

        // A font with a great name match but no coverage of the requested
        // ranges is useless.
        if has_unicode_request && unicode_similarity == 0 {
            continue;
        }

        let style_score = Self::calculate_style_score(&style_query, pattern);

        candidates.push((id, token_similarity, unicode_similarity, style_score, pattern));
    }

    // Without a Unicode request every unicode similarity is 0, so the second
    // key compares equal and one comparator serves both cases.
    candidates.sort_by(|a, b| {
        b.1.cmp(&a.1) // Token similarity (higher is better)
            .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better)
            .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
            .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
            .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
    });

    // Take top 5 matches
    candidates.truncate(5);

    candidates
        .into_iter()
        .map(|(id, _token_sim, _unicode_sim, _style, pattern)| FontMatch {
            id,
            unicode_ranges: pattern.unicode_ranges.clone(),
            fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
        })
        .collect()
}
2192    
/// Split a font name into its word tokens (original casing is kept).
///
/// Whitespace, `-` and `_` separate tokens and are dropped. In addition a
/// token ends where a lowercase letter is followed by an uppercase letter.
///
/// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
/// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
pub fn extract_font_name_tokens(name: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut prev_lowercase = false;

    for ch in name.chars() {
        let is_separator = ch.is_whitespace() || matches!(ch, '-' | '_');
        // CamelCase boundary (e.g., "Noto" | "Sans")
        let starts_new_word = !is_separator && ch.is_uppercase() && prev_lowercase;

        // Close the token collected so far, if there is one.
        if (is_separator || starts_new_word) && !pending.is_empty() {
            pieces.push(core::mem::take(&mut pending));
        }

        if is_separator {
            prev_lowercase = false;
        } else {
            pending.push(ch);
            prev_lowercase = ch.is_lowercase();
        }
    }

    if !pending.is_empty() {
        pieces.push(pending);
    }

    pieces
}
2227    
/// Normalize a font name for comparison: keep only ASCII letters and digits
/// and lowercase them.
///
/// Everything else (spaces, punctuation, non-ASCII characters) is dropped,
/// so only the Latin-script part of a name takes part in comparisons and
/// localized names are ignored.
#[allow(dead_code)]
fn normalize_font_name(name: &str) -> String {
    let mut normalized = String::new();
    for ch in name.chars() {
        if ch.is_ascii_alphanumeric() {
            normalized.push(ch.to_ascii_lowercase());
        }
    }
    normalized
}
2237    
/// Levenshtein (edit) distance between `s1` and `s2`, counted in `char`s.
///
/// Insertions, deletions and substitutions each cost 1. Only two rows of the
/// distance table are kept at any time.
#[allow(dead_code)]
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let source: Vec<char> = s1.chars().collect();
    let target: Vec<char> = s2.chars().collect();

    // Against an empty string the distance is the other string's length.
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // `above` is the finished previous row, `row` the one being filled.
    let mut above: Vec<usize> = (0..=target.len()).collect();
    let mut row = vec![0; target.len() + 1];

    for (i, &from) in source.iter().enumerate() {
        row[0] = i + 1;

        for (j, &to) in target.iter().enumerate() {
            let insertion = row[j] + 1;
            let deletion = above[j + 1] + 1;
            let substitution = above[j] + usize::from(from != to);
            row[j + 1] = insertion.min(deletion).min(substitution);
        }

        core::mem::swap(&mut above, &mut row);
    }

    // After the final swap the last completed row lives in `above`.
    above[target.len()]
}
2269    
2270    /// Find fonts to cover missing Unicode ranges
2271    /// Uses intelligent matching: prefers fonts with similar names to existing ones
2272    /// Early quits once all Unicode ranges are covered for performance
2273    #[allow(dead_code)]
2274    fn find_unicode_fallbacks(
2275        &self,
2276        unicode_ranges: &[UnicodeRange],
2277        covered_chars: &[bool],
2278        existing_groups: &[CssFallbackGroup],
2279        weight: FcWeight,
2280        italic: PatternMatch,
2281        oblique: PatternMatch,
2282        trace: &mut Vec<TraceMsg>,
2283    ) -> Vec<FontMatch> {
2284        // Extract uncovered ranges
2285        let mut uncovered_ranges = Vec::new();
2286        for (i, &covered) in covered_chars.iter().enumerate() {
2287            if !covered && i < unicode_ranges.len() {
2288                uncovered_ranges.push(unicode_ranges[i].clone());
2289            }
2290        }
2291        
2292        if uncovered_ranges.is_empty() {
2293            return Vec::new();
2294        }
2295        
2296        // Query for fonts that cover these ranges
2297        let pattern = FcPattern {
2298            name: None, // Wildcard - match any font
2299            weight,
2300            italic,
2301            oblique,
2302            unicode_ranges: uncovered_ranges.clone(),
2303            ..Default::default()
2304        };
2305        
2306        let mut candidates = self.query_internal(&pattern, trace);
2307        
2308        // Intelligent sorting: prefer fonts with similar names to existing ones
2309        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
2310        let existing_prefixes: Vec<String> = existing_groups
2311            .iter()
2312            .flat_map(|group| {
2313                group.fonts.iter().filter_map(|font| {
2314                    self.get_metadata_by_id(&font.id)
2315                        .and_then(|meta| meta.family.clone())
2316                        .and_then(|family| {
2317                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
2318                            family.split_whitespace()
2319                                .take(2)
2320                                .collect::<Vec<_>>()
2321                                .join(" ")
2322                                .into()
2323                        })
2324                })
2325            })
2326            .collect();
2327        
2328        // Sort candidates by:
2329        // 1. Name similarity to existing fonts (highest priority)
2330        // 2. Unicode coverage (secondary)
2331        candidates.sort_by(|a, b| {
2332            let a_meta = self.get_metadata_by_id(&a.id);
2333            let b_meta = self.get_metadata_by_id(&b.id);
2334            
2335            let a_score = Self::calculate_font_similarity_score(a_meta, &existing_prefixes);
2336            let b_score = Self::calculate_font_similarity_score(b_meta, &existing_prefixes);
2337            
2338            b_score.cmp(&a_score) // Higher score = better match
2339                .then_with(|| {
2340                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
2341                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
2342                    b_coverage.cmp(&a_coverage)
2343                })
2344        });
2345        
2346        // Early quit optimization: only take fonts until all ranges are covered
2347        let mut result = Vec::new();
2348        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
2349        
2350        for candidate in candidates {
2351            // Check which ranges this font covers
2352            let mut covers_new_range = false;
2353            
2354            for (i, range) in uncovered_ranges.iter().enumerate() {
2355                if remaining_uncovered[i] {
2356                    // Check if this font covers this range
2357                    for font_range in &candidate.unicode_ranges {
2358                        if font_range.overlaps(range) {
2359                            remaining_uncovered[i] = false;
2360                            covers_new_range = true;
2361                            break;
2362                        }
2363                    }
2364                }
2365            }
2366            
2367            // Only add fonts that cover at least one new range
2368            if covers_new_range {
2369                result.push(candidate);
2370                
2371                // Early quit: if all ranges are covered, stop
2372                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
2373                    break;
2374                }
2375            }
2376        }
2377        
2378        result
2379    }
2380    
2381    /// Calculate similarity score between a font and existing font prefixes
2382    /// Higher score = more similar
2383    #[allow(dead_code)]
2384    fn calculate_font_similarity_score(
2385        font_meta: Option<&FcPattern>,
2386        existing_prefixes: &[String],
2387    ) -> i32 {
2388        let Some(meta) = font_meta else { return 0; };
2389        let Some(family) = &meta.family else { return 0; };
2390        
2391        // Check if this font's family matches any existing prefix
2392        for prefix in existing_prefixes {
2393            if family.starts_with(prefix) {
2394                return 100; // Strong match
2395            }
2396            if family.contains(prefix) {
2397                return 50; // Partial match
2398            }
2399        }
2400        
2401        0 // No match
2402    }
2403    
2404    /// Find fallback fonts for a given pattern
2405    // Helper to calculate total unicode coverage
2406    pub fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
2407        ranges
2408            .iter()
2409            .map(|range| (range.end - range.start + 1) as u64)
2410            .sum()
2411    }
2412
2413    /// Calculate how well a font's Unicode ranges cover the requested ranges
2414    /// Returns a compatibility score (higher is better, 0 means no overlap)
2415    pub fn calculate_unicode_compatibility(
2416        requested: &[UnicodeRange],
2417        available: &[UnicodeRange],
2418    ) -> i32 {
2419        if requested.is_empty() {
2420            // No specific requirements, return total coverage
2421            return Self::calculate_unicode_coverage(available) as i32;
2422        }
2423        
2424        let mut total_coverage = 0u32;
2425        
2426        for req_range in requested {
2427            for avail_range in available {
2428                // Calculate overlap between requested and available ranges
2429                let overlap_start = req_range.start.max(avail_range.start);
2430                let overlap_end = req_range.end.min(avail_range.end);
2431                
2432                if overlap_start <= overlap_end {
2433                    // There is overlap
2434                    let overlap_size = overlap_end - overlap_start + 1;
2435                    total_coverage += overlap_size;
2436                }
2437            }
2438        }
2439        
2440        total_coverage as i32
2441    }
2442
2443    pub fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {
2444
2445        let mut score = 0_i32;
2446
2447        // Weight calculation with special handling for bold property
2448        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
2449            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
2450        {
2451            // No weight penalty when bold is requested and font has Bold weight
2452            // No weight penalty when non-bold is requested and font has non-Bold weight
2453        } else {
2454            // Apply normal weight difference penalty
2455            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
2456            score += weight_diff as i32;
2457        }
2458
2459        // Exact weight match bonus: reward fonts whose weight matches the request exactly,
2460        // with an extra bonus when both are Normal (the most common case for body text)
2461        if original.weight == candidate.weight {
2462            score -= 15;
2463            if original.weight == FcWeight::Normal {
2464                score -= 10; // Extra bonus for Normal-Normal match
2465            }
2466        }
2467
2468        // Stretch calculation with special handling for condensed property
2469        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
2470            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
2471        {
2472            // No stretch penalty when condensed is requested and font has condensed stretch
2473            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
2474        } else {
2475            // Apply normal stretch difference penalty
2476            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
2477            score += (stretch_diff * 100) as i32;
2478        }
2479
2480        // Handle style properties with standard penalties and bonuses
2481        let style_props = [
2482            (original.italic, candidate.italic, 300, 150),
2483            (original.oblique, candidate.oblique, 200, 100),
2484            (original.bold, candidate.bold, 300, 150),
2485            (original.monospace, candidate.monospace, 100, 50),
2486            (original.condensed, candidate.condensed, 100, 50),
2487        ];
2488
2489        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
2490            if orig.needs_to_match() {
2491                if orig == PatternMatch::False && cand == PatternMatch::DontCare {
2492                    // Requesting non-italic but font doesn't declare: small penalty
2493                    // (less than a full mismatch but more than a perfect match)
2494                    score += dontcare_penalty / 2;
2495                } else if !orig.matches(&cand) {
2496                    if cand == PatternMatch::DontCare {
2497                        score += dontcare_penalty;
2498                    } else {
2499                        score += mismatch_penalty;
2500                    }
2501                } else if orig == PatternMatch::True && cand == PatternMatch::True {
2502                    // Give bonus for exact True match
2503                    score -= 20;
2504                } else if orig == PatternMatch::False && cand == PatternMatch::False {
2505                    // Give bonus for exact False match (prefer explicitly non-italic
2506                    // over fonts with unknown/DontCare italic status)
2507                    score -= 20;
2508                }
2509            } else {
2510                // orig == DontCare: prefer "normal" fonts over styled ones.
2511                // When the caller doesn't specify italic/bold/etc., a font
2512                // that IS italic/bold should score slightly worse than one
2513                // that isn't, so Regular is chosen over Italic by default.
2514                if cand == PatternMatch::True {
2515                    score += dontcare_penalty / 3;
2516                }
2517            }
2518        }
2519
2520        // ── Name-based "base font" detection ──
2521        // The shorter the font name relative to its family, the more "basic" the
2522        // variant.  E.g. "System Font" (the base) should score better than
2523        // "System Font Regular Italic" (a variant) when the user hasn't
2524        // explicitly requested italic.
2525        if let (Some(name), Some(family)) = (&candidate.name, &candidate.family) {
2526            let name_lower = name.to_lowercase();
2527            let family_lower = family.to_lowercase();
2528
2529            // Strip the family prefix from the name to get the "extra" part
2530            let extra = if name_lower.starts_with(&family_lower) {
2531                name_lower[family_lower.len()..].to_string()
2532            } else {
2533                String::new()
2534            };
2535
2536            // Strip common neutral descriptors that don't indicate a style variant
2537            let stripped = extra
2538                .replace("regular", "")
2539                .replace("normal", "")
2540                .replace("book", "")
2541                .replace("roman", "");
2542            let stripped = stripped.trim();
2543
2544            if stripped.is_empty() {
2545                // This is a "base font" – name is just the family (± "Regular")
2546                score -= 50;
2547            } else {
2548                // Name has extra style descriptors – add a penalty per extra word
2549                let extra_words = stripped.split_whitespace().count();
2550                score += (extra_words as i32) * 25;
2551            }
2552        }
2553
2554        // ── Subfamily "Regular" bonus ──
2555        // Fonts whose OpenType subfamily is exactly "Regular" are the canonical
2556        // base variant and should be strongly preferred.
2557        if let Some(ref subfamily) = candidate.metadata.font_subfamily {
2558            let sf_lower = subfamily.to_lowercase();
2559            if sf_lower == "regular" {
2560                score -= 30;
2561            }
2562        }
2563
2564        score
2565    }
2566}
2567
/// Discover the system font directories by walking the fontconfig
/// configuration, starting at `/etc/fonts/fonts.conf`, and scan them.
///
/// Configuration files are parsed with `ParseFontsConf`, which queues further
/// `<include>` targets and records `<dir>` entries. Included directories are
/// searched (non-recursively) for files named like `NN-name.conf`.
///
/// Every resolved configuration path is processed at most once, so a cyclic
/// or repeated `<include>` can neither loop forever nor register the same
/// font directories several times. Paths are compared as returned by
/// `process_path`; no canonicalisation is performed.
///
/// Returns `None` when the base configuration file does not exist or when no
/// font directory was found. Unreadable or malformed files are skipped.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn FcScanDirectories() -> Option<Vec<(FcPattern, FcFontPath)>> {
    use std::collections::HashSet;
    use std::fs;
    use std::path::Path;

    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";

    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
        return None;
    }

    let mut font_paths = Vec::with_capacity(32);
    let mut paths_to_visit = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
    // Resolved configuration paths that were already handled
    let mut visited = HashSet::new();

    while let Some((prefix, path_to_visit)) = paths_to_visit.pop() {
        // NOTE(review): `process_path` presumably expands the prefix / `~`
        // into a usable path - confirm against its definition.
        let path = match process_path(&prefix, path_to_visit, true) {
            Some(path) => path,
            None => continue,
        };

        // `insert` returns false when the path was seen before: skip it to
        // break include cycles and avoid duplicate work.
        if !visited.insert(path.clone()) {
            continue;
        }

        let metadata = match fs::metadata(&path) {
            Ok(metadata) => metadata,
            Err(_) => continue,
        };

        if metadata.is_file() {
            let xml_utf8 = match fs::read_to_string(&path) {
                Ok(xml_utf8) => xml_utf8,
                Err(_) => continue,
            };

            // Best effort: a file that fails to parse is simply ignored
            if ParseFontsConf(&xml_utf8, &mut paths_to_visit, &mut font_paths).is_none() {
                continue;
            }
        } else if metadata.is_dir() {
            let dir_entries = match fs::read_dir(&path) {
                Ok(dir_entries) => dir_entries,
                Err(_) => continue,
            };

            for entry_result in dir_entries {
                let entry = match entry_result {
                    Ok(entry) => entry,
                    Err(_) => continue,
                };

                let entry_path = entry.path();

                // `fs::metadata` traverses symbolic links
                let entry_metadata = match fs::metadata(&entry_path) {
                    Ok(metadata) => metadata,
                    Err(_) => continue,
                };

                if !entry_metadata.is_file() {
                    continue;
                }

                let file_name = match entry_path.file_name() {
                    Some(name) => name,
                    None => continue,
                };

                // Only configuration snippets following the "<digit>...conf"
                // naming scheme are picked up from an included directory.
                let file_name_str = file_name.to_string_lossy();
                if file_name_str.starts_with(|c: char| c.is_ascii_digit())
                    && file_name_str.ends_with(".conf")
                {
                    paths_to_visit.push((None, entry_path));
                }
            }
        }
    }

    if font_paths.is_empty() {
        return None;
    }

    Some(FcScanDirectoriesInner(&font_paths))
}
2647
/// Parses the contents of a fontconfig XML file (e.g. `fonts.conf`).
///
/// Only `<include>` and `<dir>` elements are interpreted:
///
/// - the trimmed text of an `<include>` element is pushed onto
///   `paths_to_visit` as a further configuration path to process;
/// - the trimmed text of a `<dir>` element is pushed onto `font_paths`.
///
/// In both cases the value of the element's `prefix` attribute (if any) is
/// stored alongside the path. Elements without text, including the
/// self-closing forms `<include/>` and `<dir/>`, contribute nothing.
///
/// Returns `None` if the tokenizer reports an error or if another element is
/// opened inside an `<include>` / `<dir>`. Entries collected before the error
/// remain in the output vectors.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConf(
    input: &str,
    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
    font_paths: &mut Vec<(Option<String>, String)>,
) -> Option<()> {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    const TAG_INCLUDE: &str = "include";
    const TAG_DIR: &str = "dir";
    const ATTRIBUTE_PREFIX: &str = "prefix";

    // State of the `<include>` / `<dir>` element currently being read
    let mut current_prefix: Option<&str> = None;
    let mut current_path: Option<&str> = None;
    let mut is_in_include = false;
    let mut is_in_dir = false;

    for token_result in Tokenizer::from(input) {
        let token = match token_result {
            Ok(token) => token,
            Err(_) => return None,
        };

        match token {
            ElementStart { local, .. } => {
                if is_in_include || is_in_dir {
                    return None; /* error: nested tags */
                }

                match local.as_str() {
                    TAG_INCLUDE => {
                        is_in_include = true;
                    }
                    TAG_DIR => {
                        is_in_dir = true;
                    }
                    _ => continue,
                }

                current_path = None;
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                if text.is_empty() {
                    continue;
                }
                if is_in_include || is_in_dir {
                    current_path = Some(text);
                }
            }
            Attribute { local, value, .. } => {
                if !is_in_include && !is_in_dir {
                    continue;
                }
                // attribute on <include> or <dir> node
                if local.as_str() == ATTRIBUTE_PREFIX {
                    current_prefix = Some(value.as_str());
                }
            }
            ElementEnd { end, .. } => {
                let end_tag = match end {
                    xmlparser::ElementEnd::Close(_, a) => a,
                    xmlparser::ElementEnd::Empty => {
                        // Self-closing element (`<dir/>`, `<include/>`): it can
                        // carry no path, but the "inside element" state must be
                        // cleared, otherwise the next element start would be
                        // rejected as a nested tag and abort the whole file.
                        if is_in_include || is_in_dir {
                            is_in_include = false;
                            is_in_dir = false;
                            current_path = None;
                            current_prefix = None;
                        }
                        continue;
                    }
                    // `>` terminating a start tag: the element is still open
                    _ => continue,
                };

                match end_tag.as_str() {
                    TAG_INCLUDE => {
                        if !is_in_include {
                            continue;
                        }

                        if let Some(path) = current_path {
                            paths_to_visit.push((
                                current_prefix.map(ToOwned::to_owned),
                                PathBuf::from(path),
                            ));
                        }
                    }
                    TAG_DIR => {
                        if !is_in_dir {
                            continue;
                        }

                        if let Some(path) = current_path {
                            font_paths.push((
                                current_prefix.map(ToOwned::to_owned),
                                path.to_owned(),
                            ));
                        }
                    }
                    _ => continue,
                }

                // Element fully handled: reset the per-element state
                is_in_include = false;
                is_in_dir = false;
                current_path = None;
                current_prefix = None;
            }
            _ => {}
        }
    }

    Some(())
}
2754
2755// Remaining implementation for font scanning, parsing, etc.
2756#[cfg(all(feature = "std", feature = "parsing"))]
2757pub(crate) fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
2758    use allsorts::{
2759        binary::read::ReadScope,
2760        font_data::FontData,
2761        get_name::fontcode_get_name,
2762        post::PostTable,
2763        tables::{
2764            os2::Os2, FontTableProvider, HeadTable, HheaTable, HmtxTable, MaxpTable, NameTable,
2765        },
2766        tag,
2767    };
2768    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
2769    use mmapio::MmapOptions;
2770    use std::collections::BTreeSet;
2771    use std::fs::File;
2772
2773    const FONT_SPECIFIER_NAME_ID: u16 = 4;
2774    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;
2775
2776    // Try parsing the font file and see if the postscript name matches
2777    let file = File::open(filepath).ok()?;
2778
2779    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
2780    let font_bytes = unsafe { MmapOptions::new().map(&file).ok()? };
2781
2782    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
2783    let font_bytes = std::fs::read(filepath).ok()?;
2784
2785    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
2786        // Read numFonts from TTC header (offset 8, 4 bytes)
2787        let num_fonts =
2788            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
2789        // Cap at a reasonable maximum as a safety measure
2790        std::cmp::min(num_fonts as usize, 100)
2791    } else {
2792        // Not a collection, just one font
2793        1
2794    };
2795
2796    let scope = ReadScope::new(&font_bytes[..]);
2797    let font_file = scope.read::<FontData<'_>>().ok()?;
2798
2799    // Handle collections properly by iterating through all fonts
2800    let mut results = Vec::new();
2801
2802    for font_index in 0..max_fonts {
2803        let provider = font_file.table_provider(font_index).ok()?;
2804        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
2805        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;
2806
2807        let is_bold = head_table.is_bold();
2808        let is_italic = head_table.is_italic();
2809        let mut detected_monospace = None;
2810
2811        let post_data = provider.table_data(tag::POST).ok()??;
2812        if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
2813            // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
2814            detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
2815        }
2816
2817        // Get font properties from OS/2 table
2818        let os2_data = provider.table_data(tag::OS_2).ok()??;
2819        let os2_table = ReadScope::new(&os2_data)
2820            .read_dep::<Os2>(os2_data.len())
2821            .ok()?;
2822
2823        // Extract additional style information
2824        let is_oblique = os2_table
2825            .fs_selection
2826            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
2827        let weight = FcWeight::from_u16(os2_table.us_weight_class);
2828        let stretch = FcStretch::from_u16(os2_table.us_width_class);
2829
2830        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
2831        // These are hints about what the font *should* support
2832        // For actual glyph coverage verification, query the font file directly
2833        let mut unicode_ranges = Vec::new();
2834
2835        // Process the 4 Unicode range bitfields from OS/2 table
2836        let ranges = [
2837            os2_table.ul_unicode_range1,
2838            os2_table.ul_unicode_range2,
2839            os2_table.ul_unicode_range3,
2840            os2_table.ul_unicode_range4,
2841        ];
2842
2843        // Unicode range bit positions to actual ranges
2844        // Based on OpenType spec: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
2845        let range_mappings = [
2846            // ulUnicodeRange1 (bits 0-31)
2847            (0, 0x0000, 0x007F), // Basic Latin
2848            (1, 0x0080, 0x00FF), // Latin-1 Supplement
2849            (2, 0x0100, 0x017F), // Latin Extended-A
2850            (3, 0x0180, 0x024F), // Latin Extended-B
2851            (4, 0x0250, 0x02AF), // IPA Extensions
2852            (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
2853            (6, 0x0300, 0x036F), // Combining Diacritical Marks
2854            (7, 0x0370, 0x03FF), // Greek and Coptic
2855            (8, 0x2C80, 0x2CFF), // Coptic
2856            (9, 0x0400, 0x04FF), // Cyrillic
2857            (10, 0x0530, 0x058F), // Armenian
2858            (11, 0x0590, 0x05FF), // Hebrew
2859            (12, 0x0600, 0x06FF), // Arabic
2860            (13, 0x0700, 0x074F), // Syriac
2861            (14, 0x0780, 0x07BF), // Thaana
2862            (15, 0x0900, 0x097F), // Devanagari
2863            (16, 0x0980, 0x09FF), // Bengali
2864            (17, 0x0A00, 0x0A7F), // Gurmukhi
2865            (18, 0x0A80, 0x0AFF), // Gujarati
2866            (19, 0x0B00, 0x0B7F), // Oriya
2867            (20, 0x0B80, 0x0BFF), // Tamil
2868            (21, 0x0C00, 0x0C7F), // Telugu
2869            (22, 0x0C80, 0x0CFF), // Kannada
2870            (23, 0x0D00, 0x0D7F), // Malayalam
2871            (24, 0x0E00, 0x0E7F), // Thai
2872            (25, 0x0E80, 0x0EFF), // Lao
2873            (26, 0x10A0, 0x10FF), // Georgian
2874            (27, 0x1B00, 0x1B7F), // Balinese
2875            (28, 0x1100, 0x11FF), // Hangul Jamo
2876            (29, 0x1E00, 0x1EFF), // Latin Extended Additional
2877            (30, 0x1F00, 0x1FFF), // Greek Extended
2878            (31, 0x2000, 0x206F), // General Punctuation
2879            
2880            // ulUnicodeRange2 (bits 32-63)
2881            (32, 0x2070, 0x209F), // Superscripts And Subscripts
2882            (33, 0x20A0, 0x20CF), // Currency Symbols
2883            (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
2884            (35, 0x2100, 0x214F), // Letterlike Symbols
2885            (36, 0x2150, 0x218F), // Number Forms
2886            (37, 0x2190, 0x21FF), // Arrows
2887            (38, 0x2200, 0x22FF), // Mathematical Operators
2888            (39, 0x2300, 0x23FF), // Miscellaneous Technical
2889            (40, 0x2400, 0x243F), // Control Pictures
2890            (41, 0x2440, 0x245F), // Optical Character Recognition
2891            (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
2892            (43, 0x2500, 0x257F), // Box Drawing
2893            (44, 0x2580, 0x259F), // Block Elements
2894            (45, 0x25A0, 0x25FF), // Geometric Shapes
2895            (46, 0x2600, 0x26FF), // Miscellaneous Symbols
2896            (47, 0x2700, 0x27BF), // Dingbats
2897            (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
2898            (49, 0x3040, 0x309F), // Hiragana
2899            (50, 0x30A0, 0x30FF), // Katakana
2900            (51, 0x3100, 0x312F), // Bopomofo
2901            (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
2902            (53, 0x3190, 0x319F), // Kanbun
2903            (54, 0x31A0, 0x31BF), // Bopomofo Extended
2904            (55, 0x31C0, 0x31EF), // CJK Strokes
2905            (56, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
2906            (57, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
2907            (58, 0x3300, 0x33FF), // CJK Compatibility
2908            (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
2909            (60, 0xA000, 0xA48F), // Yi Syllables
2910            (61, 0xA490, 0xA4CF), // Yi Radicals
2911            (62, 0xAC00, 0xD7AF), // Hangul Syllables
2912            (63, 0xD800, 0xDFFF), // Non-Plane 0 (note: surrogates, not directly usable)
2913            
2914            // ulUnicodeRange3 (bits 64-95)
2915            (64, 0x10000, 0x10FFFF), // Phoenician and other non-BMP (bit 64 indicates non-BMP support)
2916            (65, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
2917            (66, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
2918            (67, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
2919            (68, 0xFE00, 0xFE0F), // Variation Selectors
2920            (69, 0xFE10, 0xFE1F), // Vertical Forms
2921            (70, 0xFE20, 0xFE2F), // Combining Half Marks
2922            (71, 0xFE30, 0xFE4F), // CJK Compatibility Forms
2923            (72, 0xFE50, 0xFE6F), // Small Form Variants
2924            (73, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
2925            (74, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
2926            (75, 0xFFF0, 0xFFFF), // Specials
2927            (76, 0x0F00, 0x0FFF), // Tibetan
2928            (77, 0x0700, 0x074F), // Syriac
2929            (78, 0x0780, 0x07BF), // Thaana
2930            (79, 0x0D80, 0x0DFF), // Sinhala
2931            (80, 0x1000, 0x109F), // Myanmar
2932            (81, 0x1200, 0x137F), // Ethiopic
2933            (82, 0x13A0, 0x13FF), // Cherokee
2934            (83, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
2935            (84, 0x1680, 0x169F), // Ogham
2936            (85, 0x16A0, 0x16FF), // Runic
2937            (86, 0x1780, 0x17FF), // Khmer
2938            (87, 0x1800, 0x18AF), // Mongolian
2939            (88, 0x2800, 0x28FF), // Braille Patterns
2940            (89, 0xA000, 0xA48F), // Yi Syllables
2941            (90, 0x1680, 0x169F), // Ogham
2942            (91, 0x16A0, 0x16FF), // Runic
2943            (92, 0x1700, 0x171F), // Tagalog
2944            (93, 0x1720, 0x173F), // Hanunoo
2945            (94, 0x1740, 0x175F), // Buhid
2946            (95, 0x1760, 0x177F), // Tagbanwa
2947            
2948            // ulUnicodeRange4 (bits 96-127)
2949            (96, 0x1900, 0x194F), // Limbu
2950            (97, 0x1950, 0x197F), // Tai Le
2951            (98, 0x1980, 0x19DF), // New Tai Lue
2952            (99, 0x1A00, 0x1A1F), // Buginese
2953            (100, 0x2C00, 0x2C5F), // Glagolitic
2954            (101, 0x2D30, 0x2D7F), // Tifinagh
2955            (102, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
2956            (103, 0xA800, 0xA82F), // Syloti Nagri
2957            (104, 0x10000, 0x1007F), // Linear B Syllabary
2958            (105, 0x10080, 0x100FF), // Linear B Ideograms
2959            (106, 0x10100, 0x1013F), // Aegean Numbers
2960            (107, 0x10140, 0x1018F), // Ancient Greek Numbers
2961            (108, 0x10300, 0x1032F), // Old Italic
2962            (109, 0x10330, 0x1034F), // Gothic
2963            (110, 0x10380, 0x1039F), // Ugaritic
2964            (111, 0x103A0, 0x103DF), // Old Persian
2965            (112, 0x10400, 0x1044F), // Deseret
2966            (113, 0x10450, 0x1047F), // Shavian
2967            (114, 0x10480, 0x104AF), // Osmanya
2968            (115, 0x10800, 0x1083F), // Cypriot Syllabary
2969            (116, 0x10A00, 0x10A5F), // Kharoshthi
2970            (117, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
2971            (118, 0x1D100, 0x1D1FF), // Musical Symbols
2972            (119, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
2973            (120, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
2974            (121, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
2975            (122, 0x1F000, 0x1F02F), // Mahjong Tiles
2976            (123, 0x1F030, 0x1F09F), // Domino Tiles
2977            (124, 0x1F300, 0x1F9FF), // Miscellaneous Symbols And Pictographs (Emoji)
2978            (125, 0x1F680, 0x1F6FF), // Transport And Map Symbols
2979            (126, 0x1F700, 0x1F77F), // Alchemical Symbols
2980            (127, 0x1F900, 0x1F9FF), // Supplemental Symbols and Pictographs
2981        ];
2982
2983        for (range_idx, bit_pos, start, end) in range_mappings.iter().map(|&(bit, start, end)| {
2984            let range_idx = bit / 32;
2985            let bit_pos = bit % 32;
2986            (range_idx, bit_pos, start, end)
2987        }) {
2988            if range_idx < 4 && (ranges[range_idx] & (1 << bit_pos)) != 0 {
2989                unicode_ranges.push(UnicodeRange { start, end });
2990            }
2991        }
2992        
2993        // Verify OS/2 reported ranges against actual CMAP support
2994        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
2995        // for ranges they don't actually have glyphs for
2996        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);
2997        
2998        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
2999        if unicode_ranges.is_empty() {
3000            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
3001                unicode_ranges = cmap_ranges;
3002            }
3003        }
3004
3005        // If no monospace detection yet, check using hmtx
3006        if detected_monospace.is_none() {
3007            // Try using PANOSE classification
3008            if os2_table.panose[0] == 2 {
3009                // 2 = Latin Text
3010                detected_monospace = Some(os2_table.panose[3] == 9); // 9 = Monospaced
3011            } else {
3012                let hhea_data = provider.table_data(tag::HHEA).ok()??;
3013                let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
3014                let maxp_data = provider.table_data(tag::MAXP).ok()??;
3015                let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
3016                let hmtx_data = provider.table_data(tag::HMTX).ok()??;
3017                let hmtx_table = ReadScope::new(&hmtx_data)
3018                    .read_dep::<HmtxTable<'_>>((
3019                        usize::from(maxp_table.num_glyphs),
3020                        usize::from(hhea_table.num_h_metrics),
3021                    ))
3022                    .ok()?;
3023
3024                let mut monospace = true;
3025                let mut last_advance = 0;
3026                for i in 0..hhea_table.num_h_metrics as usize {
3027                    let advance = hmtx_table.h_metrics.read_item(i).ok()?.advance_width;
3028                    if i > 0 && advance != last_advance {
3029                        monospace = false;
3030                        break;
3031                    }
3032                    last_advance = advance;
3033                }
3034
3035                detected_monospace = Some(monospace);
3036            }
3037        }
3038
3039        let is_monospace = detected_monospace.unwrap_or(false);
3040
3041        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
3042        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;
3043
3044        // One font can support multiple patterns
3045        let mut f_family = None;
3046
3047        let patterns = name_table
3048            .name_records
3049            .iter()
3050            .filter_map(|name_record| {
3051                let name_id = name_record.name_id;
3052                if name_id == FONT_SPECIFIER_FAMILY_ID {
3053                    let family = fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID).ok()??;
3054                    f_family = Some(family);
3055                    None
3056                } else if name_id == FONT_SPECIFIER_NAME_ID {
3057                    let family = f_family.as_ref()?;
3058                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
3059                    if name.to_bytes().is_empty() {
3060                        None
3061                    } else {
3062                        // Initialize metadata structure
3063                        let mut metadata = FcFontMetadata::default();
3064
3065                        const NAME_ID_COPYRIGHT: u16 = 0;
3066                        const NAME_ID_FAMILY: u16 = 1;
3067                        const NAME_ID_SUBFAMILY: u16 = 2;
3068                        const NAME_ID_UNIQUE_ID: u16 = 3;
3069                        const NAME_ID_FULL_NAME: u16 = 4;
3070                        const NAME_ID_VERSION: u16 = 5;
3071                        const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
3072                        const NAME_ID_TRADEMARK: u16 = 7;
3073                        const NAME_ID_MANUFACTURER: u16 = 8;
3074                        const NAME_ID_DESIGNER: u16 = 9;
3075                        const NAME_ID_DESCRIPTION: u16 = 10;
3076                        const NAME_ID_VENDOR_URL: u16 = 11;
3077                        const NAME_ID_DESIGNER_URL: u16 = 12;
3078                        const NAME_ID_LICENSE: u16 = 13;
3079                        const NAME_ID_LICENSE_URL: u16 = 14;
3080                        const NAME_ID_PREFERRED_FAMILY: u16 = 16;
3081                        const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;
3082
3083                        // Extract metadata from name table
3084                        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
3085                        metadata.font_family = get_name_string(&name_data, NAME_ID_FAMILY);
3086                        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
3087                        metadata.full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);
3088                        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
3089                        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
3090                        metadata.postscript_name =
3091                            get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
3092                        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
3093                        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
3094                        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
3095                        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
3096                        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
3097                        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
3098                        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
3099                        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
3100                        metadata.preferred_family =
3101                            get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
3102                        metadata.preferred_subfamily =
3103                            get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);
3104
3105                        let mut name = String::from_utf8_lossy(name.to_bytes()).to_string();
3106                        let mut family = String::from_utf8_lossy(family.as_bytes()).to_string();
3107                        if name.starts_with(".") {
3108                            name = name[1..].to_string();
3109                        }
3110                        if family.starts_with(".") {
3111                            family = family[1..].to_string();
3112                        }
3113                        Some((
3114                            FcPattern {
3115                                name: Some(name),
3116                                family: Some(family),
3117                                bold: if is_bold {
3118                                    PatternMatch::True
3119                                } else {
3120                                    PatternMatch::False
3121                                },
3122                                italic: if is_italic {
3123                                    PatternMatch::True
3124                                } else {
3125                                    PatternMatch::False
3126                                },
3127                                oblique: if is_oblique {
3128                                    PatternMatch::True
3129                                } else {
3130                                    PatternMatch::False
3131                                },
3132                                monospace: if is_monospace {
3133                                    PatternMatch::True
3134                                } else {
3135                                    PatternMatch::False
3136                                },
3137                                condensed: if stretch <= FcStretch::Condensed {
3138                                    PatternMatch::True
3139                                } else {
3140                                    PatternMatch::False
3141                                },
3142                                weight,
3143                                stretch,
3144                                unicode_ranges: unicode_ranges.clone(),
3145                                metadata,
3146                            },
3147                            font_index,
3148                        ))
3149                    }
3150                } else {
3151                    None
3152                }
3153            })
3154            .collect::<BTreeSet<_>>();
3155
3156        results.extend(patterns.into_iter().map(|(pat, index)| {
3157            (
3158                pat,
3159                FcFontPath {
3160                    path: filepath.to_string_lossy().to_string(),
3161                    font_index: index,
3162                },
3163            )
3164        }));
3165    }
3166
3167    if results.is_empty() {
3168        None
3169    } else {
3170        Some(results)
3171    }
3172}
3173
/// Parse font bytes and extract font patterns for in-memory fonts.
///
/// This is the public API for parsing in-memory font data to create
/// `(FcPattern, FcFont)` tuples that can be added to an `FcFontCache`
/// via `with_memory_fonts()`. It is a thin wrapper that forwards both
/// arguments unchanged to `FcParseFontBytesInner`.
///
/// # Arguments
/// * `font_bytes` - The raw bytes of a font file. Data starting with the
///   `ttcf` tag is treated as a font collection containing several faces.
/// * `font_id` - An identifier string for this font; it is copied into the
///   `id` field of every returned `FcFont`.
///
/// # Returns
/// A vector of `(FcPattern, FcFont)` tuples describing the faces found in the
/// data. Every `FcFont` holds its own copy of `font_bytes` together with the
/// index of the face it refers to.
/// Returns `None` if the font could not be parsed or no pattern was produced.
///
/// # Example
/// ```ignore
/// use rust_fontconfig::{FcFontCache, FcParseFontBytes};
///
/// let font_bytes = include_bytes!("path/to/font.ttf");
/// let mut cache = FcFontCache::default();
///
/// if let Some(fonts) = FcParseFontBytes(font_bytes, "MyFont") {
///     cache.with_memory_fonts(fonts);
/// }
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontBytes(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    FcParseFontBytesInner(font_bytes, font_id)
}
3204
/// Internal implementation for parsing in-memory font bytes; backs `FcParseFontBytes`.
///
/// For every face in `font_bytes` (one face, or up to 100 faces when the data starts with the
/// `ttcf` collection tag) this reads the `head`, `OS/2` and `name` tables, derives the style
/// flags, weight, stretch, Unicode coverage and monospace flag, and emits one
/// `(FcPattern, FcFont)` pair per distinct pattern. The pattern metadata is left at its default.
///
/// Monospace detection tries, in order: the `post` table's fixed-pitch flag, the PANOSE
/// proportion byte, and finally a comparison of all advance widths in `hmtx`. The `post` table
/// is therefore optional: a font without it is still parsed and classified by the fallbacks.
///
/// # Arguments
/// * `font_bytes` - raw font file contents
/// * `font_id` - identifier copied into each returned `FcFont`
///
/// # Returns
/// `None` when the data cannot be read as font data, when `head`, `OS/2` or `name` is missing
/// or unreadable for any face, or when no pattern could be built; otherwise the collected pairs.
///
/// NOTE(review): an earlier comment said this is also used by `FcParseFont` for disk fonts;
/// confirm against that function before relying on it.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontBytesInner(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        get_name::fontcode_get_name,
        post::PostTable,
        tables::{
            os2::Os2, FontTableProvider, HeadTable, HheaTable, HmtxTable, MaxpTable, NameTable,
        },
        tag,
    };
    use std::collections::BTreeSet;

    const FONT_SPECIFIER_NAME_ID: u16 = 4;
    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;
    // Upper bound on the number of faces read from a collection header.
    const MAX_COLLECTION_FACES: usize = 100;

    // Maps a boolean font property onto the tri-state used by `FcPattern`.
    fn to_match(flag: bool) -> PatternMatch {
        if flag {
            PatternMatch::True
        } else {
            PatternMatch::False
        }
    }

    // A collection header stores its face count as a big-endian u32 at byte offset 8.
    let face_count = if font_bytes.len() >= 12 && font_bytes.starts_with(b"ttcf") {
        let declared =
            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
        (declared as usize).min(MAX_COLLECTION_FACES)
    } else {
        1
    };

    let scope = ReadScope::new(font_bytes);
    let font_file = scope.read::<FontData<'_>>().ok()?;

    let mut results = Vec::new();

    for font_index in 0..face_count {
        let provider = font_file.table_provider(font_index).ok()?;
        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;

        let is_bold = head_table.is_bold();
        let is_italic = head_table.is_italic();

        // `post` only feeds monospace detection and fallbacks exist further down, so a missing
        // or unreadable table must not reject the whole font.
        let mut detected_monospace = None;
        if let Ok(Some(post_data)) = provider.table_data(tag::POST) {
            if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
                detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
            }
        }

        let os2_data = provider.table_data(tag::OS_2).ok()??;
        let os2_table = ReadScope::new(&os2_data)
            .read_dep::<Os2>(os2_data.len())
            .ok()?;

        let is_oblique = os2_table
            .fs_selection
            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
        let weight = FcWeight::from_u16(os2_table.us_weight_class);
        let stretch = FcStretch::from_u16(os2_table.us_width_class);

        let mut unicode_ranges = Vec::new();
        let ranges = [
            os2_table.ul_unicode_range1,
            os2_table.ul_unicode_range2,
            os2_table.ul_unicode_range3,
            os2_table.ul_unicode_range4,
        ];

        // OS/2 ulUnicodeRange bits handled here: (bit index, first code point, last code point).
        // NOTE(review): originally described as the same table as in FcParseFont - confirm.
        let range_mappings = [
            (0, 0x0000u32, 0x007Fu32),
            (1, 0x0080, 0x00FF),
            (2, 0x0100, 0x017F),
            (3, 0x0180, 0x024F),
            (4, 0x0250, 0x02AF),
            (5, 0x02B0, 0x02FF),
            (6, 0x0300, 0x036F),
            (7, 0x0370, 0x03FF),
            (8, 0x2C80, 0x2CFF),
            (9, 0x0400, 0x04FF),
            (10, 0x0530, 0x058F),
            (11, 0x0590, 0x05FF),
            (12, 0x0600, 0x06FF),
            (31, 0x2000, 0x206F),
            (48, 0x3000, 0x303F),
            (49, 0x3040, 0x309F),
            (50, 0x30A0, 0x30FF),
            (59, 0x4E00, 0x9FFF),
            (62, 0xAC00, 0xD7AF),
        ];

        // Bit N lives in word N / 32 at position N % 32.
        for &(bit, start, end) in &range_mappings {
            let range_idx = (bit / 32) as usize;
            let bit_pos = bit % 32;
            if range_idx < 4 && (ranges[range_idx] & (1 << bit_pos)) != 0 {
                unicode_ranges.push(UnicodeRange { start, end });
            }
        }

        // Keep only the claimed ranges that the cmap check accepts.
        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);

        // Nothing claimed or nothing survived verification: derive coverage from the cmap.
        if unicode_ranges.is_empty() {
            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
                unicode_ranges = cmap_ranges;
            }
        }

        if detected_monospace.is_none() {
            if os2_table.panose[0] == 2 {
                // PANOSE family kind 2 with proportion 9 is treated as monospaced.
                detected_monospace = Some(os2_table.panose[3] == 9);
            } else if let (Ok(Some(hhea_data)), Ok(Some(maxp_data)), Ok(Some(hmtx_data))) = (
                provider.table_data(tag::HHEA),
                provider.table_data(tag::MAXP),
                provider.table_data(tag::HMTX),
            ) {
                if let (Ok(hhea_table), Ok(maxp_table)) = (
                    ReadScope::new(&hhea_data).read::<HheaTable>(),
                    ReadScope::new(&maxp_data).read::<MaxpTable>(),
                ) {
                    if let Ok(hmtx_table) = ReadScope::new(&hmtx_data).read_dep::<HmtxTable<'_>>((
                        usize::from(maxp_table.num_glyphs),
                        usize::from(hhea_table.num_h_metrics),
                    )) {
                        // Last resort: monospaced when every readable advance width is equal.
                        let mut monospace = true;
                        let mut last_advance = 0;
                        for i in 0..hhea_table.num_h_metrics as usize {
                            if let Ok(metric) = hmtx_table.h_metrics.read_item(i) {
                                if i > 0 && metric.advance_width != last_advance {
                                    monospace = false;
                                    break;
                                }
                                last_advance = metric.advance_width;
                            }
                        }
                        detected_monospace = Some(monospace);
                    }
                }
            }
        }

        let is_monospace = detected_monospace.unwrap_or(false);

        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;

        // Filled in when a family record is seen; a full-name record only yields a pattern
        // once a family is known, so the record order in the table matters here.
        let mut f_family = None;

        // The set removes the duplicates produced by repeated name records.
        let patterns: BTreeSet<_> = name_table
            .name_records
            .iter()
            .filter_map(|name_record| {
                let name_id = name_record.name_id;
                if name_id == FONT_SPECIFIER_FAMILY_ID {
                    if let Ok(Some(family)) = fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID) {
                        f_family = Some(family);
                    }
                    None
                } else if name_id == FONT_SPECIFIER_NAME_ID {
                    let family = f_family.as_ref()?;
                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
                    if name.to_bytes().is_empty() {
                        None
                    } else {
                        let mut name_str = String::from_utf8_lossy(name.to_bytes()).to_string();
                        let mut family_str = String::from_utf8_lossy(family.as_bytes()).to_string();
                        // Drop a single leading dot from either name.
                        if name_str.starts_with('.') {
                            name_str = name_str[1..].to_string();
                        }
                        if family_str.starts_with('.') {
                            family_str = family_str[1..].to_string();
                        }

                        Some((
                            FcPattern {
                                name: Some(name_str),
                                family: Some(family_str),
                                bold: to_match(is_bold),
                                italic: to_match(is_italic),
                                oblique: to_match(is_oblique),
                                monospace: to_match(is_monospace),
                                condensed: to_match(stretch <= FcStretch::Condensed),
                                weight,
                                stretch,
                                unicode_ranges: unicode_ranges.clone(),
                                metadata: FcFontMetadata::default(),
                            },
                            font_index,
                        ))
                    }
                } else {
                    None
                }
            })
            .collect();

        // Every pattern gets its own copy of the font data plus the face index it belongs to.
        results.extend(patterns.into_iter().map(|(pat, idx)| {
            (
                pat,
                FcFont {
                    bytes: font_bytes.to_vec(),
                    font_index: idx,
                    id: font_id.to_string(),
                },
            )
        }));
    }

    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}
3415
/// Scans all given font directories and returns every pattern found in them.
///
/// Each `(prefix, path)` pair is resolved with `process_path` (as a non-include path); pairs
/// that do not resolve are skipped. Every resolved directory is handed to
/// `FcScanSingleDirectoryRecursive` and the per-directory results are concatenated.
/// With the `multithreading` feature the directories are processed in parallel via rayon.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(feature = "multithreading")]
    {
        use rayon::prelude::*;

        paths
            .par_iter()
            .filter_map(|(prefix, raw_path)| process_path(prefix, PathBuf::from(raw_path), false))
            .flat_map(FcScanSingleDirectoryRecursive)
            .collect()
    }
    #[cfg(not(feature = "multithreading"))]
    {
        paths
            .iter()
            .filter_map(|(prefix, raw_path)| process_path(prefix, PathBuf::from(raw_path), false))
            .flat_map(FcScanSingleDirectoryRecursive)
            .collect()
    }
}
3450
/// Collects every non-directory entry below `dir` and parses the lot with `FcParseFontFiles`.
///
/// The tree is walked one level at a time: all directories of the current level are listed,
/// their sub-directories form the next level, and everything else is queued as a file to parse.
/// Directories that cannot be read and entries that fail to load are silently skipped.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
    let mut font_files = Vec::new();
    let mut current_level = vec![dir];

    while !current_level.is_empty() {
        let mut next_level = Vec::new();

        for directory in &current_level {
            let entries = match std::fs::read_dir(directory) {
                Ok(entries) => entries,
                Err(_) => continue,
            };

            for entry_path in entries.filter_map(|entry| entry.ok()).map(|entry| entry.path()) {
                if entry_path.is_dir() {
                    next_level.push(entry_path);
                } else {
                    font_files.push(entry_path);
                }
            }
        }

        current_level = next_level;
    }

    FcParseFontFiles(&font_files)
}
3488
/// Runs `FcParseFont` on every path in `files_to_parse` and concatenates the results.
///
/// Files for which `FcParseFont` returns `None` contribute nothing. With the `multithreading`
/// feature the files are parsed in parallel via rayon, otherwise sequentially.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(feature = "multithreading")]
    let per_file: Vec<Vec<_>> = {
        use rayon::prelude::*;

        files_to_parse
            .par_iter()
            .filter_map(|file| FcParseFont(file))
            .collect()
    };
    #[cfg(not(feature = "multithreading"))]
    let per_file: Vec<Vec<_>> = files_to_parse
        .iter()
        .filter_map(|file| FcParseFont(file))
        .collect();

    // Merge the per-file pattern lists into one flat list.
    per_file.into_iter().flatten().collect()
}
3513#[cfg(all(feature = "std", feature = "parsing"))]
/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
///
/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
///
/// Resolution happens in two steps:
/// 1. A leading `~` path component is replaced by the value of `HOME`; if `HOME` is not set
///    the result is `None`.
/// 2. The prefix is applied:
///    * `None` - the path is returned as-is,
///    * `"cwd"` / `"default"` - the path is joined onto `.`,
///    * `"xdg"` - the path is joined onto `XDG_CONFIG_HOME` when `is_include_path` is true,
///      otherwise onto `XDG_DATA_HOME`. A variable that is unset *or empty* falls back to
///      `$HOME/.config` respectively `$HOME/.local/share`, as the XDG Base Directory
///      specification requires; without `HOME` the result is `None`,
///    * anything else - unsupported, `None`.
///
/// Joining follows `PathBuf::join`, so an absolute `path` replaces the base it is joined onto.
fn process_path(
    prefix: &Option<String>,
    mut path: PathBuf,
    is_include_path: bool,
) -> Option<PathBuf> {
    // `var_os` keeps working for values that are not valid Unicode.
    use std::env::var_os;

    const HOME_SHORTCUT: &str = "~";
    const CWD_PATH: &str = ".";

    const HOME_ENV_VAR: &str = "HOME";
    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";

    const PREFIX_CWD: &str = "cwd";
    const PREFIX_DEFAULT: &str = "default";
    const PREFIX_XDG: &str = "xdg";

    // These lookups could, in theory, be cached, but the work required to do so outweighs the
    // minor benefits.
    fn get_home_value() -> Option<PathBuf> {
        var_os(HOME_ENV_VAR).map(PathBuf::from)
    }

    // Reads an XDG base directory variable; an unset or empty value yields `$HOME/<suffix>`.
    fn get_xdg_dir(env_var: &str, home_suffix: &str) -> Option<PathBuf> {
        var_os(env_var)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .or_else(|| get_home_value().map(|home_path| home_path.join(home_suffix)))
    }

    // Resolve the tilde component at the start of the path, if present.
    let after_tilde = path
        .strip_prefix(HOME_SHORTCUT)
        .ok()
        .map(|rest| rest.to_path_buf());
    if let Some(rest) = after_tilde {
        path = get_home_value()?.join(rest);
    }

    // Resolve prefix values.
    match prefix.as_deref() {
        None => Some(path),
        Some(PREFIX_CWD) | Some(PREFIX_DEFAULT) => Some(PathBuf::from(CWD_PATH).join(path)),
        Some(PREFIX_XDG) => {
            let base = if is_include_path {
                get_xdg_dir(XDG_CONFIG_HOME_ENV_VAR, XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX)
            } else {
                get_xdg_dir(XDG_DATA_HOME_ENV_VAR, XDG_DATA_HOME_DEFAULT_PATH_SUFFIX)
            };
            base.map(|xdg_base| xdg_base.join(path))
        }
        Some(_) => None, // Unsupported prefix
    }
}
3594
/// Looks up the entry `name_id` in raw `name` table data and returns it as an owned `String`.
///
/// The lookup is delegated to `fontcode_get_name`; the bytes it returns are converted
/// lossily, so invalid UTF-8 sequences become replacement characters.
/// Returns `None` when the lookup fails or finds no entry.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
    match fontcode_get_name(name_data, name_id) {
        Ok(Some(raw_name)) => Some(String::from_utf8_lossy(raw_name.to_bytes()).into_owned()),
        _ => None,
    }
}
3603
3604/// Representative test codepoints for each Unicode block.
3605/// These are carefully chosen to be actual script characters (not punctuation/symbols)
3606/// that a font claiming to support this script should definitely have.
3607#[cfg(all(feature = "std", feature = "parsing"))]
fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
    // Blocks with a hand-picked probe list are recognised by their first code point alone;
    // `end` only matters for the generic fallback at the bottom.
    let curated: &[u32] = match start {
        // Basic Latin: A M Z a m z
        0x0000 => &[0x0041, 0x004D, 0x005A, 0x0061, 0x006D, 0x007A],
        // Latin-1 Supplement: À É Ñ à é ñ
        0x0080 => &[0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1],
        // Latin Extended-A: Ā Đ Ł Œ Š
        0x0100 => &[0x0100, 0x0110, 0x0141, 0x0152, 0x0160],
        // Latin Extended-B: ƀ Ơ ư Ǎ
        0x0180 => &[0x0180, 0x01A0, 0x01B0, 0x01CD],
        // IPA Extensions: ɐ ə ɪ ɹ
        0x0250 => &[0x0250, 0x0259, 0x026A, 0x0279],
        // Greek and Coptic: Α Β Γ α β ω
        0x0370 => &[0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9],
        // Cyrillic: А Б В а б Я
        0x0400 => &[0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F],
        // Armenian: Ա Բ Գ ա բ
        0x0530 => &[0x0531, 0x0532, 0x0533, 0x0561, 0x0562],
        // Hebrew: א ב ג ש ת
        0x0590 => &[0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA],
        // Arabic: ا ب ت ج م
        0x0600 => &[0x0627, 0x0628, 0x062A, 0x062C, 0x0645],
        // Syriac: ܐ ܒ ܓ ܕ
        0x0700 => &[0x0710, 0x0712, 0x0713, 0x0715],
        // Devanagari: अ आ क ख ह
        0x0900 => &[0x0905, 0x0906, 0x0915, 0x0916, 0x0939],
        // Bengali: অ আ ক খ
        0x0980 => &[0x0985, 0x0986, 0x0995, 0x0996],
        // Gurmukhi: ਅ ਆ ਕ ਖ
        0x0A00 => &[0x0A05, 0x0A06, 0x0A15, 0x0A16],
        // Gujarati: અ આ ક ખ
        0x0A80 => &[0x0A85, 0x0A86, 0x0A95, 0x0A96],
        // Oriya: ଅ ଆ କ ଖ
        0x0B00 => &[0x0B05, 0x0B06, 0x0B15, 0x0B16],
        // Tamil: அ ஆ க த
        0x0B80 => &[0x0B85, 0x0B86, 0x0B95, 0x0BA4],
        // Telugu: అ ఆ క ఖ
        0x0C00 => &[0x0C05, 0x0C06, 0x0C15, 0x0C16],
        // Kannada: ಅ ಆ ಕ ಖ
        0x0C80 => &[0x0C85, 0x0C86, 0x0C95, 0x0C96],
        // Malayalam: അ ആ ക ഖ
        0x0D00 => &[0x0D05, 0x0D06, 0x0D15, 0x0D16],
        // Thai: ก ข ค ง เ
        0x0E00 => &[0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40],
        // Lao: ກ ຂ ຄ ງ
        0x0E80 => &[0x0E81, 0x0E82, 0x0E84, 0x0E87],
        // Myanmar: က ခ ဂ တ မ
        0x1000 => &[0x1000, 0x1001, 0x1002, 0x1010, 0x1019],
        // Georgian: ა ბ გ დ
        0x10A0 => &[0x10D0, 0x10D1, 0x10D2, 0x10D3],
        // Hangul Jamo: ᄀ ᄂ ᄃ ᅡ ᅢ
        0x1100 => &[0x1100, 0x1102, 0x1103, 0x1161, 0x1162],
        // Ethiopic: ሀ ለ ሐ መ
        0x1200 => &[0x1200, 0x1208, 0x1210, 0x1218],
        // Cherokee: Ꭰ Ꭱ Ꭲ Ꭳ
        0x13A0 => &[0x13A0, 0x13A1, 0x13A2, 0x13A3],
        // Khmer: ក ខ គ ឃ
        0x1780 => &[0x1780, 0x1781, 0x1782, 0x1783],
        // Mongolian: ᠠ ᠡ ᠢ ᠣ
        0x1800 => &[0x1820, 0x1821, 0x1822, 0x1823],
        // Hiragana: あ い う か き ん
        0x3040 => &[0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093],
        // Katakana: ア イ ウ カ キ ン
        0x30A0 => &[0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3],
        // Bopomofo: ㄅ ㄆ ㄇ ㄈ
        0x3100 => &[0x3105, 0x3106, 0x3107, 0x3108],
        // CJK Unified Ideographs (common characters): 一 中 人 大 日 月
        0x4E00 => &[0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708],
        // Hangul Syllables: 가 각 간 나 다
        0xAC00 => &[0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4],
        // CJK Compatibility Ideographs: 豈 更 車
        0xF900 => &[0xF900, 0xF901, 0xF902],
        // Arabic Presentation Forms-A: ﭐ ﭑ ﭒ ﭖ
        0xFB50 => &[0xFB50, 0xFB51, 0xFB52, 0xFB56],
        // Arabic Presentation Forms-B: ﹰ ﹲ ﹴ ﹶ
        0xFE70 => &[0xFE70, 0xFE72, 0xFE74, 0xFE76],
        // Halfwidth and Fullwidth Forms: ! A a 。
        0xFF00 => &[0xFF01, 0xFF21, 0xFF41, 0xFF61],
        // Any other block: sample the range itself.
        _ => {
            let span = end - start;
            return if span > 20 {
                // Four probes at 1/5, 2/5, 3/5 and 4/5 of the way through the range.
                vec![
                    start + span / 5,
                    start + 2 * span / 5,
                    start + 3 * span / 5,
                    start + 4 * span / 5,
                ]
            } else {
                // Small range: probe its first code point and its midpoint.
                vec![start, start + span / 2]
            };
        }
    };

    curated.to_vec()
}
3702
/// Filters the Unicode ranges claimed by OS/2 down to those backed by the font's CMAP table.
///
/// For each range a handful of probe code points (from `get_verification_codepoints`) is
/// looked up in the chosen CMAP subtable. The range is kept when at least half of the probes,
/// rounded up, map to a glyph other than glyph 0.
///
/// If verification is impossible - no CMAP table, an unreadable table or subtable, or no
/// suitable Unicode subtable - the OS/2 ranges are returned unchanged.
#[cfg(all(feature = "std", feature = "parsing"))]
fn verify_unicode_ranges_with_cmap(
    provider: &impl FontTableProvider,
    os2_ranges: Vec<UnicodeRange>,
) -> Vec<UnicodeRange> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable, EncodingId, PlatformId};

    if os2_ranges.is_empty() {
        return Vec::new();
    }

    // Without a readable CMAP table there is nothing to check against: trust OS/2.
    let cmap_data = match provider.table_data(tag::CMAP) {
        Ok(Some(data)) => data,
        _ => return os2_ranges,
    };
    let cmap = match ReadScope::new(&cmap_data).read::<Cmap<'_>>() {
        Ok(table) => table,
        Err(_) => return os2_ranges,
    };

    // Subtables are tried in this fixed order of preference.
    let preferred_record = cmap
        .find_subtable(PlatformId::UNICODE, EncodingId(3))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(4)))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)));
    let subtable_offset = match preferred_record {
        Some(record) => record.offset as usize,
        None => return os2_ranges, // No suitable subtable, trust OS/2
    };

    let cmap_subtable = match ReadScope::new(&cmap_data)
        .offset(subtable_offset)
        .read::<CmapSubtable<'_>>()
    {
        Ok(subtable) => subtable,
        Err(_) => return os2_ranges,
    };

    os2_ranges
        .into_iter()
        .filter(|range| {
            let probes = get_verification_codepoints(range.start, range.end);
            // ceil(len / 2): at least half of the probes must resolve to a real glyph.
            let needed = (probes.len() + 1) / 2;

            // `take(needed)` stops probing as soon as enough hits have been found.
            let found = probes
                .into_iter()
                .filter(|&cp| range.start <= cp && cp <= range.end)
                .filter(|&cp| matches!(cmap_subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                .take(needed)
                .count();

            found >= needed
        })
        .collect()
}
3779
/// Derive Unicode coverage by probing the font's `cmap` table directly.
///
/// This is the fallback used when the OS/2 `ulUnicodeRange` bits are all zero.
/// A fixed list of well-known Unicode blocks is probed: a block is reported as
/// covered when at least half (rounded up) of its verification codepoints map
/// to a glyph id other than 0.
///
/// Returns `None` when the `cmap` table cannot be fetched or parsed, when none
/// of the preferred platform/encoding subtables is present, or when no probed
/// block qualifies.
#[cfg(all(feature = "std", feature = "parsing"))]
fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable, PlatformId, EncodingId};

    // Standard Unicode blocks to probe, as inclusive (start, end) pairs.
    const PROBE_BLOCKS: &[(u32, u32)] = &[
        (0x0000, 0x007F), // Basic Latin
        (0x0080, 0x00FF), // Latin-1 Supplement
        (0x0100, 0x017F), // Latin Extended-A
        (0x0180, 0x024F), // Latin Extended-B
        (0x0250, 0x02AF), // IPA Extensions
        (0x0300, 0x036F), // Combining Diacritical Marks
        (0x0370, 0x03FF), // Greek and Coptic
        (0x0400, 0x04FF), // Cyrillic
        (0x0500, 0x052F), // Cyrillic Supplement
        (0x0530, 0x058F), // Armenian
        (0x0590, 0x05FF), // Hebrew
        (0x0600, 0x06FF), // Arabic
        (0x0700, 0x074F), // Syriac
        (0x0900, 0x097F), // Devanagari
        (0x0980, 0x09FF), // Bengali
        (0x0A00, 0x0A7F), // Gurmukhi
        (0x0A80, 0x0AFF), // Gujarati
        (0x0B00, 0x0B7F), // Oriya
        (0x0B80, 0x0BFF), // Tamil
        (0x0C00, 0x0C7F), // Telugu
        (0x0C80, 0x0CFF), // Kannada
        (0x0D00, 0x0D7F), // Malayalam
        (0x0E00, 0x0E7F), // Thai
        (0x0E80, 0x0EFF), // Lao
        (0x1000, 0x109F), // Myanmar
        (0x10A0, 0x10FF), // Georgian
        (0x1100, 0x11FF), // Hangul Jamo
        (0x1200, 0x137F), // Ethiopic
        (0x13A0, 0x13FF), // Cherokee
        (0x1780, 0x17FF), // Khmer
        (0x1800, 0x18AF), // Mongolian
        (0x2000, 0x206F), // General Punctuation
        (0x20A0, 0x20CF), // Currency Symbols
        (0x2100, 0x214F), // Letterlike Symbols
        (0x2190, 0x21FF), // Arrows
        (0x2200, 0x22FF), // Mathematical Operators
        (0x2500, 0x257F), // Box Drawing
        (0x25A0, 0x25FF), // Geometric Shapes
        (0x2600, 0x26FF), // Miscellaneous Symbols
        (0x3000, 0x303F), // CJK Symbols and Punctuation
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0x3100, 0x312F), // Bopomofo
        (0x3130, 0x318F), // Hangul Compatibility Jamo
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0xAC00, 0xD7AF), // Hangul Syllables
        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
    ];

    let table_bytes = provider.table_data(tag::CMAP).ok()??;
    let cmap = ReadScope::new(&table_bytes).read::<Cmap<'_>>().ok()?;

    // Subtable preference order; the first one present wins.
    let record = cmap
        .find_subtable(PlatformId::UNICODE, EncodingId(3))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(4)))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)))?;

    let subtable = ReadScope::new(&table_bytes)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    let covered: Vec<UnicodeRange> = PROBE_BLOCKS
        .iter()
        .filter_map(|&(start, end)| {
            let probes = get_verification_codepoints(start, end);
            // ceil(len / 2) probes must resolve to a real glyph.
            let needed = (probes.len() + 1) / 2;

            // `take(needed)` stops probing as soon as the threshold is met.
            let found = probes
                .into_iter()
                .filter(|&cp| matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                .take(needed)
                .count();

            if found >= needed {
                Some(UnicodeRange { start, end })
            } else {
                None
            }
        })
        .collect();

    // An empty result is reported as "no information" rather than "no coverage".
    Some(covered).filter(|blocks| !blocks.is_empty())
}
3884
/// Translate the OS/2 `ulUnicodeRange1..4` bitfields into a list of Unicode
/// ranges (unused, kept for reference).
///
/// Only the subset of bits listed in the mapping table below is recognised;
/// any other set bit is ignored. The returned ranges follow the order of that
/// table.
#[cfg(feature = "parsing")]
#[allow(dead_code)]
fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
    // (bit number, first codepoint, last codepoint), with bit numbers as
    // assigned by the OpenType OS/2 specification for ulUnicodeRange.
    const RANGE_MAPPINGS: [(usize, u32, u32); 7] = [
        (0, 0x0000, 0x007F),  // Basic Latin
        (1, 0x0080, 0x00FF),  // Latin-1 Supplement
        (2, 0x0100, 0x017F),  // Latin Extended-A
        (7, 0x0370, 0x03FF),  // Greek and Coptic
        (9, 0x0400, 0x04FF),  // Cyrillic
        (31, 0x2000, 0x206F), // General Punctuation (bit 29 is Latin Extended Additional)
        (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs (bit 57 is Non-Plane 0)
                              // Add more ranges as needed
    ];

    // The four 32-bit words together form one 128-bit field: bit N lives in
    // word N / 32 at position N % 32.
    let bitfields = [
        os2_table.ul_unicode_range1,
        os2_table.ul_unicode_range2,
        os2_table.ul_unicode_range3,
        os2_table.ul_unicode_range4,
    ];

    RANGE_MAPPINGS
        .iter()
        .filter(|&&(bit, _, _)| {
            // `get` keeps an out-of-range bit number from panicking.
            bitfields
                .get(bit / 32)
                .map_or(false, |word| (*word & (1 << (bit % 32))) != 0)
        })
        .map(|&(_, start, end)| UnicodeRange { start, end })
        .collect()
}
3926
/// Decide whether a font is monospaced.
///
/// Checks, in order:
/// 1. `detected_monospace`, if the caller already has an answer, is returned as-is.
/// 2. If the PANOSE family kind is 2 (Latin Text), the answer is whether the
///    PANOSE proportion byte equals 9 (Monospaced).
/// 3. Otherwise the `hmtx` long metrics are scanned and the font is considered
///    monospaced when every advance width is identical.
///
/// Returns `None` when the `hhea`, `maxp` or `hmtx` table is missing or cannot
/// be parsed, or when a metric entry cannot be read.
#[cfg(feature = "parsing")]
#[allow(dead_code)]
fn detect_monospace(
    provider: &impl FontTableProvider,
    os2_table: &Os2,
    detected_monospace: Option<bool>,
) -> Option<bool> {
    // A previously determined answer takes precedence over any heuristic.
    if detected_monospace.is_some() {
        return detected_monospace;
    }

    // Try using PANOSE classification
    let family_kind = os2_table.panose[0];
    let proportion = os2_table.panose[3];
    if family_kind == 2 {
        // 2 = Latin Text, 9 = Monospaced
        return Some(proportion == 9);
    }

    // Fall back to comparing glyph widths in the hmtx table, which needs the
    // glyph count from maxp and the metric count from hhea to be parsed.
    let hhea_bytes = provider.table_data(tag::HHEA).ok()??;
    let hhea = ReadScope::new(&hhea_bytes).read::<HheaTable>().ok()?;

    let maxp_bytes = provider.table_data(tag::MAXP).ok()??;
    let maxp = ReadScope::new(&maxp_bytes).read::<MaxpTable>().ok()?;

    let hmtx_bytes = provider.table_data(tag::HMTX).ok()??;
    let hmtx = ReadScope::new(&hmtx_bytes)
        .read_dep::<HmtxTable<'_>>((
            usize::from(maxp.num_glyphs),
            usize::from(hhea.num_h_metrics),
        ))
        .ok()?;

    // The first advance width becomes the reference; any later width that
    // differs means the font is proportional.
    let mut reference = None;
    for index in 0..hhea.num_h_metrics as usize {
        let width = hmtx.h_metrics.read_item(index).ok()?.advance_width;
        match reference {
            None => reference = Some(width),
            Some(expected) if expected != width => return Some(false),
            Some(_) => {}
        }
    }

    Some(true)
}