// rust_fontconfig/lib.rs

1//! # rust-fontconfig
2//!
3//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies) - using allsorts as a font parser to support `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf`
4//!
5//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
6//!
7//! ## Usage
8//!
9//! ### Basic Font Query
10//!
11//! ```rust,no_run
12//! use rust_fontconfig::{FcFontCache, FcPattern};
13//!
14//! fn main() {
15//!     // Build the font cache
16//!     let cache = FcFontCache::build();
17//!
18//!     // Query a font by name
19//!     let results = cache.query(
20//!         &FcPattern {
21//!             name: Some(String::from("Arial")),
22//!             ..Default::default()
23//!         },
24//!         &mut Vec::new() // Trace messages container
25//!     );
26//!
27//!     if let Some(font_match) = results {
28//!         println!("Font match ID: {:?}", font_match.id);
29//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
30//!     } else {
31//!         println!("No matching font found");
32//!     }
33//! }
34//! ```
35//!
36//! ### Resolve Font Chain and Query for Text
37//!
38//! ```rust,no_run
39//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
40//!
41//! fn main() {
42//!     # #[cfg(feature = "std")]
43//!     # {
44//!     let cache = FcFontCache::build();
45//!
46//!     // Build font fallback chain (without text parameter)
47//!     let font_chain = cache.resolve_font_chain(
48//!         &["Arial".to_string(), "sans-serif".to_string()],
49//!         FcWeight::Normal,
50//!         PatternMatch::DontCare,
51//!         PatternMatch::DontCare,
52//!         &mut Vec::new(),
53//!     );
54//!
55//!     // Query which fonts to use for specific text
56//!     let text = "Hello 你好 Здравствуйте";
57//!     let font_runs = font_chain.query_for_text(&cache, text);
58//!
59//!     println!("Text split into {} font runs:", font_runs.len());
60//!     for run in font_runs {
61//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
62//!     }
63//!     # }
64//! }
65//! ```
66
67#![allow(non_snake_case)]
68#![cfg_attr(not(feature = "std"), no_std)]
69
70extern crate alloc;
71
72#[cfg(all(feature = "std", feature = "parsing"))]
73use alloc::borrow::ToOwned;
74use alloc::collections::btree_map::BTreeMap;
75use alloc::string::{String, ToString};
76use alloc::vec::Vec;
77use alloc::{format, vec};
78#[cfg(all(feature = "std", feature = "parsing"))]
79use allsorts::binary::read::ReadScope;
80#[cfg(all(feature = "std", feature = "parsing"))]
81use allsorts::get_name::fontcode_get_name;
82#[cfg(all(feature = "std", feature = "parsing"))]
83use allsorts::tables::os2::Os2;
84#[cfg(all(feature = "std", feature = "parsing"))]
85use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
86#[cfg(all(feature = "std", feature = "parsing"))]
87use allsorts::tag;
88#[cfg(feature = "std")]
89use std::path::PathBuf;
90
91pub mod utils;
92#[cfg(feature = "std")]
93pub mod config;
94
95#[cfg(feature = "ffi")]
96pub mod ffi;
97
98#[cfg(feature = "async-registry")]
99pub mod scoring;
100#[cfg(feature = "async-registry")]
101pub mod registry;
102#[cfg(feature = "async-registry")]
103pub mod multithread;
104#[cfg(feature = "cache")]
105pub mod disk_cache;
106
/// Operating system type for generic font family resolution.
///
/// Selects which list of installed-font names a generic CSS family such as
/// `serif` or `monospace` is expanded to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatingSystem {
    /// Microsoft Windows.
    Windows,
    /// Linux.
    Linux,
    /// Apple macOS.
    MacOS,
    /// WebAssembly targets.
    Wasm,
}
115
116impl OperatingSystem {
117    /// Detect the current operating system at compile time
118    pub fn current() -> Self {
119        #[cfg(target_os = "windows")]
120        return OperatingSystem::Windows;
121        
122        #[cfg(target_os = "linux")]
123        return OperatingSystem::Linux;
124        
125        #[cfg(target_os = "macos")]
126        return OperatingSystem::MacOS;
127        
128        #[cfg(target_family = "wasm")]
129        return OperatingSystem::Wasm;
130        
131        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_family = "wasm")))]
132        return OperatingSystem::Linux; // Default fallback
133    }
134    
135    /// Get system-specific fonts for the "serif" generic family
136    /// Prioritizes fonts based on Unicode range coverage
137    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
138        let has_cjk = has_cjk_ranges(unicode_ranges);
139        let has_arabic = has_arabic_ranges(unicode_ranges);
140        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
141        
142        match self {
143            OperatingSystem::Windows => {
144                let mut fonts = Vec::new();
145                if has_cjk {
146                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
147                }
148                if has_arabic {
149                    fonts.push("Traditional Arabic");
150                }
151                fonts.push("Times New Roman");
152                fonts.iter().map(|s| s.to_string()).collect()
153            }
154            OperatingSystem::Linux => {
155                let mut fonts = Vec::new();
156                if has_cjk {
157                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
158                }
159                if has_arabic {
160                    fonts.push("Noto Serif Arabic");
161                }
162                fonts.extend_from_slice(&[
163                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
164                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
165                ]);
166                fonts.iter().map(|s| s.to_string()).collect()
167            }
168            OperatingSystem::MacOS => {
169                let mut fonts = Vec::new();
170                if has_cjk {
171                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
172                }
173                if has_arabic {
174                    fonts.push("Geeza Pro");
175                }
176                fonts.extend_from_slice(&["Times", "New York", "Palatino"]);
177                fonts.iter().map(|s| s.to_string()).collect()
178            }
179            OperatingSystem::Wasm => Vec::new(),
180        }
181    }
182    
183    /// Get system-specific fonts for the "sans-serif" generic family
184    /// Prioritizes fonts based on Unicode range coverage
185    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
186        let has_cjk = has_cjk_ranges(unicode_ranges);
187        let has_arabic = has_arabic_ranges(unicode_ranges);
188        let _has_cyrillic = has_cyrillic_ranges(unicode_ranges);
189        let has_hebrew = has_hebrew_ranges(unicode_ranges);
190        let has_thai = has_thai_ranges(unicode_ranges);
191        
192        match self {
193            OperatingSystem::Windows => {
194                let mut fonts = Vec::new();
195                if has_cjk {
196                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
197                }
198                if has_arabic {
199                    fonts.push("Segoe UI Arabic");
200                }
201                if has_hebrew {
202                    fonts.push("Segoe UI Hebrew");
203                }
204                if has_thai {
205                    fonts.push("Leelawadee UI");
206                }
207                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
208                fonts.iter().map(|s| s.to_string()).collect()
209            }
210            OperatingSystem::Linux => {
211                let mut fonts = Vec::new();
212                if has_cjk {
213                    fonts.extend_from_slice(&[
214                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
215                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
216                    ]);
217                }
218                if has_arabic {
219                    fonts.push("Noto Sans Arabic");
220                }
221                if has_hebrew {
222                    fonts.push("Noto Sans Hebrew");
223                }
224                if has_thai {
225                    fonts.push("Noto Sans Thai");
226                }
227                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
228                fonts.iter().map(|s| s.to_string()).collect()
229            }
230            OperatingSystem::MacOS => {
231                let mut fonts = Vec::new();
232                if has_cjk {
233                    fonts.extend_from_slice(&[
234                        "Hiragino Sans", "Hiragino Kaku Gothic ProN", 
235                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
236                    ]);
237                }
238                if has_arabic {
239                    fonts.push("Geeza Pro");
240                }
241                if has_hebrew {
242                    fonts.push("Arial Hebrew");
243                }
244                if has_thai {
245                    fonts.push("Thonburi");
246                }
247                fonts.extend_from_slice(&["San Francisco", "Helvetica Neue", "Lucida Grande"]);
248                fonts.iter().map(|s| s.to_string()).collect()
249            }
250            OperatingSystem::Wasm => Vec::new(),
251        }
252    }
253    
254    /// Get system-specific fonts for the "monospace" generic family
255    /// Prioritizes fonts based on Unicode range coverage
256    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
257        let has_cjk = has_cjk_ranges(unicode_ranges);
258        
259        match self {
260            OperatingSystem::Windows => {
261                let mut fonts = Vec::new();
262                if has_cjk {
263                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
264                }
265                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
266                fonts.iter().map(|s| s.to_string()).collect()
267            }
268            OperatingSystem::Linux => {
269                let mut fonts = Vec::new();
270                if has_cjk {
271                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
272                }
273                fonts.extend_from_slice(&[
274                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
275                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
276                ]);
277                fonts.iter().map(|s| s.to_string()).collect()
278            }
279            OperatingSystem::MacOS => {
280                let mut fonts = Vec::new();
281                if has_cjk {
282                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
283                }
284                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
285                fonts.iter().map(|s| s.to_string()).collect()
286            }
287            OperatingSystem::Wasm => Vec::new(),
288        }
289    }
290    
291    /// Expand a generic CSS font family to system-specific font names
292    /// Returns the original name if not a generic family
293    /// Prioritizes fonts based on Unicode range coverage
294    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
295        match family.to_lowercase().as_str() {
296            "serif" => self.get_serif_fonts(unicode_ranges),
297            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
298            "monospace" => self.get_monospace_fonts(unicode_ranges),
299            "cursive" | "fantasy" | "system-ui" => {
300                // Use sans-serif as fallback for these
301                self.get_sans_serif_fonts(unicode_ranges)
302            }
303            _ => vec![family.to_string()],
304        }
305    }
306}
307
308/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
309/// Prioritizes fonts based on Unicode range coverage
310/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
311pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
312    let mut expanded = Vec::new();
313    
314    for family in families {
315        expanded.extend(os.expand_generic_family(family, unicode_ranges));
316    }
317    
318    expanded
319}
320
/// UUID to identify a font (collections are broken up into separate fonts)
///
/// The wrapped `u128` is printed in the usual `8-4-4-4-12` hexadecimal UUID
/// notation by both `Display` and `Debug`.
#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FontId(pub u128);

impl core::fmt::Debug for FontId {
    /// Same output as `Display`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        core::fmt::Display::fmt(self, f)
    }
}

impl core::fmt::Display for FontId {
    /// Write the id as five dash-separated lowercase hex groups, most
    /// significant bits first.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let bits = self.0;
        let group_32 = (bits >> 96) & 0xFFFF_FFFF;
        let group_16_a = (bits >> 80) & 0xFFFF;
        let group_16_b = (bits >> 64) & 0xFFFF;
        let group_16_c = (bits >> 48) & 0xFFFF;
        let group_48 = bits & 0xFFFF_FFFF_FFFF;
        write!(
            f,
            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
            group_32, group_16_a, group_16_b, group_16_c, group_48
        )
    }
}

impl FontId {
    /// Generate a new unique FontId using an atomic counter.
    ///
    /// Ids are taken from a process-wide 64-bit counter that starts at 1, so
    /// only the lower 64 bits of the returned id are populated.
    pub fn new() -> Self {
        use core::sync::atomic::{AtomicU64, Ordering};
        static NEXT_ID: AtomicU64 = AtomicU64::new(1);
        FontId(u128::from(NEXT_ID.fetch_add(1, Ordering::Relaxed)))
    }
}
356
/// Whether a field is required to match (yes / no / don't care)
#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum PatternMatch {
    /// Default: don't particularly care whether the requirement matches
    #[default]
    DontCare,
    /// Requirement has to be true for the selected font
    True,
    /// Requirement has to be false for the selected font
    False,
}

impl PatternMatch {
    /// `true` for `True` and `False`, i.e. whenever the field actually
    /// constrains the match; `false` for `DontCare`.
    fn needs_to_match(&self) -> bool {
        *self != PatternMatch::DontCare
    }

    /// Two values are compatible when either side is `DontCare` or both are
    /// the same variant.
    fn matches(&self, other: &PatternMatch) -> bool {
        *self == PatternMatch::DontCare || *other == PatternMatch::DontCare || self == other
    }
}
384
/// Font weight values as defined in CSS specification
///
/// Variants are declared in ascending numeric order, so the derived ordering
/// compares weights by their CSS value. The default is `Normal` (400).
#[derive(Debug, Default, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcWeight {
    Thin = 100,
    ExtraLight = 200,
    Light = 300,
    #[default]
    Normal = 400,
    Medium = 500,
    SemiBold = 600,
    Bold = 700,
    ExtraBold = 800,
    Black = 900,
}

impl FcWeight {
    /// Map a numeric weight to the nearest named weight.
    ///
    /// Each named weight covers the 100-wide band centred on its value
    /// (e.g. 350..=449 is `Normal`); everything from 850 upwards is `Black`.
    pub fn from_u16(weight: u16) -> Self {
        match weight {
            0..=149 => FcWeight::Thin,
            150..=249 => FcWeight::ExtraLight,
            250..=349 => FcWeight::Light,
            350..=449 => FcWeight::Normal,
            450..=549 => FcWeight::Medium,
            550..=649 => FcWeight::SemiBold,
            650..=749 => FcWeight::Bold,
            750..=849 => FcWeight::ExtraBold,
            _ => FcWeight::Black,
        }
    }

    /// Pick the weight from `available` that should stand in for `self`.
    ///
    /// Returns `None` only when `available` is empty. Otherwise:
    /// * an exact match wins;
    /// * `Normal` and `Medium` prefer each other, then the closest weight
    ///   below 400, then the closest weight above 500;
    /// * weights below 400 prefer the closest lighter weight, then the
    ///   closest heavier one;
    /// * weights above 500 prefer the closest heavier weight, then the
    ///   closest lighter one.
    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
        let wanted = *self;

        // Exact match
        if available.contains(&wanted) {
            return Some(wanted);
        }

        // Closest available weight strictly below / above a limit.
        let closest_lighter =
            |limit: FcWeight| available.iter().copied().filter(|w| *w < limit).max();
        let closest_heavier =
            |limit: FcWeight| available.iter().copied().filter(|w| *w > limit).min();

        match wanted {
            FcWeight::Normal | FcWeight::Medium => {
                let partner = if wanted == FcWeight::Normal {
                    FcWeight::Medium
                } else {
                    FcWeight::Normal
                };
                if available.contains(&partner) {
                    return Some(partner);
                }
                // Neither 400 nor 500 is available: lighter weights first,
                // heavier ones last.
                closest_lighter(FcWeight::Normal).or_else(|| closest_heavier(FcWeight::Medium))
            }
            FcWeight::Thin | FcWeight::ExtraLight | FcWeight::Light => {
                closest_lighter(wanted).or_else(|| closest_heavier(wanted))
            }
            FcWeight::SemiBold | FcWeight::Bold | FcWeight::ExtraBold | FcWeight::Black => {
                closest_heavier(wanted).or_else(|| closest_lighter(wanted))
            }
        }
    }
}
566
/// CSS font-stretch values
///
/// Variants are declared from narrowest to widest, so the derived ordering
/// compares widths. The default is `Normal`.
#[derive(Debug, Default, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub enum FcStretch {
    UltraCondensed = 1,
    ExtraCondensed = 2,
    Condensed = 3,
    SemiCondensed = 4,
    #[default]
    Normal = 5,
    SemiExpanded = 6,
    Expanded = 7,
    ExtraExpanded = 8,
    UltraExpanded = 9,
}

impl FcStretch {
    /// `true` for every width narrower than `Normal`.
    pub fn is_condensed(&self) -> bool {
        *self < FcStretch::Normal
    }

    /// Convert a numeric width class (1..=9) to a stretch value.
    ///
    /// Any value outside 1..=9 is treated as `Normal`.
    pub fn from_u16(width_class: u16) -> Self {
        match width_class {
            1 => FcStretch::UltraCondensed,
            2 => FcStretch::ExtraCondensed,
            3 => FcStretch::Condensed,
            4 => FcStretch::SemiCondensed,
            5 => FcStretch::Normal,
            6 => FcStretch::SemiExpanded,
            7 => FcStretch::Expanded,
            8 => FcStretch::ExtraExpanded,
            9 => FcStretch::UltraExpanded,
            _ => FcStretch::Normal,
        }
    }

    /// Follows CSS spec for stretch matching
    ///
    /// Returns `None` only when `available` is empty. An exact match wins.
    /// Otherwise `Normal` and condensed requests take the closest narrower
    /// width and fall back to the closest wider one; expanded requests do the
    /// reverse.
    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
        let wanted = *self;

        if available.contains(&wanted) {
            return Some(wanted);
        }

        let closest_narrower = || available.iter().copied().filter(|s| *s < wanted).max();
        let closest_wider = || available.iter().copied().filter(|s| *s > wanted).min();

        if wanted <= FcStretch::Normal {
            closest_narrower().or_else(closest_wider)
        } else {
            closest_wider().or_else(closest_narrower)
        }
    }
}
685
/// Unicode range representation for font matching
///
/// Both bounds are inclusive code point values.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeRange {
    pub start: u32,
    pub end: u32,
}

impl UnicodeRange {
    /// `true` when `c` lies between `start` and `end` (both inclusive).
    pub fn contains(&self, c: char) -> bool {
        (self.start..=self.end).contains(&(c as u32))
    }

    /// `true` when the two ranges share at least one code point.
    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
        self.start <= other.end && other.start <= self.end
    }

    /// `true` when every code point of `self` also lies in `other`.
    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
        other.start <= self.start && self.end <= other.end
    }
}

/// `true` when any of `ranges` shares a code point with the block
/// `first..=last`.
fn any_range_touches_block(ranges: &[UnicodeRange], first: u32, last: u32) -> bool {
    ranges.iter().any(|range| {
        // An inverted range (`end < start`) is treated as the single code
        // point `start`, so a range whose start lies inside the block is
        // always reported.
        let end = range.end.max(range.start);
        range.start <= last && first <= end
    })
}

/// Check if any range covers CJK Unified Ideographs, Hiragana, Katakana, or Hangul
///
/// A range counts as soon as it overlaps one of those blocks; it does not
/// have to start inside the block.
pub fn has_cjk_ranges(ranges: &[UnicodeRange]) -> bool {
    const CJK_BLOCKS: [(u32, u32); 4] = [
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0xAC00, 0xD7AF), // Hangul Syllables
    ];
    CJK_BLOCKS
        .iter()
        .any(|&(first, last)| any_range_touches_block(ranges, first, last))
}

/// Check if any range covers (overlaps) the Arabic block
pub fn has_arabic_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0600, 0x06FF)
}

/// Check if any range covers (overlaps) the Cyrillic block
pub fn has_cyrillic_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0400, 0x04FF)
}

/// Check if any range covers (overlaps) the Hebrew block
pub fn has_hebrew_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0590, 0x05FF)
}

/// Check if any range covers (overlaps) the Thai block
pub fn has_thai_ranges(ranges: &[UnicodeRange]) -> bool {
    any_range_touches_block(ranges, 0x0E00, 0x0E7F)
}
739
/// Log levels for trace messages
///
/// Variants are declared from least to most severe, so the derived ordering
/// can be used to filter by severity.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum TraceLevel {
    Debug,
    Info,
    Warning,
    Error,
}
748
749/// Reason for font matching failure or success
750#[derive(Debug, Clone, PartialEq, Eq, Hash)]
751pub enum MatchReason {
752    NameMismatch {
753        requested: Option<String>,
754        found: Option<String>,
755    },
756    FamilyMismatch {
757        requested: Option<String>,
758        found: Option<String>,
759    },
760    StyleMismatch {
761        property: &'static str,
762        requested: String,
763        found: String,
764    },
765    WeightMismatch {
766        requested: FcWeight,
767        found: FcWeight,
768    },
769    StretchMismatch {
770        requested: FcStretch,
771        found: FcStretch,
772    },
773    UnicodeRangeMismatch {
774        character: char,
775        ranges: Vec<UnicodeRange>,
776    },
777    Success,
778}
779
780/// Trace message for debugging font matching
781#[derive(Debug, Clone, PartialEq, Eq)]
782pub struct TraceMsg {
783    pub level: TraceLevel,
784    pub path: String,
785    pub reason: MatchReason,
786}
787
/// Hinting style for font rendering.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcHintStyle {
    #[default]
    None = 0,
    Slight = 1,
    Medium = 2,
    Full = 3,
}

/// Subpixel rendering order.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcRgba {
    #[default]
    Unknown = 0,
    Rgb = 1,
    Bgr = 2,
    Vrgb = 3,
    Vbgr = 4,
    None = 5,
}

/// LCD filter mode for subpixel rendering.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub enum FcLcdFilter {
    #[default]
    None = 0,
    Default = 1,
    Light = 2,
    Legacy = 3,
}

/// Per-font rendering configuration from system font config (Linux fonts.conf).
///
/// All fields are `Option<T>` -- `None` means "use system default".
/// On non-Linux platforms, this is always all-None (no per-font overrides).
///
/// Equality and ordering are implemented by hand (see the `Ord` impl) so that
/// the `f64` fields take part in a total order and all four comparison traits
/// agree with each other.
#[derive(Debug, Default, Clone)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontRenderConfig {
    pub antialias: Option<bool>,
    pub hinting: Option<bool>,
    pub hintstyle: Option<FcHintStyle>,
    pub autohint: Option<bool>,
    pub rgba: Option<FcRgba>,
    pub lcdfilter: Option<FcLcdFilter>,
    pub embeddedbitmap: Option<bool>,
    pub embolden: Option<bool>,
    pub dpi: Option<f64>,
    pub scale: Option<f64>,
    pub minspace: Option<bool>,
}

/// Two configs are equal exactly when `cmp` reports `Equal`. The `f64` fields
/// are therefore compared by `f64::total_cmp`: a NaN equals an identical NaN,
/// and `0.0` differs from `-0.0`.
impl PartialEq for FcFontRenderConfig {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == core::cmp::Ordering::Equal
    }
}

/// Sound because equality is defined through the total order below, which is
/// reflexive even for NaN. This allows FcFontRenderConfig to be used inside
/// FcPattern which derives Eq + Ord.
impl Eq for FcFontRenderConfig {}

/// Always `Some(self.cmp(other))`, keeping `PartialOrd` and `Ord` consistent.
impl PartialOrd for FcFontRenderConfig {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for FcFontRenderConfig {
    /// Lexicographic comparison: all non-`f64` fields in declaration order
    /// first, then `dpi`, then `scale`.
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        use core::cmp::Ordering;

        // `None` sorts before `Some`, like `Option`'s own ordering; two
        // present values use the IEEE 754 total order.
        fn cmp_optional_f64(a: Option<f64>, b: Option<f64>) -> Ordering {
            match (a, b) {
                (None, None) => Ordering::Equal,
                (None, Some(_)) => Ordering::Less,
                (Some(_), None) => Ordering::Greater,
                (Some(x), Some(y)) => x.total_cmp(&y),
            }
        }

        self.antialias
            .cmp(&other.antialias)
            .then_with(|| self.hinting.cmp(&other.hinting))
            .then_with(|| self.hintstyle.cmp(&other.hintstyle))
            .then_with(|| self.autohint.cmp(&other.autohint))
            .then_with(|| self.rgba.cmp(&other.rgba))
            .then_with(|| self.lcdfilter.cmp(&other.lcdfilter))
            .then_with(|| self.embeddedbitmap.cmp(&other.embeddedbitmap))
            .then_with(|| self.embolden.cmp(&other.embolden))
            .then_with(|| self.minspace.cmp(&other.minspace))
            .then_with(|| cmp_optional_f64(self.dpi, other.dpi))
            .then_with(|| cmp_optional_f64(self.scale, other.scale))
    }
}
876
877/// Font pattern for matching
878#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
879#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
880#[repr(C)]
881pub struct FcPattern {
882    // font name
883    pub name: Option<String>,
884    // family name
885    pub family: Option<String>,
886    // "italic" property
887    pub italic: PatternMatch,
888    // "oblique" property
889    pub oblique: PatternMatch,
890    // "bold" property
891    pub bold: PatternMatch,
892    // "monospace" property
893    pub monospace: PatternMatch,
894    // "condensed" property
895    pub condensed: PatternMatch,
896    // font weight
897    pub weight: FcWeight,
898    // font stretch
899    pub stretch: FcStretch,
900    // unicode ranges to match
901    pub unicode_ranges: Vec<UnicodeRange>,
902    // extended font metadata
903    pub metadata: FcFontMetadata,
904    // per-font rendering configuration (from system fonts.conf on Linux)
905    pub render_config: FcFontRenderConfig,
906}
907
908impl core::fmt::Debug for FcPattern {
909    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
910        let mut d = f.debug_struct("FcPattern");
911
912        if let Some(name) = &self.name {
913            d.field("name", name);
914        }
915
916        if let Some(family) = &self.family {
917            d.field("family", family);
918        }
919
920        if self.italic != PatternMatch::DontCare {
921            d.field("italic", &self.italic);
922        }
923
924        if self.oblique != PatternMatch::DontCare {
925            d.field("oblique", &self.oblique);
926        }
927
928        if self.bold != PatternMatch::DontCare {
929            d.field("bold", &self.bold);
930        }
931
932        if self.monospace != PatternMatch::DontCare {
933            d.field("monospace", &self.monospace);
934        }
935
936        if self.condensed != PatternMatch::DontCare {
937            d.field("condensed", &self.condensed);
938        }
939
940        if self.weight != FcWeight::Normal {
941            d.field("weight", &self.weight);
942        }
943
944        if self.stretch != FcStretch::Normal {
945            d.field("stretch", &self.stretch);
946        }
947
948        if !self.unicode_ranges.is_empty() {
949            d.field("unicode_ranges", &self.unicode_ranges);
950        }
951
952        // Only show non-empty metadata fields
953        let empty_metadata = FcFontMetadata::default();
954        if self.metadata != empty_metadata {
955            d.field("metadata", &self.metadata);
956        }
957
958        // Only show render_config when it differs from default
959        let empty_render_config = FcFontRenderConfig::default();
960        if self.render_config != empty_render_config {
961            d.field("render_config", &self.render_config);
962        }
963
964        d.finish()
965    }
966}
967
/// Extended, human-readable font metadata (documented upstream as coming from
/// the OS/2 table).
///
/// NOTE(review): the field set (copyright, designer, license, PostScript name,
/// ...) looks like the OpenType `name` table records rather than OS/2 --
/// confirm against the parsing code.
///
/// Every field is optional; `Default` yields a value with all fields `None`.
#[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
pub struct FcFontMetadata {
    /// Copyright notice.
    pub copyright: Option<String>,
    /// Designer name.
    pub designer: Option<String>,
    /// Designer URL.
    pub designer_url: Option<String>,
    /// Font family name.
    pub font_family: Option<String>,
    /// Font subfamily name.
    pub font_subfamily: Option<String>,
    /// Full font name.
    pub full_name: Option<String>,
    /// Description string.
    pub id_description: Option<String>,
    /// License text.
    pub license: Option<String>,
    /// License URL.
    pub license_url: Option<String>,
    /// Manufacturer name.
    pub manufacturer: Option<String>,
    /// Manufacturer URL.
    pub manufacturer_url: Option<String>,
    /// PostScript name.
    pub postscript_name: Option<String>,
    /// Preferred family name.
    pub preferred_family: Option<String>,
    /// Preferred subfamily name.
    pub preferred_subfamily: Option<String>,
    /// Trademark notice.
    pub trademark: Option<String>,
    /// Unique font identifier string.
    pub unique_id: Option<String>,
    /// Version string.
    pub version: Option<String>,
}
990
991impl FcPattern {
992    /// Check if this pattern would match the given character
993    pub fn contains_char(&self, c: char) -> bool {
994        if self.unicode_ranges.is_empty() {
995            return true; // No ranges specified means match all characters
996        }
997
998        for range in &self.unicode_ranges {
999            if range.contains(c) {
1000                return true;
1001            }
1002        }
1003
1004        false
1005    }
1006}
1007
1008/// Font match result with UUID
1009#[derive(Debug, Clone, PartialEq, Eq)]
1010pub struct FontMatch {
1011    pub id: FontId,
1012    pub unicode_ranges: Vec<UnicodeRange>,
1013    pub fallbacks: Vec<FontMatchNoFallback>,
1014}
1015
1016/// Font match result with UUID (without fallback)
1017#[derive(Debug, Clone, PartialEq, Eq)]
1018pub struct FontMatchNoFallback {
1019    pub id: FontId,
1020    pub unicode_ranges: Vec<UnicodeRange>,
1021}
1022
1023/// A run of text that uses the same font
1024/// Returned by FontFallbackChain::query_for_text()
1025#[derive(Debug, Clone, PartialEq, Eq)]
1026pub struct ResolvedFontRun {
1027    /// The text content of this run
1028    pub text: String,
1029    /// Start byte index in the original text
1030    pub start_byte: usize,
1031    /// End byte index in the original text (exclusive)
1032    pub end_byte: usize,
1033    /// The font to use for this run (None if no font found)
1034    pub font_id: Option<FontId>,
1035    /// Which CSS font-family this came from
1036    pub css_source: String,
1037}
1038
1039/// Resolved font fallback chain for a CSS font-family stack
1040/// This represents the complete chain of fonts to use for rendering text
1041#[derive(Debug, Clone, PartialEq, Eq)]
1042pub struct FontFallbackChain {
1043    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
1044    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
1045    pub css_fallbacks: Vec<CssFallbackGroup>,
1046    
1047    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
1048    /// Only populated if css_fallbacks don't cover all requested characters
1049    pub unicode_fallbacks: Vec<FontMatch>,
1050    
1051    /// The original CSS font-family stack that was requested
1052    pub original_stack: Vec<String>,
1053}
1054
1055impl FontFallbackChain {
1056    /// Resolve which font should be used for a specific character
1057    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
1058    /// Returns None if no font in the chain can render this character
1059    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
1060        let codepoint = ch as u32;
1061
1062        // Check CSS fallbacks in order
1063        for group in &self.css_fallbacks {
1064            for font in &group.fonts {
1065                let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1066                if meta.unicode_ranges.is_empty() {
1067                    continue; // No range info — don't assume it covers everything
1068                }
1069                if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1070                    return Some((font.id, group.css_name.clone()));
1071                }
1072            }
1073        }
1074
1075        // Check Unicode fallbacks
1076        for font in &self.unicode_fallbacks {
1077            let Some(meta) = cache.get_metadata_by_id(&font.id) else { continue };
1078            if meta.unicode_ranges.iter().any(|r| codepoint >= r.start && codepoint <= r.end) {
1079                return Some((font.id, "(unicode-fallback)".to_string()));
1080            }
1081        }
1082
1083        None
1084    }
1085    
1086    /// Resolve all characters in a text string to their fonts
1087    /// Returns a vector of (character, FontId, css_source) tuples
1088    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
1089        text.chars()
1090            .map(|ch| (ch, self.resolve_char(cache, ch)))
1091            .collect()
1092    }
1093    
1094    /// Query which fonts should be used for a text string, grouped by font
1095    /// Returns runs of consecutive characters that use the same font
1096    /// This is the main API for text shaping - call this to get font runs, then shape each run
1097    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
1098        if text.is_empty() {
1099            return Vec::new();
1100        }
1101        
1102        let mut runs: Vec<ResolvedFontRun> = Vec::new();
1103        let mut current_font: Option<FontId> = None;
1104        let mut current_css_source: Option<String> = None;
1105        let mut current_start_byte: usize = 0;
1106        
1107        for (byte_idx, ch) in text.char_indices() {
1108            let resolved = self.resolve_char(cache, ch);
1109            let (font_id, css_source) = match &resolved {
1110                Some((id, source)) => (Some(*id), Some(source.clone())),
1111                None => (None, None),
1112            };
1113            
1114            // Check if we need to start a new run
1115            let font_changed = font_id != current_font;
1116            
1117            if font_changed && byte_idx > 0 {
1118                // Finalize the current run
1119                let run_text = &text[current_start_byte..byte_idx];
1120                runs.push(ResolvedFontRun {
1121                    text: run_text.to_string(),
1122                    start_byte: current_start_byte,
1123                    end_byte: byte_idx,
1124                    font_id: current_font,
1125                    css_source: current_css_source.clone().unwrap_or_default(),
1126                });
1127                current_start_byte = byte_idx;
1128            }
1129            
1130            current_font = font_id;
1131            current_css_source = css_source;
1132        }
1133        
1134        // Finalize the last run
1135        if current_start_byte < text.len() {
1136            let run_text = &text[current_start_byte..];
1137            runs.push(ResolvedFontRun {
1138                text: run_text.to_string(),
1139                start_byte: current_start_byte,
1140                end_byte: text.len(),
1141                font_id: current_font,
1142                css_source: current_css_source.unwrap_or_default(),
1143            });
1144        }
1145        
1146        runs
1147    }
1148}
1149
1150/// A group of fonts that are fallbacks for a single CSS font-family name
1151#[derive(Debug, Clone, PartialEq, Eq)]
1152pub struct CssFallbackGroup {
1153    /// The CSS font name (e.g., "NotoSansJP", "sans-serif")
1154    pub css_name: String,
1155    
1156    /// System fonts that match this CSS name
1157    /// First font in list is the best match
1158    pub fonts: Vec<FontMatch>,
1159}
1160
/// Key under which resolved font fallback chains are cached.
///
/// IMPORTANT: unicode ranges are deliberately NOT part of this key.
/// Chains are cached per set of CSS properties, never per text content, so
/// different texts styled with the same CSS font stack share a single chain.
#[cfg(feature = "std")]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct FontChainCacheKey {
    /// CSS font stack (expanded to OS-specific fonts).
    pub(crate) font_families: Vec<String>,
    /// Requested font weight.
    pub(crate) weight: FcWeight,
    /// Requested italic style flag.
    pub(crate) italic: PatternMatch,
    /// Requested oblique style flag.
    pub(crate) oblique: PatternMatch,
}
1177
/// Location of a font on disk.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cache", derive(serde::Serialize, serde::Deserialize))]
#[repr(C)]
pub struct FcFontPath {
    /// Path of the font file.
    pub path: String,
    /// Index of the font within the file.
    pub font_index: usize,
}
1186
/// A font held entirely in memory.
#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct FcFont {
    /// Raw bytes of the font file.
    pub bytes: Vec<u8>,
    /// Index of the font within `bytes`.
    pub font_index: usize,
    /// Identifier string, used for identification in tests.
    pub id: String,
}
1195
1196/// Font source enum to represent either disk or memory fonts
1197#[derive(Debug, Clone)]
1198pub enum FontSource<'a> {
1199    /// Font loaded from memory
1200    Memory(&'a FcFont),
1201    /// Font loaded from disk
1202    Disk(&'a FcFontPath),
1203}
1204
/// A font supplied from memory under a caller-chosen name.
/// This is the primary way to hand custom fonts to the application.
#[derive(Clone, Debug)]
pub struct NamedFont {
    /// Human-readable name for this font (e.g., "My Custom Font").
    pub name: String,
    /// The raw font file bytes (TTF, OTF, WOFF, WOFF2, TTC).
    pub bytes: Vec<u8>,
}

impl NamedFont {
    /// Builds a `NamedFont` from anything convertible into a `String` plus the
    /// font file bytes. The bytes are stored as given.
    pub fn new(name: impl Into<String>, bytes: Vec<u8>) -> Self {
        let name: String = name.into();
        NamedFont { name, bytes }
    }
}
1224
1225/// Font cache, initialized at startup
1226#[derive(Debug)]
1227pub struct FcFontCache {
1228    // Pattern to FontId mapping (query index)
1229    pub(crate) patterns: BTreeMap<FcPattern, FontId>,
1230    // On-disk font paths
1231    pub(crate) disk_fonts: BTreeMap<FontId, FcFontPath>,
1232    // In-memory fonts
1233    pub(crate) memory_fonts: BTreeMap<FontId, FcFont>,
1234    // Metadata cache (patterns stored by ID for quick lookup)
1235    pub(crate) metadata: BTreeMap<FontId, FcPattern>,
1236    // Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds
1237    // This enables fast fuzzy search by intersecting token sets
1238    pub(crate) token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
1239    // Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>
1240    // Avoids re-tokenization during fuzzy search
1241    pub(crate) font_tokens: BTreeMap<FontId, Vec<String>>,
1242    // Font fallback chain cache (CSS stack + unicode -> resolved chain)
1243    #[cfg(feature = "std")]
1244    pub(crate) chain_cache: std::sync::Mutex<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
1245}
1246
1247impl Clone for FcFontCache {
1248    fn clone(&self) -> Self {
1249        Self {
1250            patterns: self.patterns.clone(),
1251            disk_fonts: self.disk_fonts.clone(),
1252            memory_fonts: self.memory_fonts.clone(),
1253            metadata: self.metadata.clone(),
1254            token_index: self.token_index.clone(),
1255            font_tokens: self.font_tokens.clone(),
1256            #[cfg(feature = "std")]
1257            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()), // Empty cache for cloned instance
1258        }
1259    }
1260}
1261
1262impl Default for FcFontCache {
1263    fn default() -> Self {
1264        Self {
1265            patterns: BTreeMap::new(),
1266            disk_fonts: BTreeMap::new(),
1267            memory_fonts: BTreeMap::new(),
1268            metadata: BTreeMap::new(),
1269            token_index: BTreeMap::new(),
1270            font_tokens: BTreeMap::new(),
1271            #[cfg(feature = "std")]
1272            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
1273        }
1274    }
1275}
1276
1277impl FcFontCache {
1278    /// Helper method to add a font pattern to the token index
1279    pub(crate) fn index_pattern_tokens(&mut self, pattern: &FcPattern, id: FontId) {
1280        // Extract tokens from both name and family
1281        let mut all_tokens = Vec::new();
1282        
1283        if let Some(name) = &pattern.name {
1284            all_tokens.extend(Self::extract_font_name_tokens(name));
1285        }
1286        
1287        if let Some(family) = &pattern.family {
1288            all_tokens.extend(Self::extract_font_name_tokens(family));
1289        }
1290        
1291        // Convert tokens to lowercase and store them
1292        let tokens_lower: Vec<String> = all_tokens.iter().map(|t| t.to_lowercase()).collect();
1293        
1294        // Add each token (lowercase) to the index
1295        for token_lower in &tokens_lower {
1296            self.token_index
1297                .entry(token_lower.clone())
1298                .or_insert_with(alloc::collections::BTreeSet::new)
1299                .insert(id);
1300        }
1301        
1302        // Store pre-tokenized font name for fast lookup (no re-tokenization needed)
1303        self.font_tokens.insert(id, tokens_lower);
1304    }
1305
1306    /// Adds in-memory font files
1307    pub fn with_memory_fonts(&mut self, fonts: Vec<(FcPattern, FcFont)>) -> &mut Self {
1308        for (pattern, font) in fonts {
1309            let id = FontId::new();
1310            self.patterns.insert(pattern.clone(), id);
1311            self.metadata.insert(id, pattern.clone());
1312            self.memory_fonts.insert(id, font);
1313            self.index_pattern_tokens(&pattern, id);
1314        }
1315        self
1316    }
1317
1318    /// Adds a memory font with a specific ID (for testing)
1319    pub fn with_memory_font_with_id(
1320        &mut self,
1321        id: FontId,
1322        pattern: FcPattern,
1323        font: FcFont,
1324    ) -> &mut Self {
1325        self.patterns.insert(pattern.clone(), id);
1326        self.metadata.insert(id, pattern.clone());
1327        self.memory_fonts.insert(id, font);
1328        self.index_pattern_tokens(&pattern, id);
1329        self
1330    }
1331
1332    /// Get font data for a given font ID
1333    pub fn get_font_by_id<'a>(&'a self, id: &FontId) -> Option<FontSource<'a>> {
1334        // Check memory fonts first
1335        if let Some(font) = self.memory_fonts.get(id) {
1336            return Some(FontSource::Memory(font));
1337        }
1338        // Then check disk fonts
1339        if let Some(path) = self.disk_fonts.get(id) {
1340            return Some(FontSource::Disk(path));
1341        }
1342        None
1343    }
1344
1345    /// Get metadata directly from an ID
1346    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<&FcPattern> {
1347        self.metadata.get(id)
1348    }
1349
1350    /// Get font bytes (either from disk or memory)
1351    #[cfg(feature = "std")]
1352    pub fn get_font_bytes(&self, id: &FontId) -> Option<Vec<u8>> {
1353        match self.get_font_by_id(id)? {
1354            FontSource::Memory(font) => {
1355                Some(font.bytes.clone())
1356            }
1357            FontSource::Disk(path) => {
1358                std::fs::read(&path.path).ok()
1359            }
1360        }
1361    }
1362
1363    /// Returns an empty font cache (no_std / no filesystem).
1364    #[cfg(not(feature = "std"))]
1365    pub fn build() -> Self { Self::default() }
1366
1367    /// Scans system font directories using filename heuristics (no allsorts).
1368    #[cfg(all(feature = "std", not(feature = "parsing")))]
1369    pub fn build() -> Self { Self::build_from_filenames() }
1370
1371    /// Scans and parses all system fonts via allsorts for full metadata.
1372    #[cfg(all(feature = "std", feature = "parsing"))]
1373    pub fn build() -> Self { Self::build_inner(None) }
1374
1375    /// Filename-only scan: discovers fonts on disk, guesses metadata from
1376    /// the filename using [`config::tokenize_font_stem`].
1377    #[cfg(all(feature = "std", not(feature = "parsing")))]
1378    fn build_from_filenames() -> Self {
1379        let mut cache = Self::default();
1380        for dir in crate::config::font_directories(OperatingSystem::current()) {
1381            for path in FcCollectFontFilesRecursive(dir) {
1382                let pattern = match pattern_from_filename(&path) {
1383                    Some(p) => p,
1384                    None => continue,
1385                };
1386                let id = FontId::new();
1387                cache.disk_fonts.insert(id, FcFontPath {
1388                    path: path.to_string_lossy().to_string(),
1389                    font_index: 0,
1390                });
1391                cache.index_pattern_tokens(&pattern, id);
1392                cache.metadata.insert(id, pattern.clone());
1393                cache.patterns.insert(pattern, id);
1394            }
1395        }
1396        cache
1397    }
1398    
1399    /// Builds a font cache with only specific font families (and their fallbacks).
1400    /// 
1401    /// This is a performance optimization for applications that know ahead of time
1402    /// which fonts they need. Instead of scanning all system fonts (which can be slow
1403    /// on systems with many fonts), only fonts matching the specified families are loaded.
1404    /// 
1405    /// Generic family names like "sans-serif", "serif", "monospace" are expanded
1406    /// to OS-specific font names (e.g., "sans-serif" on macOS becomes "Helvetica Neue", 
1407    /// "San Francisco", etc.).
1408    /// 
1409    /// **Note**: This will NOT automatically load fallback fonts for scripts not covered
1410    /// by the requested families. If you need Arabic, CJK, or emoji support, either:
1411    /// - Add those families explicitly to the filter
1412    /// - Use `with_memory_fonts()` to add bundled fonts
1413    /// - Use `build()` to load all system fonts
1414    /// 
1415    /// # Arguments
1416    /// * `families` - Font family names to load (e.g., ["Arial", "sans-serif"])
1417    /// 
1418    /// # Example
1419    /// ```ignore
1420    /// // Only load Arial and sans-serif fallback fonts
1421    /// let cache = FcFontCache::build_with_families(&["Arial", "sans-serif"]);
1422    /// ```
1423    #[cfg(all(feature = "std", feature = "parsing"))]
1424    pub fn build_with_families(families: &[impl AsRef<str>]) -> Self {
1425        // Expand generic families to OS-specific names
1426        let os = OperatingSystem::current();
1427        let mut target_families: Vec<String> = Vec::new();
1428        
1429        for family in families {
1430            let family_str = family.as_ref();
1431            let expanded = os.expand_generic_family(family_str, &[]);
1432            if expanded.is_empty() || (expanded.len() == 1 && expanded[0] == family_str) {
1433                target_families.push(family_str.to_string());
1434            } else {
1435                target_families.extend(expanded);
1436            }
1437        }
1438        
1439        Self::build_inner(Some(&target_families))
1440    }
1441    
1442    /// Inner build function that handles both filtered and unfiltered font loading.
1443    /// 
1444    /// # Arguments
1445    /// * `family_filter` - If Some, only load fonts matching these family names.
1446    ///                     If None, load all fonts.
1447    #[cfg(all(feature = "std", feature = "parsing"))]
1448    fn build_inner(family_filter: Option<&[String]>) -> Self {
1449        let mut cache = FcFontCache::default();
1450        
1451        // Normalize filter families for matching
1452        let filter_normalized: Option<Vec<String>> = family_filter.map(|families| {
1453            families
1454                .iter()
1455                .map(|f| crate::utils::normalize_family_name(f))
1456                .collect()
1457        });
1458
1459        // Helper closure to check if a pattern matches the filter
1460        let matches_filter = |pattern: &FcPattern| -> bool {
1461            match &filter_normalized {
1462                None => true, // No filter = accept all
1463                Some(targets) => {
1464                    pattern.name.as_ref().map_or(false, |name| {
1465                        let name_norm = crate::utils::normalize_family_name(name);
1466                        targets.iter().any(|target| name_norm.contains(target))
1467                    }) || pattern.family.as_ref().map_or(false, |family| {
1468                        let family_norm = crate::utils::normalize_family_name(family);
1469                        targets.iter().any(|target| family_norm.contains(target))
1470                    })
1471                }
1472            }
1473        };
1474
1475        #[cfg(target_os = "linux")]
1476        {
1477            if let Some((font_entries, render_configs)) = FcScanDirectories() {
1478                for (mut pattern, path) in font_entries {
1479                    if matches_filter(&pattern) {
1480                        // Apply per-font render config if a matching family rule exists
1481                        if let Some(family) = pattern.name.as_ref().or(pattern.family.as_ref()) {
1482                            if let Some(rc) = render_configs.get(family) {
1483                                pattern.render_config = rc.clone();
1484                            }
1485                        }
1486                        let id = FontId::new();
1487                        cache.patterns.insert(pattern.clone(), id);
1488                        cache.metadata.insert(id, pattern.clone());
1489                        cache.disk_fonts.insert(id, path);
1490                        cache.index_pattern_tokens(&pattern, id);
1491                    }
1492                }
1493            }
1494        }
1495
1496        #[cfg(target_os = "windows")]
1497        {
1498            let system_root = std::env::var("SystemRoot")
1499                .or_else(|_| std::env::var("WINDIR"))
1500                .unwrap_or_else(|_| "C:\\Windows".to_string());
1501            
1502            let user_profile = std::env::var("USERPROFILE")
1503                .unwrap_or_else(|_| "C:\\Users\\Default".to_string());
1504            
1505            let font_dirs = vec![
1506                (None, format!("{}\\Fonts\\", system_root)),
1507                (None, format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile)),
1508            ];
1509
1510            let font_entries = FcScanDirectoriesInner(&font_dirs);
1511            for (pattern, path) in font_entries {
1512                if matches_filter(&pattern) {
1513                    let id = FontId::new();
1514                    cache.patterns.insert(pattern.clone(), id);
1515                    cache.metadata.insert(id, pattern.clone());
1516                    cache.disk_fonts.insert(id, path);
1517                    cache.index_pattern_tokens(&pattern, id);
1518                }
1519            }
1520        }
1521
1522        #[cfg(target_os = "macos")]
1523        {
1524            let font_dirs = vec![
1525                (None, "~/Library/Fonts".to_owned()),
1526                (None, "/System/Library/Fonts".to_owned()),
1527                (None, "/Library/Fonts".to_owned()),
1528                (None, "/System/Library/AssetsV2".to_owned()),
1529            ];
1530
1531            let font_entries = FcScanDirectoriesInner(&font_dirs);
1532            for (pattern, path) in font_entries {
1533                if matches_filter(&pattern) {
1534                    let id = FontId::new();
1535                    cache.patterns.insert(pattern.clone(), id);
1536                    cache.metadata.insert(id, pattern.clone());
1537                    cache.disk_fonts.insert(id, path);
1538                    cache.index_pattern_tokens(&pattern, id);
1539                }
1540            }
1541        }
1542
1543        cache
1544    }
1545    
1546    /// Check if a font ID is a memory font (preferred over disk fonts)
1547    pub fn is_memory_font(&self, id: &FontId) -> bool {
1548        self.memory_fonts.contains_key(id)
1549    }
1550
1551    /// Returns the list of fonts and font patterns
1552    pub fn list(&self) -> Vec<(&FcPattern, FontId)> {
1553        self.patterns
1554            .iter()
1555            .map(|(pattern, id)| (pattern, *id))
1556            .collect()
1557    }
1558
1559    /// Returns true if the cache contains no font patterns
1560    pub fn is_empty(&self) -> bool {
1561        self.patterns.is_empty()
1562    }
1563
1564    /// Returns the number of font patterns in the cache
1565    pub fn len(&self) -> usize {
1566        self.patterns.len()
1567    }
1568
1569    /// Queries a font from the in-memory cache, returns the first found font (early return)
1570    /// Memory fonts are always preferred over disk fonts with the same match quality.
1571    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
1572        let mut matches = Vec::new();
1573
1574        for (stored_pattern, id) in &self.patterns {
1575            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1576                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1577                
1578                // Calculate Unicode compatibility score
1579                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1580                    // No specific Unicode requirements, use general coverage
1581                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1582                } else {
1583                    // Calculate how well this font covers the requested Unicode ranges
1584                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1585                };
1586                
1587                let style_score = Self::calculate_style_score(pattern, metadata);
1588                
1589                // Memory fonts get a bonus to prefer them over disk fonts
1590                let is_memory = self.memory_fonts.contains_key(id);
1591                
1592                matches.push((*id, unicode_compatibility, style_score, metadata.clone(), is_memory));
1593            }
1594        }
1595
1596        // Sort by: 1. Memory font (preferred), 2. Unicode compatibility, 3. Style score
1597        matches.sort_by(|a, b| {
1598            // Memory fonts first
1599            b.4.cmp(&a.4)
1600                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1601                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
1602        });
1603
1604        matches.first().map(|(id, _, _, metadata, _)| {
1605            FontMatch {
1606                id: *id,
1607                unicode_ranges: metadata.unicode_ranges.clone(),
1608                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1609            }
1610        })
1611    }
1612
1613    /// Queries all fonts matching a pattern (internal use only)
1614    /// 
1615    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
1616    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
1617    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
1618        let mut matches = Vec::new();
1619
1620        for (stored_pattern, id) in &self.patterns {
1621            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1622                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1623                
1624                // Calculate Unicode compatibility score
1625                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1626                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1627                } else {
1628                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1629                };
1630                
1631                let style_score = Self::calculate_style_score(pattern, metadata);
1632                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
1633            }
1634        }
1635
1636        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
1637        // Style matching (weight, italic, etc.) is now the primary criterion
1638        // Deterministic tiebreaker: prefer non-italic, then alphabetical by name
1639        matches.sort_by(|a, b| {
1640            a.2.cmp(&b.2) // Style score (lower is better)
1641                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1642                .then_with(|| a.3.italic.cmp(&b.3.italic)) // Prefer non-italic
1643                .then_with(|| a.3.name.cmp(&b.3.name)) // Alphabetical tiebreaker
1644        });
1645
1646        matches
1647            .into_iter()
1648            .map(|(id, _, _, metadata)| {
1649                FontMatch {
1650                    id,
1651                    unicode_ranges: metadata.unicode_ranges.clone(),
1652                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1653                }
1654            })
1655            .collect()
1656    }
1657
1658    /// Compute fallback fonts for a given font
1659    /// This is a lazy operation that can be expensive - only call when actually needed
1660    /// (e.g., for FFI or debugging, not needed for resolve_char)
1661    pub fn compute_fallbacks(
1662        &self,
1663        font_id: &FontId,
1664        trace: &mut Vec<TraceMsg>,
1665    ) -> Vec<FontMatchNoFallback> {
1666        // Get the pattern for this font
1667        let pattern = match self.metadata.get(font_id) {
1668            Some(p) => p,
1669            None => return Vec::new(),
1670        };
1671        
1672        self.compute_fallbacks_for_pattern(pattern, Some(font_id), trace)
1673    }
1674    
1675    fn compute_fallbacks_for_pattern(
1676        &self,
1677        pattern: &FcPattern,
1678        exclude_id: Option<&FontId>,
1679        _trace: &mut Vec<TraceMsg>,
1680    ) -> Vec<FontMatchNoFallback> {
1681        let mut candidates = Vec::new();
1682
1683        // Collect all potential fallbacks (excluding original pattern)
1684        for (stored_pattern, id) in &self.patterns {
1685            // Skip if this is the original font
1686            if exclude_id.is_some() && exclude_id.unwrap() == id {
1687                continue;
1688            }
1689
1690            // Check if this font supports any of the unicode ranges
1691            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
1692                // Calculate Unicode compatibility
1693                let unicode_compatibility = Self::calculate_unicode_compatibility(
1694                    &pattern.unicode_ranges,
1695                    &stored_pattern.unicode_ranges
1696                );
1697                
1698                // Only include if there's actual overlap
1699                if unicode_compatibility > 0 {
1700                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
1701                    candidates.push((
1702                        FontMatchNoFallback {
1703                            id: *id,
1704                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
1705                        },
1706                        unicode_compatibility,
1707                        style_score,
1708                        stored_pattern.clone(),
1709                    ));
1710                }
1711            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
1712                // No specific Unicode requirements, use general coverage
1713                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
1714                let style_score = Self::calculate_style_score(pattern, stored_pattern);
1715                candidates.push((
1716                    FontMatchNoFallback {
1717                        id: *id,
1718                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
1719                    },
1720                    coverage,
1721                    style_score,
1722                    stored_pattern.clone(),
1723                ));
1724            }
1725        }
1726
1727        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
1728        candidates.sort_by(|a, b| {
1729            b.1.cmp(&a.1)
1730                .then_with(|| a.2.cmp(&b.2))
1731        });
1732
1733        // Deduplicate by keeping only the best match per unique unicode range
1734        let mut seen_ranges = Vec::new();
1735        let mut deduplicated = Vec::new();
1736
1737        for (id, _, _, pattern) in candidates {
1738            let mut is_new_range = false;
1739
1740            for range in &pattern.unicode_ranges {
1741                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
1742                    seen_ranges.push(*range);
1743                    is_new_range = true;
1744                }
1745            }
1746
1747            if is_new_range {
1748                deduplicated.push(id);
1749            }
1750        }
1751
1752        deduplicated
1753    }
1754
1755    /// Get in-memory font data
1756    pub fn get_memory_font(&self, id: &FontId) -> Option<&FcFont> {
1757        self.memory_fonts.get(id)
1758    }
1759
1760    /// Check if a pattern matches the query, with detailed tracing
1761    fn trace_path(k: &FcPattern) -> String {
1762        k.name.as_ref().cloned().unwrap_or_else(|| "<unknown>".to_string())
1763    }
1764
1765    pub fn query_matches_internal(
1766        k: &FcPattern,
1767        pattern: &FcPattern,
1768        trace: &mut Vec<TraceMsg>,
1769    ) -> bool {
1770        // Check name - substring match
1771        if let Some(ref name) = pattern.name {
1772            if !k.name.as_ref().map_or(false, |kn| kn.contains(name)) {
1773                trace.push(TraceMsg {
1774                    level: TraceLevel::Info,
1775                    path: Self::trace_path(k),
1776                    reason: MatchReason::NameMismatch {
1777                        requested: pattern.name.clone(),
1778                        found: k.name.clone(),
1779                    },
1780                });
1781                return false;
1782            }
1783        }
1784
1785        // Check family - substring match
1786        if let Some(ref family) = pattern.family {
1787            if !k.family.as_ref().map_or(false, |kf| kf.contains(family)) {
1788                trace.push(TraceMsg {
1789                    level: TraceLevel::Info,
1790                    path: Self::trace_path(k),
1791                    reason: MatchReason::FamilyMismatch {
1792                        requested: pattern.family.clone(),
1793                        found: k.family.clone(),
1794                    },
1795                });
1796                return false;
1797            }
1798        }
1799
1800        // Check style properties
1801        let style_properties = [
1802            (
1803                "italic",
1804                pattern.italic.needs_to_match(),
1805                pattern.italic.matches(&k.italic),
1806            ),
1807            (
1808                "oblique",
1809                pattern.oblique.needs_to_match(),
1810                pattern.oblique.matches(&k.oblique),
1811            ),
1812            (
1813                "bold",
1814                pattern.bold.needs_to_match(),
1815                pattern.bold.matches(&k.bold),
1816            ),
1817            (
1818                "monospace",
1819                pattern.monospace.needs_to_match(),
1820                pattern.monospace.matches(&k.monospace),
1821            ),
1822            (
1823                "condensed",
1824                pattern.condensed.needs_to_match(),
1825                pattern.condensed.matches(&k.condensed),
1826            ),
1827        ];
1828
1829        for (property_name, needs_to_match, matches) in style_properties {
1830            if needs_to_match && !matches {
1831                let (requested, found) = match property_name {
1832                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
1833                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
1834                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
1835                    "monospace" => (
1836                        format!("{:?}", pattern.monospace),
1837                        format!("{:?}", k.monospace),
1838                    ),
1839                    "condensed" => (
1840                        format!("{:?}", pattern.condensed),
1841                        format!("{:?}", k.condensed),
1842                    ),
1843                    _ => (String::new(), String::new()),
1844                };
1845
1846                trace.push(TraceMsg {
1847                    level: TraceLevel::Info,
1848                    path: Self::trace_path(k),
1849                    reason: MatchReason::StyleMismatch {
1850                        property: property_name,
1851                        requested,
1852                        found,
1853                    },
1854                });
1855                return false;
1856            }
1857        }
1858
1859        // Check weight - hard filter if non-normal weight is requested
1860        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
1861            trace.push(TraceMsg {
1862                level: TraceLevel::Info,
1863                path: Self::trace_path(k),
1864                reason: MatchReason::WeightMismatch {
1865                    requested: pattern.weight,
1866                    found: k.weight,
1867                },
1868            });
1869            return false;
1870        }
1871
1872        // Check stretch - hard filter if non-normal stretch is requested
1873        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
1874            trace.push(TraceMsg {
1875                level: TraceLevel::Info,
1876                path: Self::trace_path(k),
1877                reason: MatchReason::StretchMismatch {
1878                    requested: pattern.stretch,
1879                    found: k.stretch,
1880                },
1881            });
1882            return false;
1883        }
1884
1885        // Check unicode ranges if specified
1886        if !pattern.unicode_ranges.is_empty() {
1887            let mut has_overlap = false;
1888
1889            for p_range in &pattern.unicode_ranges {
1890                for k_range in &k.unicode_ranges {
1891                    if p_range.overlaps(k_range) {
1892                        has_overlap = true;
1893                        break;
1894                    }
1895                }
1896                if has_overlap {
1897                    break;
1898                }
1899            }
1900
1901            if !has_overlap {
1902                trace.push(TraceMsg {
1903                    level: TraceLevel::Info,
1904                    path: Self::trace_path(k),
1905                    reason: MatchReason::UnicodeRangeMismatch {
1906                        character: '\0', // No specific character to report
1907                        ranges: k.unicode_ranges.clone(),
1908                    },
1909                });
1910                return false;
1911            }
1912        }
1913
1914        true
1915    }
1916    
1917    /// Resolve a complete font fallback chain for a CSS font-family stack
1918    /// This is the main entry point for font resolution with caching
1919    /// Automatically expands generic CSS families (serif, sans-serif, monospace) to OS-specific fonts
1920    /// 
1921    /// # Arguments
1922    /// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
1923    /// * `text` - The text to render (used to extract Unicode ranges)
1924    /// * `weight` - Font weight
1925    /// * `italic` - Italic style requirement
1926    /// * `oblique` - Oblique style requirement
1927    /// * `trace` - Debug trace messages
1928    /// 
1929    /// # Returns
1930    /// A complete font fallback chain with CSS fallbacks and Unicode fallbacks
1931    /// 
1932    /// # Example
1933    /// ```no_run
1934    /// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
1935    /// let cache = FcFontCache::build();
1936    /// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
1937    /// let chain = cache.resolve_font_chain(&families, FcWeight::Normal, 
1938    ///                                       PatternMatch::DontCare, PatternMatch::DontCare, 
1939    ///                                       &mut Vec::new());
1940    /// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
1941    /// ```
1942    #[cfg(feature = "std")]
1943    pub fn resolve_font_chain(
1944        &self,
1945        font_families: &[String],
1946        weight: FcWeight,
1947        italic: PatternMatch,
1948        oblique: PatternMatch,
1949        trace: &mut Vec<TraceMsg>,
1950    ) -> FontFallbackChain {
1951        self.resolve_font_chain_with_os(font_families, weight, italic, oblique, trace, OperatingSystem::current())
1952    }
1953    
1954    /// Resolve font chain with explicit OS specification (useful for testing)
1955    #[cfg(feature = "std")]
1956    pub fn resolve_font_chain_with_os(
1957        &self,
1958        font_families: &[String],
1959        weight: FcWeight,
1960        italic: PatternMatch,
1961        oblique: PatternMatch,
1962        trace: &mut Vec<TraceMsg>,
1963        os: OperatingSystem,
1964    ) -> FontFallbackChain {
1965        // Check cache FIRST - key uses original (unexpanded) families
1966        // This ensures all text nodes with same CSS properties share one chain
1967        let cache_key = FontChainCacheKey {
1968            font_families: font_families.to_vec(),  // Use ORIGINAL families, not expanded
1969            weight,
1970            italic,
1971            oblique,
1972        };
1973        
1974        if let Some(cached) = self.chain_cache.lock().ok().and_then(|c| c.get(&cache_key).cloned()) {
1975            return cached;
1976        }
1977
1978        // Expand generic CSS families to OS-specific fonts (no unicode ranges needed anymore)
1979        let expanded_families = expand_font_families(font_families, os, &[]);
1980        
1981        // Build the chain
1982        let chain = self.resolve_font_chain_uncached(
1983            &expanded_families,
1984            weight,
1985            italic,
1986            oblique,
1987            trace,
1988        );
1989        
1990        // Cache the result
1991        if let Ok(mut cache) = self.chain_cache.lock() {
1992            cache.insert(cache_key, chain.clone());
1993        }
1994        
1995        chain
1996    }
1997    
1998    /// Internal implementation without caching
1999    /// 
2000    /// Note: This function no longer takes text/unicode_ranges as input.
2001    /// Instead, the returned FontFallbackChain has a query_for_text() method
2002    /// that can be called to resolve which fonts to use for specific text.
2003    #[cfg(feature = "std")]
2004    fn resolve_font_chain_uncached(
2005        &self,
2006        font_families: &[String],
2007        weight: FcWeight,
2008        italic: PatternMatch,
2009        oblique: PatternMatch,
2010        trace: &mut Vec<TraceMsg>,
2011    ) -> FontFallbackChain {
2012        let mut css_fallbacks = Vec::new();
2013        
2014        // Resolve each CSS font-family to its system fallbacks
2015        for (_i, family) in font_families.iter().enumerate() {
2016            // Check if this is a generic font family
2017            let (pattern, is_generic) = if config::is_generic_family(family) {
2018                let monospace = if family.eq_ignore_ascii_case("monospace") {
2019                    PatternMatch::True
2020                } else {
2021                    PatternMatch::False
2022                };
2023                let pattern = FcPattern {
2024                    name: None,
2025                    weight,
2026                    italic,
2027                    oblique,
2028                    monospace,
2029                    unicode_ranges: Vec::new(),
2030                    ..Default::default()
2031                };
2032                (pattern, true)
2033            } else {
2034                // Specific font family name
2035                let pattern = FcPattern {
2036                    name: Some(family.clone()),
2037                    weight,
2038                    italic,
2039                    oblique,
2040                    unicode_ranges: Vec::new(),
2041                    ..Default::default()
2042                };
2043                (pattern, false)
2044            };
2045            
2046            // Use fuzzy matching for specific fonts (fast token-based lookup)
2047            // For generic families, use query (slower but necessary for property matching)
2048            let mut matches = if is_generic {
2049                // Generic families need full pattern matching
2050                self.query_internal(&pattern, trace)
2051            } else {
2052                // Specific font names: use fast token-based fuzzy matching
2053                self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
2054            };
2055            
2056            // For generic families, limit to top 5 fonts to avoid too many matches
2057            if is_generic && matches.len() > 5 {
2058                matches.truncate(5);
2059            }
2060            
2061            // Always add the CSS fallback group to preserve CSS ordering
2062            // even if no fonts were found for this family
2063            css_fallbacks.push(CssFallbackGroup {
2064                css_name: family.clone(),
2065                fonts: matches,
2066            });
2067        }
2068        
2069        // Populate unicode_fallbacks for major script blocks.
2070        // CSS fallback fonts may falsely claim CJK coverage via OS/2 bits
2071        // without having actual glyphs, so we always search for fallback fonts
2072        // and let resolve_char() prefer CSS fallbacks first (they come first in order).
2073        let important_ranges = [
2074            UnicodeRange { start: 0x0400, end: 0x04FF }, // Cyrillic
2075            UnicodeRange { start: 0x0600, end: 0x06FF }, // Arabic
2076            UnicodeRange { start: 0x0900, end: 0x097F }, // Devanagari
2077            UnicodeRange { start: 0x3040, end: 0x309F }, // Hiragana
2078            UnicodeRange { start: 0x30A0, end: 0x30FF }, // Katakana
2079            UnicodeRange { start: 0x4E00, end: 0x9FFF }, // CJK Unified Ideographs
2080            UnicodeRange { start: 0xAC00, end: 0xD7A3 }, // Hangul Syllables
2081        ];
2082        let all_uncovered = vec![false; important_ranges.len()];
2083        let unicode_fallbacks = self.find_unicode_fallbacks(
2084            &important_ranges,
2085            &all_uncovered,
2086            &css_fallbacks,
2087            weight,
2088            italic,
2089            oblique,
2090            trace,
2091        );
2092
2093        FontFallbackChain {
2094            css_fallbacks,
2095            unicode_fallbacks,
2096            original_stack: font_families.to_vec(),
2097        }
2098    }
2099    
2100    /// Extract Unicode ranges from text
2101    #[allow(dead_code)]
2102    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
2103        let mut chars: Vec<char> = text.chars().collect();
2104        chars.sort_unstable();
2105        chars.dedup();
2106        
2107        if chars.is_empty() {
2108            return Vec::new();
2109        }
2110        
2111        let mut ranges = Vec::new();
2112        let mut range_start = chars[0] as u32;
2113        let mut range_end = range_start;
2114        
2115        for &c in &chars[1..] {
2116            let codepoint = c as u32;
2117            if codepoint == range_end + 1 {
2118                range_end = codepoint;
2119            } else {
2120                ranges.push(UnicodeRange { start: range_start, end: range_end });
2121                range_start = codepoint;
2122                range_end = codepoint;
2123            }
2124        }
2125        
2126        ranges.push(UnicodeRange { start: range_start, end: range_end });
2127        ranges
2128    }
2129    
2130    /// Fuzzy query for fonts by name when exact match fails
2131    /// Uses intelligent token-based matching with inverted index for speed:
2132    /// 1. Break name into tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2133    /// 2. Use token_index to find candidate fonts via BTreeSet intersection
2134    /// 3. Score only the candidate fonts (instead of all 800+ patterns)
2135    /// 4. Prioritize fonts matching more tokens + Unicode coverage
2136    #[cfg(feature = "std")]
2137    fn fuzzy_query_by_name(
2138        &self,
2139        requested_name: &str,
2140        weight: FcWeight,
2141        italic: PatternMatch,
2142        oblique: PatternMatch,
2143        unicode_ranges: &[UnicodeRange],
2144        _trace: &mut Vec<TraceMsg>,
2145    ) -> Vec<FontMatch> {
2146        // Extract tokens from the requested name (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
2147        let tokens = Self::extract_font_name_tokens(requested_name);
2148        
2149        if tokens.is_empty() {
2150            return Vec::new();
2151        }
2152        
2153        // Convert tokens to lowercase for case-insensitive lookup
2154        let tokens_lower: Vec<String> = tokens.iter().map(|t| t.to_lowercase()).collect();
2155        
2156        // Progressive token matching strategy:
2157        // Start with first token, then progressively narrow down with each additional token
2158        // If adding a token results in 0 matches, use the previous (broader) set
2159        // Example: ["Noto"] -> 10 fonts, ["Noto","Sans"] -> 2 fonts, ["Noto","Sans","JP"] -> 0 fonts => use 2 fonts
2160        
2161        // Start with the first token
2162        let first_token = &tokens_lower[0];
2163        let mut candidate_ids = match self.token_index.get(first_token) {
2164            Some(ids) if !ids.is_empty() => ids.clone(),
2165            _ => {
2166                // First token not found - no fonts match, quit immediately
2167                return Vec::new();
2168            }
2169        };
2170        
2171        // Progressively narrow down with each additional token
2172        for token in &tokens_lower[1..] {
2173            if let Some(token_ids) = self.token_index.get(token) {
2174                // Calculate intersection
2175                let intersection: alloc::collections::BTreeSet<FontId> = 
2176                    candidate_ids.intersection(token_ids).copied().collect();
2177                
2178                if intersection.is_empty() {
2179                    // Adding this token results in 0 matches - keep previous set and stop
2180                    break;
2181                } else {
2182                    // Successfully narrowed down - use intersection
2183                    candidate_ids = intersection;
2184                }
2185            } else {
2186                // Token not in index - keep current set and stop
2187                break;
2188            }
2189        }
2190        
2191        // Now score only the candidate fonts (HUGE speedup!)
2192        let mut candidates = Vec::new();
2193        
2194        for id in candidate_ids {
2195            let pattern = match self.metadata.get(&id) {
2196                Some(p) => p,
2197                None => continue,
2198            };
2199            
2200            // Get pre-tokenized font name (already lowercase)
2201            let font_tokens_lower = match self.font_tokens.get(&id) {
2202                Some(tokens) => tokens,
2203                None => continue,
2204            };
2205            
2206            if font_tokens_lower.is_empty() {
2207                continue;
2208            }
2209            
2210            // Calculate token match score (how many requested tokens appear in font name)
2211            // Both tokens_lower and font_tokens_lower are already lowercase, so direct comparison
2212            let token_matches = tokens_lower.iter()
2213                .filter(|req_token| {
2214                    font_tokens_lower.iter().any(|font_token| {
2215                        // Both already lowercase — exact token match (index guarantees candidates)
2216                        font_token == *req_token
2217                    })
2218                })
2219                .count();
2220            
2221            // Skip if no tokens match (shouldn't happen due to index, but safety check)
2222            if token_matches == 0 {
2223                continue;
2224            }
2225            
2226            // Calculate token similarity score (0-100)
2227            let token_similarity = (token_matches * 100 / tokens.len()) as i32;
2228            
2229            // Calculate Unicode range similarity
2230            let unicode_similarity = if !unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2231                Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
2232            } else {
2233                0
2234            };
2235            
2236            // CRITICAL: If we have Unicode requirements, ONLY accept fonts that cover them
2237            // A font with great name match but no Unicode coverage is useless
2238            if !unicode_ranges.is_empty() && unicode_similarity == 0 {
2239                continue;
2240            }
2241            
2242            let style_score = Self::calculate_style_score(&FcPattern {
2243                weight,
2244                italic,
2245                oblique,
2246                ..Default::default()
2247            }, pattern);
2248            
2249            candidates.push((
2250                id,
2251                token_similarity,
2252                unicode_similarity,
2253                style_score,
2254                pattern.clone(),
2255            ));
2256        }
2257        
2258        // Sort by:
2259        // 1. Token matches (more matches = better)
2260        // 2. Unicode compatibility (if ranges provided)
2261        // 3. Style score (lower is better)
2262        // 4. Deterministic tiebreaker: prefer non-italic, then by font name
2263        candidates.sort_by(|a, b| {
2264            if !unicode_ranges.is_empty() {
2265                // When we have Unicode requirements, prioritize coverage
2266                b.1.cmp(&a.1) // Token similarity (higher is better) - PRIMARY
2267                    .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better) - SECONDARY
2268                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better) - TERTIARY
2269                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2270                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2271            } else {
2272                // No Unicode requirements, token similarity is primary
2273                b.1.cmp(&a.1) // Token similarity (higher is better)
2274                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
2275                    .then_with(|| a.4.italic.cmp(&b.4.italic)) // Prefer non-italic (False < True)
2276                    .then_with(|| a.4.name.cmp(&b.4.name)) // Alphabetical by name
2277            }
2278        });
2279        
2280        // Take top 5 matches
2281        candidates.truncate(5);
2282        
2283        // Convert to FontMatch
2284        candidates
2285            .into_iter()
2286            .map(|(id, _token_sim, _unicode_sim, _style, pattern)| {
2287                FontMatch {
2288                    id,
2289                    unicode_ranges: pattern.unicode_ranges.clone(),
2290                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2291                }
2292            })
2293            .collect()
2294    }
2295    
2296    /// Extract tokens from a font name
2297    /// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
2298    /// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
2299    pub fn extract_font_name_tokens(name: &str) -> Vec<String> {
2300        let mut tokens = Vec::new();
2301        let mut current_token = String::new();
2302        let mut last_was_lower = false;
2303        
2304        for c in name.chars() {
2305            if c.is_whitespace() || c == '-' || c == '_' {
2306                // Word separator
2307                if !current_token.is_empty() {
2308                    tokens.push(current_token.clone());
2309                    current_token.clear();
2310                }
2311                last_was_lower = false;
2312            } else if c.is_uppercase() && last_was_lower && !current_token.is_empty() {
2313                // CamelCase boundary (e.g., "Noto" | "Sans")
2314                tokens.push(current_token.clone());
2315                current_token.clear();
2316                current_token.push(c);
2317                last_was_lower = false;
2318            } else {
2319                current_token.push(c);
2320                last_was_lower = c.is_lowercase();
2321            }
2322        }
2323        
2324        if !current_token.is_empty() {
2325            tokens.push(current_token);
2326        }
2327        
2328        tokens
2329    }
2330    
2331    /// Find fonts to cover missing Unicode ranges
2332    /// Uses intelligent matching: prefers fonts with similar names to existing ones
2333    /// Early quits once all Unicode ranges are covered for performance
2334    fn find_unicode_fallbacks(
2335        &self,
2336        unicode_ranges: &[UnicodeRange],
2337        covered_chars: &[bool],
2338        existing_groups: &[CssFallbackGroup],
2339        _weight: FcWeight,
2340        _italic: PatternMatch,
2341        _oblique: PatternMatch,
2342        trace: &mut Vec<TraceMsg>,
2343    ) -> Vec<FontMatch> {
2344        // Extract uncovered ranges
2345        let mut uncovered_ranges = Vec::new();
2346        for (i, &covered) in covered_chars.iter().enumerate() {
2347            if !covered && i < unicode_ranges.len() {
2348                uncovered_ranges.push(unicode_ranges[i].clone());
2349            }
2350        }
2351        
2352        if uncovered_ranges.is_empty() {
2353            return Vec::new();
2354        }
2355
2356        // Query for fonts that cover these ranges.
2357        // Use DontCare for weight/italic/oblique — we want ANY font that covers
2358        // the missing characters, regardless of style. The similarity sort below
2359        // will prefer fonts matching the existing chain's style anyway.
2360        let pattern = FcPattern {
2361            name: None,
2362            weight: FcWeight::Normal, // Normal weight is not filtered by query_matches_internal (line 1836)
2363            italic: PatternMatch::DontCare,
2364            oblique: PatternMatch::DontCare,
2365            unicode_ranges: uncovered_ranges.clone(),
2366            ..Default::default()
2367        };
2368        
2369        let mut candidates = self.query_internal(&pattern, trace);
2370
2371        // Intelligent sorting: prefer fonts with similar names to existing ones
2372        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
2373        let existing_prefixes: Vec<String> = existing_groups
2374            .iter()
2375            .flat_map(|group| {
2376                group.fonts.iter().filter_map(|font| {
2377                    self.get_metadata_by_id(&font.id)
2378                        .and_then(|meta| meta.family.clone())
2379                        .and_then(|family| {
2380                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
2381                            family.split_whitespace()
2382                                .take(2)
2383                                .collect::<Vec<_>>()
2384                                .join(" ")
2385                                .into()
2386                        })
2387                })
2388            })
2389            .collect();
2390        
2391        // Sort candidates by:
2392        // 1. Name similarity to existing fonts (highest priority)
2393        // 2. Unicode coverage (secondary)
2394        candidates.sort_by(|a, b| {
2395            let a_meta = self.get_metadata_by_id(&a.id);
2396            let b_meta = self.get_metadata_by_id(&b.id);
2397            
2398            let a_score = Self::calculate_font_similarity_score(a_meta, &existing_prefixes);
2399            let b_score = Self::calculate_font_similarity_score(b_meta, &existing_prefixes);
2400            
2401            b_score.cmp(&a_score) // Higher score = better match
2402                .then_with(|| {
2403                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
2404                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
2405                    b_coverage.cmp(&a_coverage)
2406                })
2407        });
2408        
2409        // Early quit optimization: only take fonts until all ranges are covered
2410        let mut result = Vec::new();
2411        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
2412        
2413        for candidate in candidates {
2414            // Check which ranges this font covers
2415            let mut covers_new_range = false;
2416            
2417            for (i, range) in uncovered_ranges.iter().enumerate() {
2418                if remaining_uncovered[i] {
2419                    // Check if this font covers this range
2420                    for font_range in &candidate.unicode_ranges {
2421                        if font_range.overlaps(range) {
2422                            remaining_uncovered[i] = false;
2423                            covers_new_range = true;
2424                            break;
2425                        }
2426                    }
2427                }
2428            }
2429            
2430            // Only add fonts that cover at least one new range
2431            if covers_new_range {
2432                result.push(candidate);
2433                
2434                // Early quit: if all ranges are covered, stop
2435                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
2436                    break;
2437                }
2438            }
2439        }
2440        
2441        result
2442    }
2443    
2444    /// Calculate similarity score between a font and existing font prefixes
2445    /// Higher score = more similar
2446    fn calculate_font_similarity_score(
2447        font_meta: Option<&FcPattern>,
2448        existing_prefixes: &[String],
2449    ) -> i32 {
2450        let Some(meta) = font_meta else { return 0; };
2451        let Some(family) = &meta.family else { return 0; };
2452        
2453        // Check if this font's family matches any existing prefix
2454        for prefix in existing_prefixes {
2455            if family.starts_with(prefix) {
2456                return 100; // Strong match
2457            }
2458            if family.contains(prefix) {
2459                return 50; // Partial match
2460            }
2461        }
2462        
2463        0 // No match
2464    }
2465    
2466    /// Find fallback fonts for a given pattern
2467    // Helper to calculate total unicode coverage
2468    pub fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
2469        ranges
2470            .iter()
2471            .map(|range| (range.end - range.start + 1) as u64)
2472            .sum()
2473    }
2474
2475    /// Calculate how well a font's Unicode ranges cover the requested ranges
2476    /// Returns a compatibility score (higher is better, 0 means no overlap)
2477    pub fn calculate_unicode_compatibility(
2478        requested: &[UnicodeRange],
2479        available: &[UnicodeRange],
2480    ) -> i32 {
2481        if requested.is_empty() {
2482            // No specific requirements, return total coverage
2483            return Self::calculate_unicode_coverage(available) as i32;
2484        }
2485        
2486        let mut total_coverage = 0u32;
2487        
2488        for req_range in requested {
2489            for avail_range in available {
2490                // Calculate overlap between requested and available ranges
2491                let overlap_start = req_range.start.max(avail_range.start);
2492                let overlap_end = req_range.end.min(avail_range.end);
2493                
2494                if overlap_start <= overlap_end {
2495                    // There is overlap
2496                    let overlap_size = overlap_end - overlap_start + 1;
2497                    total_coverage += overlap_size;
2498                }
2499            }
2500        }
2501        
2502        total_coverage as i32
2503    }
2504
2505    pub fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {
2506
2507        let mut score = 0_i32;
2508
2509        // Weight calculation with special handling for bold property
2510        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
2511            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
2512        {
2513            // No weight penalty when bold is requested and font has Bold weight
2514            // No weight penalty when non-bold is requested and font has non-Bold weight
2515        } else {
2516            // Apply normal weight difference penalty
2517            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
2518            score += weight_diff as i32;
2519        }
2520
2521        // Exact weight match bonus: reward fonts whose weight matches the request exactly,
2522        // with an extra bonus when both are Normal (the most common case for body text)
2523        if original.weight == candidate.weight {
2524            score -= 15;
2525            if original.weight == FcWeight::Normal {
2526                score -= 10; // Extra bonus for Normal-Normal match
2527            }
2528        }
2529
2530        // Stretch calculation with special handling for condensed property
2531        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
2532            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
2533        {
2534            // No stretch penalty when condensed is requested and font has condensed stretch
2535            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
2536        } else {
2537            // Apply normal stretch difference penalty
2538            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
2539            score += (stretch_diff * 100) as i32;
2540        }
2541
2542        // Handle style properties with standard penalties and bonuses
2543        let style_props = [
2544            (original.italic, candidate.italic, 300, 150),
2545            (original.oblique, candidate.oblique, 200, 100),
2546            (original.bold, candidate.bold, 300, 150),
2547            (original.monospace, candidate.monospace, 100, 50),
2548            (original.condensed, candidate.condensed, 100, 50),
2549        ];
2550
2551        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
2552            if orig.needs_to_match() {
2553                if orig == PatternMatch::False && cand == PatternMatch::DontCare {
2554                    // Requesting non-italic but font doesn't declare: small penalty
2555                    // (less than a full mismatch but more than a perfect match)
2556                    score += dontcare_penalty / 2;
2557                } else if !orig.matches(&cand) {
2558                    if cand == PatternMatch::DontCare {
2559                        score += dontcare_penalty;
2560                    } else {
2561                        score += mismatch_penalty;
2562                    }
2563                } else if orig == PatternMatch::True && cand == PatternMatch::True {
2564                    // Give bonus for exact True match
2565                    score -= 20;
2566                } else if orig == PatternMatch::False && cand == PatternMatch::False {
2567                    // Give bonus for exact False match (prefer explicitly non-italic
2568                    // over fonts with unknown/DontCare italic status)
2569                    score -= 20;
2570                }
2571            } else {
2572                // orig == DontCare: prefer "normal" fonts over styled ones.
2573                // When the caller doesn't specify italic/bold/etc., a font
2574                // that IS italic/bold should score slightly worse than one
2575                // that isn't, so Regular is chosen over Italic by default.
2576                if cand == PatternMatch::True {
2577                    score += dontcare_penalty / 3;
2578                }
2579            }
2580        }
2581
2582        // ── Name-based "base font" detection ──
2583        // The shorter the font name relative to its family, the more "basic" the
2584        // variant.  E.g. "System Font" (the base) should score better than
2585        // "System Font Regular Italic" (a variant) when the user hasn't
2586        // explicitly requested italic.
2587        if let (Some(name), Some(family)) = (&candidate.name, &candidate.family) {
2588            let name_lower = name.to_lowercase();
2589            let family_lower = family.to_lowercase();
2590
2591            // Strip the family prefix from the name to get the "extra" part
2592            let extra = if name_lower.starts_with(&family_lower) {
2593                name_lower[family_lower.len()..].to_string()
2594            } else {
2595                String::new()
2596            };
2597
2598            // Strip common neutral descriptors that don't indicate a style variant
2599            let stripped = extra
2600                .replace("regular", "")
2601                .replace("normal", "")
2602                .replace("book", "")
2603                .replace("roman", "");
2604            let stripped = stripped.trim();
2605
2606            if stripped.is_empty() {
2607                // This is a "base font" – name is just the family (± "Regular")
2608                score -= 50;
2609            } else {
2610                // Name has extra style descriptors – add a penalty per extra word
2611                let extra_words = stripped.split_whitespace().count();
2612                score += (extra_words as i32) * 25;
2613            }
2614        }
2615
2616        // ── Subfamily "Regular" bonus ──
2617        // Fonts whose OpenType subfamily is exactly "Regular" are the canonical
2618        // base variant and should be strongly preferred.
2619        if let Some(ref subfamily) = candidate.metadata.font_subfamily {
2620            let sf_lower = subfamily.to_lowercase();
2621            if sf_lower == "regular" {
2622                score -= 30;
2623            }
2624        }
2625
2626        score
2627    }
2628}
2629
/// Walks the system fontconfig configuration, starting at
/// `/etc/fonts/fonts.conf`, and collects the declared font directories and
/// per-family render configurations.
///
/// Configuration files are parsed for `<include>` and `<dir>` entries;
/// included directories are searched (non-recursively) for files whose name
/// starts with an ASCII digit and ends in `.conf`. Paths that cannot be
/// resolved, read or parsed are skipped.
///
/// Every resolved path is processed at most once, so include cycles (a file
/// that directly or indirectly includes itself) cannot loop forever.
///
/// Returns `None` if the base configuration file does not exist or if no font
/// directory was found; otherwise returns the result of
/// `FcScanDirectoriesInner` for the collected directories together with the
/// parsed render configurations.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn FcScanDirectories() -> Option<(Vec<(FcPattern, FcFontPath)>, BTreeMap<String, FcFontRenderConfig>)> {
    use std::collections::BTreeSet;
    use std::fs;
    use std::path::Path;

    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";

    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
        return None;
    }

    let mut font_paths = Vec::with_capacity(32);
    let mut paths_to_visit = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
    let mut render_configs: BTreeMap<String, FcFontRenderConfig> = BTreeMap::new();
    // Resolved paths that were already handled (guards against include cycles)
    let mut visited = BTreeSet::new();

    while let Some((prefix, path_to_visit)) = paths_to_visit.pop() {
        let Some(path) = process_path(&prefix, path_to_visit, true) else {
            continue;
        };

        // `insert` returns false when the path was seen before
        if !visited.insert(path.clone()) {
            continue;
        }

        let Ok(metadata) = fs::metadata(&path) else {
            continue;
        };

        if metadata.is_file() {
            let Ok(xml_utf8) = fs::read_to_string(&path) else {
                continue;
            };

            if ParseFontsConf(&xml_utf8, &mut paths_to_visit, &mut font_paths).is_none() {
                continue;
            }

            // Also parse render config blocks from this file
            ParseFontsConfRenderConfig(&xml_utf8, &mut render_configs);
        } else if metadata.is_dir() {
            let Ok(dir_entries) = fs::read_dir(&path) else {
                continue;
            };

            for entry_result in dir_entries {
                let Ok(entry) = entry_result else {
                    continue;
                };

                let entry_path = entry.path();

                // `fs::metadata` traverses symbolic links
                let Ok(entry_metadata) = fs::metadata(&entry_path) else {
                    continue;
                };

                if !entry_metadata.is_file() {
                    continue;
                }

                let Some(file_name) = entry_path.file_name() else {
                    continue;
                };

                // Only files named like "10-something.conf" are queued
                let file_name_str = file_name.to_string_lossy();
                let is_numbered_conf = file_name_str.starts_with(|c: char| c.is_ascii_digit())
                    && file_name_str.ends_with(".conf");
                drop(file_name_str);

                if is_numbered_conf {
                    paths_to_visit.push((None, entry_path));
                }
            }
        }
    }

    if font_paths.is_empty() {
        return None;
    }

    Some((FcScanDirectoriesInner(&font_paths), render_configs))
}
2713
/// Parses one fontconfig XML document (`fonts.conf` or an included file).
///
/// - The text of every `<include>` element is pushed onto `paths_to_visit`.
/// - The text of every `<dir>` element is pushed onto `font_paths`.
///
/// In both cases the element's `prefix` attribute (if any) is stored alongside
/// the path. Elements without text content contribute nothing; this includes
/// self-closing `<include/>` and `<dir/>`, which are simply skipped.
///
/// Returns `None` if the tokenizer reports an error or if another element is
/// opened inside an `<include>` / `<dir>`. Entries pushed before the error
/// are left in the output vectors.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConf(
    input: &str,
    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
    font_paths: &mut Vec<(Option<String>, String)>,
) -> Option<()> {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    const TAG_INCLUDE: &str = "include";
    const TAG_DIR: &str = "dir";
    const ATTRIBUTE_PREFIX: &str = "prefix";

    let mut current_prefix: Option<&str> = None;
    let mut current_path: Option<&str> = None;
    let mut is_in_include = false;
    let mut is_in_dir = false;

    for token_result in Tokenizer::from(input) {
        let token = token_result.ok()?;

        match token {
            ElementStart { local, .. } => {
                if is_in_include || is_in_dir {
                    return None; /* error: nested tags */
                }

                match local.as_str() {
                    TAG_INCLUDE => {
                        is_in_include = true;
                    }
                    TAG_DIR => {
                        is_in_dir = true;
                    }
                    _ => continue,
                }

                current_path = None;
            }
            Text { text, .. } => {
                let text = text.as_str().trim();
                if text.is_empty() {
                    continue;
                }
                if is_in_include || is_in_dir {
                    current_path = Some(text);
                }
            }
            Attribute { local, value, .. } => {
                if !is_in_include && !is_in_dir {
                    continue;
                }
                // attribute on <include> or <dir> node
                if local.as_str() == ATTRIBUTE_PREFIX {
                    current_prefix = Some(value.as_str());
                }
            }
            ElementEnd { end, .. } => {
                let end_tag = match end {
                    xmlparser::ElementEnd::Close(_, a) => a,
                    xmlparser::ElementEnd::Empty => {
                        // A self-closing element has no text and no closing
                        // tag. Nested elements are rejected above, so while a
                        // flag is set this can only be the <include/> or
                        // <dir/> itself: leave the element, otherwise the
                        // next element start would be treated as nested and
                        // abort parsing of the whole file.
                        is_in_include = false;
                        is_in_dir = false;
                        current_path = None;
                        current_prefix = None;
                        continue;
                    }
                    // `>` of an opening tag: nothing to do yet
                    _ => continue,
                };

                match end_tag.as_str() {
                    TAG_INCLUDE => {
                        if !is_in_include {
                            continue;
                        }

                        if let Some(path) = current_path {
                            paths_to_visit.push((
                                current_prefix.map(ToOwned::to_owned),
                                PathBuf::from(path),
                            ));
                        }
                    }
                    TAG_DIR => {
                        if !is_in_dir {
                            continue;
                        }

                        if let Some(path) = current_path {
                            font_paths.push((
                                current_prefix.map(ToOwned::to_owned),
                                path.to_owned(),
                            ));
                        }
                    }
                    _ => continue,
                }

                // Element finished: reset per-element state
                is_in_include = false;
                is_in_dir = false;
                current_path = None;
                current_prefix = None;
            }
            _ => {}
        }
    }

    Some(())
}
2820
/// Parses `<match target="font">` blocks from fonts.conf XML and returns
/// a map from family name to per-font rendering configuration.
///
/// Example fonts.conf snippet that this handles:
/// ```xml
/// <match target="font">
///   <test name="family"><string>Inconsolata</string></test>
///   <edit name="antialias" mode="assign"><bool>true</bool></edit>
///   <hintstyle name="hintstyle" mode="assign"><const>hintslight</const></edit>
/// </match>
/// ```
///
/// Results are written into `configs`. A block is stored only if it contained
/// a `<test name="family">` with a value and at least one `<edit>` changed the
/// config away from its default. An entry for a family that is already in the
/// map is replaced. Tokens the tokenizer fails on are skipped.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn ParseFontsConfRenderConfig(
    input: &str,
    configs: &mut BTreeMap<String, FcFontRenderConfig>,
) {
    use xmlparser::Token::*;
    use xmlparser::Tokenizer;

    // Parser state machine
    #[derive(Clone, Copy, PartialEq)]
    enum State {
        /// Outside any relevant block
        Idle,
        /// Inside <match target="font">
        InMatchFont,
        /// Inside <test name="family"> within a match block
        InTestFamily,
        /// Inside <edit name="..."> within a match block
        InEdit,
        /// Inside a value element (<bool>, <double>, <const>, <string>) within <edit> or <test>
        /// (declared, but never assigned by the code below)
        InValue,
    }

    let mut state = State::Idle;
    // Set while reading attributes, consumed when the opening tag ends (`>`)
    let mut match_is_font_target = false;
    // Family name taken from the <test name="family"> of the current block
    let mut current_family: Option<String> = None;
    // `name` attribute of the <edit> currently being read
    let mut current_edit_name: Option<String> = None;
    // Last non-empty text seen inside the current <test>/<edit>
    let mut current_value: Option<String> = None;
    // Tag name of the value element (<bool>, <const>, ...) that holds the text
    let mut value_tag: Option<String> = None;
    // Settings accumulated for the current match block
    let mut config = FcFontRenderConfig::default();
    // True between the start of a <test ...> tag and the end of that opening tag
    let mut in_test = false;
    // `name` attribute of the <test> currently being opened
    let mut test_name: Option<String> = None;

    for token_result in Tokenizer::from(input) {
        let token = match token_result {
            Ok(token) => token,
            Err(_) => continue,
        };

        match token {
            // `<tag` seen; attributes and the closing `>` arrive as later tokens
            ElementStart { local, .. } => {
                let tag = local.as_str();
                match tag {
                    "match" => {
                        // Reset state for a new match block
                        match_is_font_target = false;
                        current_family = None;
                        config = FcFontRenderConfig::default();
                    }
                    "test" if state == State::InMatchFont => {
                        in_test = true;
                        test_name = None;
                    }
                    "edit" if state == State::InMatchFont => {
                        current_edit_name = None;
                    }
                    "bool" | "double" | "const" | "string" | "int" => {
                        // Remember which kind of value element holds the text
                        if state == State::InTestFamily || state == State::InEdit {
                            value_tag = Some(tag.to_owned());
                            current_value = None;
                        }
                    }
                    _ => {}
                }
            }
            Attribute { local, value, .. } => {
                let attr_name = local.as_str();
                let attr_value = value.as_str();

                match attr_name {
                    "target" => {
                        // NOTE(review): the owning element is not checked here, so a
                        // target="font" attribute on any element sets this flag.
                        if attr_value == "font" {
                            match_is_font_target = true;
                        }
                    }
                    "name" => {
                        // Inside a match block a `name` belongs either to the <test>
                        // being opened or, otherwise, is taken as the <edit> name
                        if in_test && state == State::InMatchFont {
                            test_name = Some(attr_value.to_owned());
                        } else if state == State::InMatchFont {
                            current_edit_name = Some(attr_value.to_owned());
                        }
                    }
                    _ => {}
                }
            }
            Text { text, .. } => {
                // Only non-blank text inside a family test or an edit is kept
                let text = text.as_str().trim();
                if !text.is_empty() && (state == State::InTestFamily || state == State::InEdit) {
                    current_value = Some(text.to_owned());
                }
            }
            ElementEnd { end, .. } => {
                match end {
                    xmlparser::ElementEnd::Open => {
                        // Tag just opened (after attributes processed)
                        if match_is_font_target && state == State::Idle {
                            state = State::InMatchFont;
                            match_is_font_target = false;
                        } else if in_test {
                            // Only tests on "family" are tracked; other tests
                            // leave the state at InMatchFont
                            if test_name.as_deref() == Some("family") {
                                state = State::InTestFamily;
                            }
                            in_test = false;
                        } else if current_edit_name.is_some() && state == State::InMatchFont {
                            state = State::InEdit;
                        }
                    }
                    xmlparser::ElementEnd::Close(_, local) => {
                        let tag = local.as_str();
                        match tag {
                            "match" => {
                                // End of match block: store config if we have a family
                                if let Some(family) = current_family.take() {
                                    let empty = FcFontRenderConfig::default();
                                    if config != empty {
                                        configs.insert(family, config.clone());
                                    }
                                }
                                state = State::Idle;
                                config = FcFontRenderConfig::default();
                            }
                            "test" => {
                                if state == State::InTestFamily {
                                    // Extract the family name from the value we collected
                                    if let Some(ref val) = current_value {
                                        current_family = Some(val.clone());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_value = None;
                                value_tag = None;
                            }
                            "edit" => {
                                if state == State::InEdit {
                                    // Apply the collected value to the config
                                    if let (Some(ref name), Some(ref val)) = (&current_edit_name, &current_value) {
                                        apply_edit_value(&mut config, name, val, value_tag.as_deref());
                                    }
                                    state = State::InMatchFont;
                                }
                                current_edit_name = None;
                                current_value = None;
                                value_tag = None;
                            }
                            "bool" | "double" | "const" | "string" | "int" => {
                                // Nothing to do: value_tag was set at the element start
                                // and current_value by the Text handler
                            }
                            _ => {}
                        }
                    }
                    xmlparser::ElementEnd::Empty => {
                        // Self-closing tags: nothing to do
                    }
                }
            }
            _ => {}
        }
    }
}
2991
/// Stores one parsed `<edit>` value in the matching field of `config`.
///
/// `edit_name` is the `name` attribute of the `<edit>` element and `value` its
/// trimmed text content. Boolean and constant properties are assigned the
/// parser's result directly (so an unrecognised value stores `None`), while
/// `dpi` and `scale` are only updated when the text parses as a float.
/// Unknown property names are ignored.
#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
fn apply_edit_value(
    config: &mut FcFontRenderConfig,
    edit_name: &str,
    value: &str,
    value_tag: Option<&str>,
) {
    // The kind of value element is not consulted; interpretation depends on
    // the property name alone.
    let _ = value_tag;

    // Numeric properties share the same float parsing
    let as_float = |raw: &str| raw.parse::<f64>().ok();

    match edit_name {
        // Boolean switches
        "antialias" => config.antialias = parse_bool_value(value),
        "hinting" => config.hinting = parse_bool_value(value),
        "autohint" => config.autohint = parse_bool_value(value),
        "embeddedbitmap" => config.embeddedbitmap = parse_bool_value(value),
        "embolden" => config.embolden = parse_bool_value(value),
        "minspace" => config.minspace = parse_bool_value(value),

        // Named constants
        "hintstyle" => config.hintstyle = parse_hintstyle_const(value),
        "rgba" => config.rgba = parse_rgba_const(value),
        "lcdfilter" => config.lcdfilter = parse_lcdfilter_const(value),

        // Numbers: keep the previous value when parsing fails
        "dpi" => {
            if let Some(dpi) = as_float(value) {
                config.dpi = Some(dpi);
            }
        }
        "scale" => {
            if let Some(scale) = as_float(value) {
                config.scale = Some(scale);
            }
        }

        // Unknown edit property, ignore
        _ => {}
    }
}
3043
3044#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
/// Interprets the text of a `<bool>` element.
///
/// Only the exact strings `"true"` and `"false"` are recognised; anything
/// else yields `None`.
fn parse_bool_value(value: &str) -> Option<bool> {
    if value == "true" {
        Some(true)
    } else if value == "false" {
        Some(false)
    } else {
        None
    }
}
3052
3053#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
3054fn parse_hintstyle_const(value: &str) -> Option<FcHintStyle> {
3055    match value {
3056        "hintnone" => Some(FcHintStyle::None),
3057        "hintslight" => Some(FcHintStyle::Slight),
3058        "hintmedium" => Some(FcHintStyle::Medium),
3059        "hintfull" => Some(FcHintStyle::Full),
3060        _ => None,
3061    }
3062}
3063
3064#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
3065fn parse_rgba_const(value: &str) -> Option<FcRgba> {
3066    match value {
3067        "unknown" => Some(FcRgba::Unknown),
3068        "rgb" => Some(FcRgba::Rgb),
3069        "bgr" => Some(FcRgba::Bgr),
3070        "vrgb" => Some(FcRgba::Vrgb),
3071        "vbgr" => Some(FcRgba::Vbgr),
3072        "none" => Some(FcRgba::None),
3073        _ => None,
3074    }
3075}
3076
3077#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
3078fn parse_lcdfilter_const(value: &str) -> Option<FcLcdFilter> {
3079    match value {
3080        "lcdnone" => Some(FcLcdFilter::None),
3081        "lcddefault" => Some(FcLcdFilter::Default),
3082        "lcdlight" => Some(FcLcdFilter::Light),
3083        "lcdlegacy" => Some(FcLcdFilter::Legacy),
3084        _ => None,
3085    }
3086}
3087
3088// Unicode range bit positions to actual ranges (full table from OpenType spec).
3089// Based on: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
3090#[cfg(all(feature = "std", feature = "parsing"))]
/// Mapping from OS/2 `ulUnicodeRange1..4` bit numbers to Unicode blocks.
///
/// Each entry is `(bit, first_codepoint, last_codepoint)`; the bit number counts
/// from 0 across the four 32-bit fields (`bit / 32` selects the field,
/// `bit % 32` the bit inside it). Bit assignments follow the OpenType OS/2
/// specification. A single bit may cover several blocks, in which case it
/// appears in several consecutive entries. Entries are sorted by bit number.
/// Bits 123-127 are reserved by the specification and intentionally unmapped.
const UNICODE_RANGE_MAPPINGS: &[(usize, u32, u32)] = &[
    // ulUnicodeRange1 (bits 0-31)
    (0, 0x0000, 0x007F), // Basic Latin
    (1, 0x0080, 0x00FF), // Latin-1 Supplement
    (2, 0x0100, 0x017F), // Latin Extended-A
    (3, 0x0180, 0x024F), // Latin Extended-B
    (4, 0x0250, 0x02AF), // IPA Extensions
    (4, 0x1D00, 0x1D7F), // Phonetic Extensions
    (4, 0x1D80, 0x1DBF), // Phonetic Extensions Supplement
    (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
    (5, 0xA700, 0xA71F), // Modifier Tone Letters
    (6, 0x0300, 0x036F), // Combining Diacritical Marks
    (6, 0x1DC0, 0x1DFF), // Combining Diacritical Marks Supplement
    (7, 0x0370, 0x03FF), // Greek and Coptic
    (8, 0x2C80, 0x2CFF), // Coptic
    (9, 0x0400, 0x04FF), // Cyrillic
    (9, 0x0500, 0x052F), // Cyrillic Supplement
    (9, 0x2DE0, 0x2DFF), // Cyrillic Extended-A
    (9, 0xA640, 0xA69F), // Cyrillic Extended-B
    (10, 0x0530, 0x058F), // Armenian
    (11, 0x0590, 0x05FF), // Hebrew
    (12, 0xA500, 0xA63F), // Vai
    (13, 0x0600, 0x06FF), // Arabic
    (13, 0x0750, 0x077F), // Arabic Supplement
    (14, 0x07C0, 0x07FF), // NKo
    (15, 0x0900, 0x097F), // Devanagari
    (16, 0x0980, 0x09FF), // Bengali
    (17, 0x0A00, 0x0A7F), // Gurmukhi
    (18, 0x0A80, 0x0AFF), // Gujarati
    (19, 0x0B00, 0x0B7F), // Oriya
    (20, 0x0B80, 0x0BFF), // Tamil
    (21, 0x0C00, 0x0C7F), // Telugu
    (22, 0x0C80, 0x0CFF), // Kannada
    (23, 0x0D00, 0x0D7F), // Malayalam
    (24, 0x0E00, 0x0E7F), // Thai
    (25, 0x0E80, 0x0EFF), // Lao
    (26, 0x10A0, 0x10FF), // Georgian
    (26, 0x2D00, 0x2D2F), // Georgian Supplement
    (27, 0x1B00, 0x1B7F), // Balinese
    (28, 0x1100, 0x11FF), // Hangul Jamo
    (29, 0x1E00, 0x1EFF), // Latin Extended Additional
    (29, 0x2C60, 0x2C7F), // Latin Extended-C
    (29, 0xA720, 0xA7FF), // Latin Extended-D
    (30, 0x1F00, 0x1FFF), // Greek Extended
    (31, 0x2000, 0x206F), // General Punctuation
    (31, 0x2E00, 0x2E7F), // Supplemental Punctuation
    // ulUnicodeRange2 (bits 32-63)
    (32, 0x2070, 0x209F), // Superscripts And Subscripts
    (33, 0x20A0, 0x20CF), // Currency Symbols
    (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
    (35, 0x2100, 0x214F), // Letterlike Symbols
    (36, 0x2150, 0x218F), // Number Forms
    (37, 0x2190, 0x21FF), // Arrows
    (37, 0x27F0, 0x27FF), // Supplemental Arrows-A
    (37, 0x2900, 0x297F), // Supplemental Arrows-B
    (37, 0x2B00, 0x2BFF), // Miscellaneous Symbols and Arrows
    (38, 0x2200, 0x22FF), // Mathematical Operators
    (38, 0x27C0, 0x27EF), // Miscellaneous Mathematical Symbols-A
    (38, 0x2980, 0x29FF), // Miscellaneous Mathematical Symbols-B
    (38, 0x2A00, 0x2AFF), // Supplemental Mathematical Operators
    (39, 0x2300, 0x23FF), // Miscellaneous Technical
    (40, 0x2400, 0x243F), // Control Pictures
    (41, 0x2440, 0x245F), // Optical Character Recognition
    (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
    (43, 0x2500, 0x257F), // Box Drawing
    (44, 0x2580, 0x259F), // Block Elements
    (45, 0x25A0, 0x25FF), // Geometric Shapes
    (46, 0x2600, 0x26FF), // Miscellaneous Symbols
    (47, 0x2700, 0x27BF), // Dingbats
    (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
    (49, 0x3040, 0x309F), // Hiragana
    (50, 0x30A0, 0x30FF), // Katakana
    (50, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
    (51, 0x3100, 0x312F), // Bopomofo
    (51, 0x31A0, 0x31BF), // Bopomofo Extended
    (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
    (53, 0xA840, 0xA87F), // Phags-pa
    (54, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
    (55, 0x3300, 0x33FF), // CJK Compatibility
    (56, 0xAC00, 0xD7AF), // Hangul Syllables
    (57, 0x10000, 0x10FFFF), // Non-Plane 0 (at least one codepoint beyond the BMP)
    (58, 0x10900, 0x1091F), // Phoenician
    (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
    (59, 0x2E80, 0x2EFF), // CJK Radicals Supplement
    (59, 0x2F00, 0x2FDF), // Kangxi Radicals
    (59, 0x2FF0, 0x2FFF), // Ideographic Description Characters
    (59, 0x3190, 0x319F), // Kanbun
    (59, 0x3400, 0x4DBF), // CJK Unified Ideographs Extension A
    (59, 0x20000, 0x2A6DF), // CJK Unified Ideographs Extension B
    (60, 0xE000, 0xF8FF), // Private Use Area (plane 0)
    (61, 0x31C0, 0x31EF), // CJK Strokes
    (61, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
    (61, 0x2F800, 0x2FA1F), // CJK Compatibility Ideographs Supplement
    (62, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
    (63, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
    // ulUnicodeRange3 (bits 64-95)
    (64, 0xFE20, 0xFE2F), // Combining Half Marks
    (65, 0xFE10, 0xFE1F), // Vertical Forms
    (65, 0xFE30, 0xFE4F), // CJK Compatibility Forms
    (66, 0xFE50, 0xFE6F), // Small Form Variants
    (67, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
    (68, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
    (69, 0xFFF0, 0xFFFF), // Specials
    (70, 0x0F00, 0x0FFF), // Tibetan
    (71, 0x0700, 0x074F), // Syriac
    (72, 0x0780, 0x07BF), // Thaana
    (73, 0x0D80, 0x0DFF), // Sinhala
    (74, 0x1000, 0x109F), // Myanmar
    (75, 0x1200, 0x137F), // Ethiopic
    (75, 0x1380, 0x139F), // Ethiopic Supplement
    (75, 0x2D80, 0x2DDF), // Ethiopic Extended
    (76, 0x13A0, 0x13FF), // Cherokee
    (77, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
    (78, 0x1680, 0x169F), // Ogham
    (79, 0x16A0, 0x16FF), // Runic
    (80, 0x1780, 0x17FF), // Khmer
    (80, 0x19E0, 0x19FF), // Khmer Symbols
    (81, 0x1800, 0x18AF), // Mongolian
    (82, 0x2800, 0x28FF), // Braille Patterns
    (83, 0xA000, 0xA48F), // Yi Syllables
    (83, 0xA490, 0xA4CF), // Yi Radicals
    (84, 0x1700, 0x171F), // Tagalog
    (84, 0x1720, 0x173F), // Hanunoo
    (84, 0x1740, 0x175F), // Buhid
    (84, 0x1760, 0x177F), // Tagbanwa
    (85, 0x10300, 0x1032F), // Old Italic
    (86, 0x10330, 0x1034F), // Gothic
    (87, 0x10400, 0x1044F), // Deseret
    (88, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
    (88, 0x1D100, 0x1D1FF), // Musical Symbols
    (88, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
    (89, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
    (90, 0xF0000, 0xFFFFD), // Private Use (plane 15)
    (90, 0x100000, 0x10FFFD), // Private Use (plane 16)
    (91, 0xFE00, 0xFE0F), // Variation Selectors
    (91, 0xE0100, 0xE01EF), // Variation Selectors Supplement
    (92, 0xE0000, 0xE007F), // Tags
    (93, 0x1900, 0x194F), // Limbu
    (94, 0x1950, 0x197F), // Tai Le
    (95, 0x1980, 0x19DF), // New Tai Lue
    // ulUnicodeRange4 (bits 96-127)
    (96, 0x1A00, 0x1A1F), // Buginese
    (97, 0x2C00, 0x2C5F), // Glagolitic
    (98, 0x2D30, 0x2D7F), // Tifinagh
    (99, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
    (100, 0xA800, 0xA82F), // Syloti Nagri
    (101, 0x10000, 0x1007F), // Linear B Syllabary
    (101, 0x10080, 0x100FF), // Linear B Ideograms
    (101, 0x10100, 0x1013F), // Aegean Numbers
    (102, 0x10140, 0x1018F), // Ancient Greek Numbers
    (103, 0x10380, 0x1039F), // Ugaritic
    (104, 0x103A0, 0x103DF), // Old Persian
    (105, 0x10450, 0x1047F), // Shavian
    (106, 0x10480, 0x104AF), // Osmanya
    (107, 0x10800, 0x1083F), // Cypriot Syllabary
    (108, 0x10A00, 0x10A5F), // Kharoshthi
    (109, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
    (110, 0x12000, 0x123FF), // Cuneiform
    (110, 0x12400, 0x1247F), // Cuneiform Numbers and Punctuation
    (111, 0x1D360, 0x1D37F), // Counting Rod Numerals
    (112, 0x1B80, 0x1BBF), // Sundanese
    (113, 0x1C00, 0x1C4F), // Lepcha
    (114, 0x1C50, 0x1C7F), // Ol Chiki
    (115, 0xA880, 0xA8DF), // Saurashtra
    (116, 0xA900, 0xA92F), // Kayah Li
    (117, 0xA930, 0xA95F), // Rejang
    (118, 0xAA00, 0xAA5F), // Cham
    (119, 0x10190, 0x101CF), // Ancient Symbols
    (120, 0x101D0, 0x101FF), // Phaistos Disc
    (121, 0x10280, 0x1029F), // Lycian
    (121, 0x102A0, 0x102DF), // Carian
    (121, 0x10920, 0x1093F), // Lydian
    (122, 0x1F000, 0x1F02F), // Mahjong Tiles
    (122, 0x1F030, 0x1F09F), // Domino Tiles
    // Bits 123-127 are reserved for process-internal usage and carry no block.
];
3225
/// Intermediate parsed data from a single font face within a font file.
/// Used to share parsing logic between `FcParseFont` and `FcParseFontBytesInner`,
/// which each attach their own source description (a file path or the raw bytes).
#[cfg(all(feature = "std", feature = "parsing"))]
struct ParsedFontFace {
    /// Pattern assembled by `parse_font_faces` from the face's font tables.
    pattern: FcPattern,
    /// Index of the face inside the font file; a file that is not a `ttcf`
    /// collection only ever produces index 0.
    font_index: usize,
}
3233
/// Parse every font face contained in `font_bytes` and return the extracted patterns.
///
/// This is the shared core of `FcParseFont` and `FcParseFontBytesInner`:
/// TTC detection, font table parsing, OS/2/head/post reading, unicode range extraction,
/// CMAP verification, monospace detection, metadata extraction, and pattern creation.
///
/// A face is skipped (instead of failing the whole file) when one of its required
/// tables (`head`, `OS/2`, `name`) is missing or unreadable, or when it has no
/// usable family / full name. The `post` table is optional: it only contributes
/// the fixed-pitch hint handed to `detect_monospace`.
///
/// # Returns
/// `Some(faces)` with one entry per usable face, or `None` if the data is not a
/// parsable font file or no face produced a pattern.
#[cfg(all(feature = "std", feature = "parsing"))]
fn parse_font_faces(font_bytes: &[u8]) -> Option<Vec<ParsedFontFace>> {
    use allsorts::{
        binary::read::ReadScope,
        font_data::FontData,
        post::PostTable,
        tables::{
            os2::Os2, HeadTable, NameTable,
        },
        tag,
    };

    const NAME_ID_COPYRIGHT: u16 = 0;
    const NAME_ID_FAMILY: u16 = 1;
    const NAME_ID_SUBFAMILY: u16 = 2;
    const NAME_ID_UNIQUE_ID: u16 = 3;
    const NAME_ID_FULL_NAME: u16 = 4;
    const NAME_ID_VERSION: u16 = 5;
    const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
    const NAME_ID_TRADEMARK: u16 = 7;
    const NAME_ID_MANUFACTURER: u16 = 8;
    const NAME_ID_DESIGNER: u16 = 9;
    const NAME_ID_DESCRIPTION: u16 = 10;
    const NAME_ID_VENDOR_URL: u16 = 11;
    const NAME_ID_DESIGNER_URL: u16 = 12;
    const NAME_ID_LICENSE: u16 = 13;
    const NAME_ID_LICENSE_URL: u16 = 14;
    const NAME_ID_PREFERRED_FAMILY: u16 = 16;
    const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;

    /// Removes a single leading '.' from a font name, if present.
    fn strip_leading_dot(mut text: String) -> String {
        if text.starts_with('.') {
            text.remove(0);
        }
        text
    }

    /// Converts a detected boolean property into the pattern's tri-state flag.
    fn flag(set: bool) -> PatternMatch {
        if set {
            PatternMatch::True
        } else {
            PatternMatch::False
        }
    }

    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
        // Read numFonts from TTC header (offset 8, 4 bytes)
        let num_fonts =
            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
        // Cap at a reasonable maximum as a safety measure
        std::cmp::min(num_fonts as usize, 100)
    } else {
        // Not a collection, just one font
        1
    };

    let scope = ReadScope::new(font_bytes);
    let font_file = scope.read::<FontData<'_>>().ok()?;

    let mut results = Vec::new();

    for font_index in 0..max_fonts {
        // A broken face must not discard the faces that parsed fine, so every
        // per-face failure below skips to the next face instead of returning.
        let provider = match font_file.table_provider(font_index) {
            Ok(provider) => provider,
            Err(_) => continue,
        };

        let head_data = match provider.table_data(tag::HEAD) {
            Ok(Some(data)) => data.into_owned(),
            _ => continue,
        };
        let head_table = match ReadScope::new(&head_data).read::<HeadTable>() {
            Ok(table) => table,
            Err(_) => continue,
        };

        let is_bold = head_table.is_bold();
        let is_italic = head_table.is_italic();

        // The post table only provides a hint; when it is absent or unreadable
        // the hint stays `None` and `detect_monospace` relies on its fallbacks.
        let mut detected_monospace = None;
        if let Ok(Some(post_data)) = provider.table_data(tag::POST) {
            if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
                // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
                detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
            }
        }

        // Get font properties from OS/2 table
        let os2_data = match provider.table_data(tag::OS_2) {
            Ok(Some(data)) => data,
            _ => continue,
        };
        let os2_table = match ReadScope::new(&os2_data).read_dep::<Os2>(os2_data.len()) {
            Ok(table) => table,
            Err(_) => continue,
        };

        // Extract additional style information
        let is_oblique = os2_table
            .fs_selection
            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
        let weight = FcWeight::from_u16(os2_table.us_weight_class);
        let stretch = FcStretch::from_u16(os2_table.us_width_class);

        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
        // These are hints about what the font *should* support
        let os2_ranges = [
            os2_table.ul_unicode_range1,
            os2_table.ul_unicode_range2,
            os2_table.ul_unicode_range3,
            os2_table.ul_unicode_range4,
        ];

        let mut unicode_ranges = Vec::new();
        for &(bit, start, end) in UNICODE_RANGE_MAPPINGS {
            let range_idx = bit / 32;
            let bit_pos = bit % 32;
            if range_idx < 4 && (os2_ranges[range_idx] & (1 << bit_pos)) != 0 {
                unicode_ranges.push(UnicodeRange { start, end });
            }
        }

        // Verify OS/2 reported ranges against actual CMAP support
        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
        // for ranges they don't actually have glyphs for
        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);

        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
        if unicode_ranges.is_empty() {
            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
                unicode_ranges = cmap_ranges;
            }
        }

        // Use the shared detect_monospace helper for PANOSE + hmtx fallback
        let is_monospace = detect_monospace(&provider, &os2_table, detected_monospace)
            .unwrap_or(false);

        let name_data = match provider.table_data(tag::NAME) {
            Ok(Some(data)) => data.into_owned(),
            _ => continue,
        };
        let name_table = match ReadScope::new(&name_data).read::<NameTable>() {
            Ok(table) => table,
            Err(_) => continue,
        };

        // Look the two pattern-relevant names up once; they do not depend on
        // which name record is being visited.
        let family_name = get_name_string(&name_data, NAME_ID_FAMILY);
        let full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);

        // Extract metadata from name table
        let mut metadata = FcFontMetadata::default();
        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
        metadata.font_family = family_name.clone();
        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
        metadata.full_name = full_name.clone();
        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
        metadata.postscript_name = get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
        metadata.preferred_family = get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
        metadata.preferred_subfamily = get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);

        // Acceptance rule kept from the record-by-record implementation: a pattern
        // is only produced when a full-name record appears after a family record
        // in table order.
        let mut family_record_seen = false;
        let full_name_follows_family = name_table.name_records.iter().any(|record| {
            if record.name_id == NAME_ID_FAMILY {
                family_record_seen = true;
                false
            } else {
                record.name_id == NAME_ID_FULL_NAME && family_record_seen
            }
        });
        if !full_name_follows_family {
            continue;
        }

        let (family, name) = match (family_name, full_name) {
            (Some(family), Some(name)) => (family, name),
            _ => continue,
        };
        if name.is_empty() {
            continue;
        }

        results.push(ParsedFontFace {
            pattern: FcPattern {
                name: Some(strip_leading_dot(name)),
                family: Some(strip_leading_dot(family)),
                bold: flag(is_bold),
                italic: flag(is_italic),
                oblique: flag(is_oblique),
                monospace: flag(is_monospace),
                condensed: flag(stretch <= FcStretch::Condensed),
                weight,
                stretch,
                unicode_ranges,
                metadata,
                render_config: FcFontRenderConfig::default(),
            },
            font_index,
        });
    }

    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}
3467
// Remaining implementation for font scanning, parsing, etc.
/// Parse the font file at `filepath` and pair every extracted pattern with the
/// path and face index it came from.
///
/// Returns `None` when the file cannot be opened, mapped/read, or when
/// `parse_font_faces` yields nothing for its contents.
#[cfg(all(feature = "std", feature = "parsing"))]
pub(crate) fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    use mmapio::MmapOptions;
    use std::fs::File;

    // Opening first makes unreadable paths bail out before any mapping/reading.
    let file = File::open(filepath).ok()?;

    // NOTE(review): the mapping is only sound while the file is not truncated or
    // rewritten by another process during parsing; nothing here enforces that.
    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
    let font_bytes = unsafe { MmapOptions::new().map(&file).ok()? };

    // Targets without memory mapping read the whole file into memory instead.
    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
    let font_bytes = std::fs::read(filepath).ok()?;

    let parsed_faces = parse_font_faces(&font_bytes[..])?;
    let source_path = filepath.to_string_lossy().to_string();

    let mut located = Vec::new();
    for face in parsed_faces {
        let location = FcFontPath {
            path: source_path.clone(),
            font_index: face.font_index,
        };
        located.push((face.pattern, location));
    }

    Some(located)
}
3502
/// Parse font bytes and extract font patterns for in-memory fonts.
///
/// This is the public API for parsing in-memory font data to create
/// `(FcPattern, FcFont)` tuples that can be added to an `FcFontCache`
/// via `with_memory_fonts()`. It is a thin wrapper around the internal
/// `FcParseFontBytesInner`.
///
/// # Arguments
/// * `font_bytes` - The raw bytes of a font file; they are copied into each
///   returned `FcFont`, so the slice does not need to outlive the call
/// * `font_id` - An identifier string for this font, stored as the `id` of
///   every returned `FcFont`
///
/// # Returns
/// A vector of `(FcPattern, FcFont)` tuples, one for each font face in the file.
/// Returns `None` if the font could not be parsed or no face produced a pattern.
///
/// # Example
/// ```ignore
/// use rust_fontconfig::{FcFontCache, FcParseFontBytes};
///
/// let font_bytes = include_bytes!("path/to/font.ttf");
/// let mut cache = FcFontCache::default();
///
/// if let Some(fonts) = FcParseFontBytes(font_bytes, "MyFont") {
///     cache.with_memory_fonts(fonts);
/// }
/// ```
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(non_snake_case)]
pub fn FcParseFontBytes(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    FcParseFontBytesInner(font_bytes, font_id)
}
3533
/// Worker behind `FcParseFontBytes`.
///
/// Runs the shared `parse_font_faces` logic and turns each parsed face into an
/// `FcFont` that owns its own copy of the font data together with `font_id`.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontBytesInner(font_bytes: &[u8], font_id: &str) -> Option<Vec<(FcPattern, FcFont)>> {
    let parsed_faces = parse_font_faces(font_bytes)?;

    let mut fonts = Vec::with_capacity(parsed_faces.len());
    for ParsedFontFace { pattern, font_index } in parsed_faces {
        let font = FcFont {
            // Every face carries the complete file contents.
            bytes: font_bytes.to_vec(),
            font_index,
            id: font_id.to_string(),
        };
        fonts.push((pattern, font));
    }

    Some(fonts)
}
3558
/// Scan every `(prefix, path)` entry and return the patterns of all fonts found.
///
/// Entries whose path cannot be resolved by `process_path` are skipped. With the
/// `multithreading` feature the entries are processed in parallel via rayon.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
    /// Resolves one configured directory and scans it recursively.
    fn scan_entry(entry: &(Option<String>, String)) -> Option<Vec<(FcPattern, FcFontPath)>> {
        let (prefix, raw_path) = entry;
        let resolved = process_path(prefix, PathBuf::from(raw_path), false)?;
        Some(FcScanSingleDirectoryRecursive(resolved))
    }

    #[cfg(feature = "multithreading")]
    {
        use rayon::prelude::*;

        // scan directories in parallel
        paths.par_iter().filter_map(scan_entry).flatten().collect()
    }
    #[cfg(not(feature = "multithreading"))]
    {
        paths.iter().filter_map(scan_entry).flatten().collect()
    }
}
3585
3586/// Recursively collect all files from a directory (no parsing, no allsorts).
3587#[cfg(feature = "std")]
fn FcCollectFontFilesRecursive(dir: PathBuf) -> Vec<PathBuf> {
    // Breadth-first walk: directories are visited in discovery order, so all
    // files of one depth are listed before any file of the next depth.
    // Every non-directory entry is collected; no extension filtering happens here.
    let mut collected = Vec::new();
    let mut pending = std::collections::VecDeque::new();
    pending.push_back(dir);

    while let Some(current) = pending.pop_front() {
        // Unreadable or missing directories are silently skipped.
        let listing = match std::fs::read_dir(&current) {
            Ok(listing) => listing,
            Err(_) => continue,
        };

        // `flatten` drops the entries that failed to be read.
        for child in listing.flatten().map(|entry| entry.path()) {
            if child.is_dir() {
                pending.push_back(child);
            } else {
                collected.push(child);
            }
        }
    }

    collected
}
3616
/// Collect every file below `dir` and parse the ones that turn out to be fonts.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
    FcParseFontFiles(&FcCollectFontFilesRecursive(dir))
}
3622
/// Parse each file in `files_to_parse` with `FcParseFont` and concatenate the
/// resulting patterns in input order; files that fail to parse contribute nothing.
///
/// With the `multithreading` feature the files are parsed in parallel via rayon.
#[cfg(all(feature = "std", feature = "parsing"))]
fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
    #[cfg(feature = "multithreading")]
    let per_file: Vec<Vec<(FcPattern, FcFontPath)>> = {
        use rayon::prelude::*;

        files_to_parse
            .par_iter()
            .filter_map(|font_file| FcParseFont(font_file))
            .collect()
    };

    #[cfg(not(feature = "multithreading"))]
    let per_file: Vec<Vec<(FcPattern, FcFontPath)>> = files_to_parse
        .iter()
        .filter_map(|font_file| FcParseFont(font_file))
        .collect();

    let mut merged = Vec::new();
    for fonts in per_file {
        merged.extend(fonts);
    }
    merged
}
3646
3647#[cfg(all(feature = "std", feature = "parsing"))]
3648/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
3649///
3650/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
fn process_path(
    prefix: &Option<String>,
    mut path: PathBuf,
    is_include_path: bool,
) -> Option<PathBuf> {
    // Parameters:
    // * `prefix` - value of the fontconfig `prefix` attribute, if any
    // * `path` - the configured path; a leading `~` component is expanded to $HOME
    // * `is_include_path` - selects XDG_CONFIG_HOME (true) or XDG_DATA_HOME (false)
    //   as the base for the "xdg" prefix
    //
    // Returns `None` for unsupported prefixes or when a required environment
    // variable is not available.
    const HOME_SHORTCUT: &str = "~";
    const CWD_PATH: &str = ".";

    const HOME_ENV_VAR: &str = "HOME";
    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";

    const PREFIX_CWD: &str = "cwd";
    const PREFIX_DEFAULT: &str = "default";
    const PREFIX_XDG: &str = "xdg";

    // Reads a directory from the environment. An empty value counts as unset
    // (the XDG Base Directory spec requires this for the XDG variables), and
    // `var_os` keeps paths that are not valid UTF-8 usable.
    fn env_dir(name: &str) -> Option<PathBuf> {
        std::env::var_os(name)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    }

    // These lookups could, in theory, be cached, but the work required to do so
    // outweighs the minor benefits.
    fn home_dir() -> Option<PathBuf> {
        env_dir(HOME_ENV_VAR)
    }

    // Resolves an XDG base directory, falling back to `$HOME/<home_suffix>`.
    fn xdg_dir(env_var: &str, home_suffix: &str) -> Option<PathBuf> {
        env_dir(env_var).or_else(|| home_dir().map(|home| home.join(home_suffix)))
    }

    // Resolve the tilde character in the path, if present. `Path::starts_with`
    // compares whole components, so only `~` and `~/...` match, not `~user`.
    if path.starts_with(HOME_SHORTCUT) {
        let home = home_dir()?;
        let expanded = home.join(
            path.strip_prefix(HOME_SHORTCUT)
                .expect("already checked that it starts with the prefix"),
        );
        path = expanded;
    }

    // Resolve prefix values
    match prefix.as_deref() {
        None => Some(path),
        Some(PREFIX_CWD) | Some(PREFIX_DEFAULT) => Some(PathBuf::from(CWD_PATH).join(path)),
        Some(PREFIX_XDG) => {
            let base = if is_include_path {
                xdg_dir(XDG_CONFIG_HOME_ENV_VAR, XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX)
            } else {
                xdg_dir(XDG_DATA_HOME_ENV_VAR, XDG_DATA_HOME_DEFAULT_PATH_SUFFIX)
            };
            base.map(|base_dir| base_dir.join(path))
        }
        Some(_) => None, // Unsupported prefix
    }
}
3728
// Looks up `name_id` in the raw `name` table bytes and returns it as a `String`
// (lossily converted to UTF-8); `None` if the lookup fails or finds no entry.
#[cfg(all(feature = "std", feature = "parsing"))]
fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
    match fontcode_get_name(name_data, name_id) {
        Ok(Some(raw_name)) => Some(String::from_utf8_lossy(raw_name.to_bytes()).into_owned()),
        _ => None,
    }
}
3737
3738/// Representative test codepoints for each Unicode block.
3739/// These are carefully chosen to be actual script characters (not punctuation/symbols)
3740/// that a font claiming to support this script should definitely have.
3741#[cfg(all(feature = "std", feature = "parsing"))]
fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
    // Hand-picked sample codepoints, keyed by the first codepoint of the block.
    // Only `start` selects an entry; `end` matters for the fallback alone.
    const CURATED_SAMPLES: &[(u32, &[u32])] = &[
        // Basic Latin: A M Z a m z
        (0x0000, &[0x0041, 0x004D, 0x005A, 0x0061, 0x006D, 0x007A]),
        // Latin-1 Supplement: common accented letters
        (0x0080, &[0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1]),
        // Latin Extended-A
        (0x0100, &[0x0100, 0x0110, 0x0141, 0x0152, 0x0160]),
        // Latin Extended-B
        (0x0180, &[0x0180, 0x01A0, 0x01B0, 0x01CD]),
        // IPA Extensions
        (0x0250, &[0x0250, 0x0259, 0x026A, 0x0279]),
        // Greek and Coptic
        (0x0370, &[0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9]),
        // Cyrillic
        (0x0400, &[0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F]),
        // Armenian
        (0x0530, &[0x0531, 0x0532, 0x0533, 0x0561, 0x0562]),
        // Hebrew
        (0x0590, &[0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA]),
        // Arabic
        (0x0600, &[0x0627, 0x0628, 0x062A, 0x062C, 0x0645]),
        // Syriac
        (0x0700, &[0x0710, 0x0712, 0x0713, 0x0715]),
        // Devanagari
        (0x0900, &[0x0905, 0x0906, 0x0915, 0x0916, 0x0939]),
        // Bengali
        (0x0980, &[0x0985, 0x0986, 0x0995, 0x0996]),
        // Gurmukhi
        (0x0A00, &[0x0A05, 0x0A06, 0x0A15, 0x0A16]),
        // Gujarati
        (0x0A80, &[0x0A85, 0x0A86, 0x0A95, 0x0A96]),
        // Oriya
        (0x0B00, &[0x0B05, 0x0B06, 0x0B15, 0x0B16]),
        // Tamil
        (0x0B80, &[0x0B85, 0x0B86, 0x0B95, 0x0BA4]),
        // Telugu
        (0x0C00, &[0x0C05, 0x0C06, 0x0C15, 0x0C16]),
        // Kannada
        (0x0C80, &[0x0C85, 0x0C86, 0x0C95, 0x0C96]),
        // Malayalam
        (0x0D00, &[0x0D05, 0x0D06, 0x0D15, 0x0D16]),
        // Thai
        (0x0E00, &[0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40]),
        // Lao
        (0x0E80, &[0x0E81, 0x0E82, 0x0E84, 0x0E87]),
        // Myanmar
        (0x1000, &[0x1000, 0x1001, 0x1002, 0x1010, 0x1019]),
        // Georgian
        (0x10A0, &[0x10D0, 0x10D1, 0x10D2, 0x10D3]),
        // Hangul Jamo
        (0x1100, &[0x1100, 0x1102, 0x1103, 0x1161, 0x1162]),
        // Ethiopic
        (0x1200, &[0x1200, 0x1208, 0x1210, 0x1218]),
        // Cherokee
        (0x13A0, &[0x13A0, 0x13A1, 0x13A2, 0x13A3]),
        // Khmer
        (0x1780, &[0x1780, 0x1781, 0x1782, 0x1783]),
        // Mongolian
        (0x1800, &[0x1820, 0x1821, 0x1822, 0x1823]),
        // Hiragana
        (0x3040, &[0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093]),
        // Katakana
        (0x30A0, &[0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3]),
        // Bopomofo
        (0x3100, &[0x3105, 0x3106, 0x3107, 0x3108]),
        // CJK Unified Ideographs: common characters
        (0x4E00, &[0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708]),
        // Hangul Syllables
        (0xAC00, &[0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4]),
        // CJK Compatibility Ideographs
        (0xF900, &[0xF900, 0xF901, 0xF902]),
        // Arabic Presentation Forms-A
        (0xFB50, &[0xFB50, 0xFB51, 0xFB52, 0xFB56]),
        // Arabic Presentation Forms-B
        (0xFE70, &[0xFE70, 0xFE72, 0xFE74, 0xFE76]),
        // Halfwidth and Fullwidth Forms
        (0xFF00, &[0xFF01, 0xFF21, 0xFF41, 0xFF61]),
    ];

    if let Some(entry) = CURATED_SAMPLES.iter().find(|entry| entry.0 == start) {
        return entry.1.to_vec();
    }

    // No curated samples for this block: probe it at regular intervals.
    let span = end - start;
    if span <= 20 {
        // Tiny block: first codepoint plus the midpoint.
        return vec![start, start + span / 2];
    }

    // Larger block: sample at 1/5, 2/5, 3/5 and 4/5 of the way through.
    (1..5).map(|fifth| start + fifth * span / 5).collect()
}
3836
/// Pick the preferred Unicode subtable of a parsed `cmap` table.
///
/// Platform/encoding combinations are tried in the listed priority order and the
/// first encoding record found is returned; `None` if none of them is present.
#[cfg(all(feature = "std", feature = "parsing"))]
fn find_best_cmap_subtable<'a>(
    cmap: &allsorts::tables::cmap::Cmap<'a>,
) -> Option<allsorts::tables::cmap::EncodingRecord> {
    use allsorts::tables::cmap::{PlatformId, EncodingId};

    // Highest priority first.
    let candidates = vec![
        (PlatformId::UNICODE, EncodingId(3)),
        (PlatformId::UNICODE, EncodingId(4)),
        (PlatformId::WINDOWS, EncodingId(1)),
        (PlatformId::WINDOWS, EncodingId(10)),
        (PlatformId::UNICODE, EncodingId(0)),
        (PlatformId::UNICODE, EncodingId(1)),
    ];

    candidates
        .into_iter()
        .find_map(|(platform, encoding)| cmap.find_subtable(platform, encoding))
}
3852
/// Cross-check the Unicode ranges claimed by OS/2 against the CMAP table.
///
/// Every range is probed with the sample codepoints produced by
/// `get_verification_codepoints`. A range is kept when at least half of its
/// samples (rounded up) resolve to a non-zero glyph id. Samples that fall
/// outside the range are never looked up, but they still count toward the
/// threshold.
///
/// When the CMAP table, a suitable encoding record, or the subtable itself
/// cannot be read, verification is impossible and `os2_ranges` is returned
/// unchanged.
#[cfg(all(feature = "std", feature = "parsing"))]
fn verify_unicode_ranges_with_cmap(
    provider: &impl FontTableProvider,
    os2_ranges: Vec<UnicodeRange>
) -> Vec<UnicodeRange> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    if os2_ranges.is_empty() {
        return Vec::new();
    }

    // Any failure below means "cannot verify", so the OS/2 data is trusted.
    let cmap_data = match provider.table_data(tag::CMAP) {
        Ok(Some(bytes)) => bytes,
        _ => return os2_ranges,
    };

    let record = match ReadScope::new(&cmap_data)
        .read::<Cmap<'_>>()
        .ok()
        .and_then(|cmap| find_best_cmap_subtable(&cmap))
    {
        Some(record) => record,
        None => return os2_ranges,
    };

    let subtable = match ReadScope::new(&cmap_data)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
    {
        Ok(subtable) => subtable,
        Err(_) => return os2_ranges,
    };

    os2_ranges
        .into_iter()
        .filter(|range| {
            let samples = get_verification_codepoints(range.start, range.end);
            // ceil(len / 2): at least half of the samples must be mapped.
            let needed = (samples.len() + 1) / 2;

            // `take(needed)` stops probing as soon as the threshold is met.
            let found = samples
                .into_iter()
                .filter(|&cp| range.start <= cp && cp <= range.end)
                .filter(|&cp| matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                .take(needed)
                .count();

            found >= needed
        })
        .collect()
}
3921
/// Derive Unicode coverage directly from the CMAP table.
///
/// Used as the fallback when OS/2 `ulUnicodeRange` bits are all zero. A fixed
/// list of Unicode blocks is probed with the sample codepoints from
/// `get_verification_codepoints`; a block is reported when at least half of
/// its samples (rounded up) map to a non-zero glyph id.
///
/// Returns `None` when the CMAP table or a usable subtable cannot be read,
/// or when no probed block passes the threshold.
#[cfg(all(feature = "std", feature = "parsing"))]
fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
    use allsorts::tables::cmap::{Cmap, CmapSubtable};

    // Inclusive (start, end) bounds of the Unicode blocks that get probed.
    const BLOCKS: &[(u32, u32)] = &[
        (0x0000, 0x007F), // Basic Latin
        (0x0080, 0x00FF), // Latin-1 Supplement
        (0x0100, 0x017F), // Latin Extended-A
        (0x0180, 0x024F), // Latin Extended-B
        (0x0250, 0x02AF), // IPA Extensions
        (0x0300, 0x036F), // Combining Diacritical Marks
        (0x0370, 0x03FF), // Greek and Coptic
        (0x0400, 0x04FF), // Cyrillic
        (0x0500, 0x052F), // Cyrillic Supplement
        (0x0530, 0x058F), // Armenian
        (0x0590, 0x05FF), // Hebrew
        (0x0600, 0x06FF), // Arabic
        (0x0700, 0x074F), // Syriac
        (0x0900, 0x097F), // Devanagari
        (0x0980, 0x09FF), // Bengali
        (0x0A00, 0x0A7F), // Gurmukhi
        (0x0A80, 0x0AFF), // Gujarati
        (0x0B00, 0x0B7F), // Oriya
        (0x0B80, 0x0BFF), // Tamil
        (0x0C00, 0x0C7F), // Telugu
        (0x0C80, 0x0CFF), // Kannada
        (0x0D00, 0x0D7F), // Malayalam
        (0x0E00, 0x0E7F), // Thai
        (0x0E80, 0x0EFF), // Lao
        (0x1000, 0x109F), // Myanmar
        (0x10A0, 0x10FF), // Georgian
        (0x1100, 0x11FF), // Hangul Jamo
        (0x1200, 0x137F), // Ethiopic
        (0x13A0, 0x13FF), // Cherokee
        (0x1780, 0x17FF), // Khmer
        (0x1800, 0x18AF), // Mongolian
        (0x2000, 0x206F), // General Punctuation
        (0x20A0, 0x20CF), // Currency Symbols
        (0x2100, 0x214F), // Letterlike Symbols
        (0x2190, 0x21FF), // Arrows
        (0x2200, 0x22FF), // Mathematical Operators
        (0x2500, 0x257F), // Box Drawing
        (0x25A0, 0x25FF), // Geometric Shapes
        (0x2600, 0x26FF), // Miscellaneous Symbols
        (0x3000, 0x303F), // CJK Symbols and Punctuation
        (0x3040, 0x309F), // Hiragana
        (0x30A0, 0x30FF), // Katakana
        (0x3100, 0x312F), // Bopomofo
        (0x3130, 0x318F), // Hangul Compatibility Jamo
        (0x4E00, 0x9FFF), // CJK Unified Ideographs
        (0xAC00, 0xD7AF), // Hangul Syllables
        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
    ];

    let cmap_data = provider.table_data(tag::CMAP).ok()??;
    let record = {
        let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;
        find_best_cmap_subtable(&cmap)?
    };
    let subtable = ReadScope::new(&cmap_data)
        .offset(record.offset as usize)
        .read::<CmapSubtable<'_>>()
        .ok()?;

    let covered: Vec<UnicodeRange> = BLOCKS
        .iter()
        .copied()
        .filter(|&(start, end)| {
            let samples = get_verification_codepoints(start, end);
            // ceil(len / 2): at least half of the samples must be mapped.
            let needed = (samples.len() + 1) / 2;

            // `take(needed)` stops probing once the threshold is reached.
            let found = samples
                .into_iter()
                .filter(|&cp| matches!(subtable.map_glyph(cp), Ok(Some(gid)) if gid != 0))
                .take(needed)
                .count();

            found >= needed
        })
        .map(|(start, end)| UnicodeRange { start, end })
        .collect();

    if covered.is_empty() {
        None
    } else {
        Some(covered)
    }
}
4021
// Translate the OS/2 `ulUnicodeRange1..4` bitfields into Unicode ranges using
// `UNICODE_RANGE_MAPPINGS` (currently unused, kept for reference).
#[cfg(all(feature = "std", feature = "parsing"))]
#[allow(dead_code)]
fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
    // The four 32-bit words together form one 128-bit field; bit `n` lives in
    // word `n / 32` at position `n % 32`.
    let words = [
        os2_table.ul_unicode_range1,
        os2_table.ul_unicode_range2,
        os2_table.ul_unicode_range3,
        os2_table.ul_unicode_range4,
    ];

    UNICODE_RANGE_MAPPINGS
        .iter()
        .filter(|&&(bit, _, _)| {
            // Bits that would index past the fourth word are ignored.
            words
                .get(bit / 32)
                .map_or(false, |word| (*word & (1 << (bit % 32))) != 0)
        })
        .map(|&(_, start, end)| UnicodeRange { start, end })
        .collect()
}
4045
// Decide whether a font is monospaced.
//
// Sources are consulted in this order:
//   1. `detected_monospace`, when the caller already knows the answer;
//   2. the OS/2 PANOSE classification, when the family kind is Latin Text;
//   3. the advance widths stored in the `hmtx` table.
//
// Returns `None` when step 3 is reached and any of the `hhea`, `maxp` or
// `hmtx` tables (or one of the metric entries) cannot be read.
#[cfg(all(feature = "std", feature = "parsing"))]
fn detect_monospace(
    provider: &impl FontTableProvider,
    os2_table: &Os2,
    detected_monospace: Option<bool>,
) -> Option<bool> {
    // An answer supplied by the caller always wins.
    if detected_monospace.is_some() {
        return detected_monospace;
    }

    // PANOSE: family kind 2 = Latin Text, proportion 9 = Monospaced.
    if os2_table.panose[0] == 2 {
        return Some(os2_table.panose[3] == 9);
    }

    // Fall back to comparing the advance widths in the hmtx table.
    let hhea_data = provider.table_data(tag::HHEA).ok()??;
    let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
    let maxp_data = provider.table_data(tag::MAXP).ok()??;
    let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
    let hmtx_data = provider.table_data(tag::HMTX).ok()??;

    let metric_count = usize::from(hhea_table.num_h_metrics);
    let hmtx_table = ReadScope::new(&hmtx_data)
        .read_dep::<HmtxTable<'_>>((usize::from(maxp_table.num_glyphs), metric_count))
        .ok()?;

    // The first advance becomes the reference; the first entry that deviates
    // from it proves the font is proportional.
    let mut reference = None;
    for index in 0..metric_count {
        let advance = hmtx_table.h_metrics.read_item(index).ok()?.advance_width;
        match reference {
            None => reference = Some(advance),
            Some(expected) if expected != advance => return Some(false),
            Some(_) => {}
        }
    }

    Some(true)
}
4091
/// Build a best-guess [`FcPattern`] from a font file name alone.
///
/// Only paths whose extension (compared case-insensitively) is `ttf`, `otf`,
/// `ttc`, `woff` or `woff2` are considered. Style hints are detected by
/// looking for keyword tokens in the output of `config::tokenize_lowercase`,
/// and the family name is the output of `config::tokenize_font_stem` joined
/// with spaces.
///
/// Returns `None` when the extension is not a known font extension, when the
/// extension or file stem is missing or not valid UTF-8, or when no family
/// tokens are produced.
#[cfg(feature = "std")]
fn pattern_from_filename(path: &std::path::Path) -> Option<FcPattern> {
    let extension = path.extension()?.to_str()?.to_lowercase();
    if !matches!(extension.as_str(), "ttf" | "otf" | "ttc" | "woff" | "woff2") {
        return None;
    }

    let stem = path.file_stem()?.to_str()?;

    // Style hints: a flag is set when its keyword appears as a whole token.
    let tokens = crate::config::tokenize_lowercase(stem);
    let contains = |keyword: &str| tokens.iter().any(|token| token == keyword);
    let bold = contains("bold") || contains("heavy");
    let italic = contains("italic");
    let oblique = contains("oblique");
    let mono = contains("mono") || contains("monospace");
    let condensed = contains("condensed");

    // Family name: the tokens returned by the font-stem tokenizer.
    let family_tokens = crate::config::tokenize_font_stem(stem);
    if family_tokens.is_empty() {
        return None;
    }
    let family = family_tokens.join(" ");

    // `True` when the hint is present, otherwise the given fallback.
    let flag = |present: bool, absent: PatternMatch| {
        if present {
            PatternMatch::True
        } else {
            absent
        }
    };

    Some(FcPattern {
        name: Some(stem.to_string()),
        family: Some(family),
        // Bold and italic are asserted either way; the remaining hints stay
        // undecided when their keyword is absent.
        bold: flag(bold, PatternMatch::False),
        italic: flag(italic, PatternMatch::False),
        oblique: flag(oblique, PatternMatch::DontCare),
        monospace: flag(mono, PatternMatch::DontCare),
        condensed: flag(condensed, PatternMatch::DontCare),
        weight: if bold { FcWeight::Bold } else { FcWeight::Normal },
        stretch: if condensed { FcStretch::Condensed } else { FcStretch::Normal },
        unicode_ranges: Vec::new(),
        metadata: FcFontMetadata::default(),
        render_config: FcFontRenderConfig::default(),
    })
}