rust_fontconfig/
lib.rs

1//! # rust-fontconfig
2//!
3//! Pure-Rust rewrite of the Linux fontconfig library (no system dependencies) - using allsorts as a font parser to support `.woff`, `.woff2`, `.ttc`, `.otf` and `.ttf`
4//!
5//! **NOTE**: Also works on Windows, macOS and WASM - without external dependencies!
6//!
7//! ## Usage
8//!
9//! ### Basic Font Query
10//!
11//! ```rust,no_run
12//! use rust_fontconfig::{FcFontCache, FcPattern};
13//!
14//! fn main() {
15//!     // Build the font cache
16//!     let cache = FcFontCache::build();
17//!
18//!     // Query a font by name
19//!     let results = cache.query(
20//!         &FcPattern {
21//!             name: Some(String::from("Arial")),
22//!             ..Default::default()
23//!         },
24//!         &mut Vec::new() // Trace messages container
25//!     );
26//!
27//!     if let Some(font_match) = results {
28//!         println!("Font match ID: {:?}", font_match.id);
29//!         println!("Font unicode ranges: {:?}", font_match.unicode_ranges);
30//!     } else {
31//!         println!("No matching font found");
32//!     }
33//! }
34//! ```
35//!
36//! ### Resolve Font Chain and Query for Text
37//!
38//! ```rust,no_run
39//! use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
40//!
41//! fn main() {
42//!     let cache = FcFontCache::build();
43//!     
44//!     // Build font fallback chain (without text parameter)
45//!     let font_chain = cache.resolve_font_chain(
46//!         &["Arial".to_string(), "sans-serif".to_string()],
47//!         FcWeight::Normal,
48//!         PatternMatch::DontCare,
49//!         PatternMatch::DontCare,
50//!         &mut Vec::new(),
51//!     );
52//!     
53//!     // Query which fonts to use for specific text
54//!     let text = "Hello 你好 Здравствуйте";
55//!     let font_runs = font_chain.query_for_text(&cache, text);
56//!     
57//!     println!("Text split into {} font runs:", font_runs.len());
58//!     for run in font_runs {
59//!         println!("  '{}' -> font {:?}", run.text, run.font_id);
60//!     }
61//! }
62//! ```
63
64#![allow(non_snake_case)]
65#![cfg_attr(not(feature = "std"), no_std)]
66
67extern crate alloc;
68
69#[cfg(all(feature = "std", feature = "parsing"))]
70use alloc::borrow::ToOwned;
71use alloc::collections::btree_map::BTreeMap;
72use alloc::string::{String, ToString};
73use alloc::vec::Vec;
74use alloc::{format, vec};
75#[cfg(feature = "parsing")]
76use allsorts::binary::read::ReadScope;
77#[cfg(all(feature = "std", feature = "parsing"))]
78use allsorts::get_name::fontcode_get_name;
79#[cfg(feature = "parsing")]
80use allsorts::tables::os2::Os2;
81#[cfg(feature = "parsing")]
82use allsorts::tables::{FontTableProvider, HheaTable, HmtxTable, MaxpTable};
83#[cfg(feature = "parsing")]
84use allsorts::tag;
85#[cfg(all(feature = "std", feature = "parsing"))]
86use std::path::PathBuf;
87
88#[cfg(feature = "ffi")]
89pub mod ffi;
90
91/// Operating system type for generic font family resolution
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
93pub enum OperatingSystem {
94    Windows,
95    Linux,
96    MacOS,
97    Wasm,
98}
99
100impl OperatingSystem {
101    /// Detect the current operating system at compile time
102    pub fn current() -> Self {
103        #[cfg(target_os = "windows")]
104        return OperatingSystem::Windows;
105        
106        #[cfg(target_os = "linux")]
107        return OperatingSystem::Linux;
108        
109        #[cfg(target_os = "macos")]
110        return OperatingSystem::MacOS;
111        
112        #[cfg(target_family = "wasm")]
113        return OperatingSystem::Wasm;
114        
115        #[cfg(not(any(target_os = "windows", target_os = "linux", target_os = "macos", target_family = "wasm")))]
116        return OperatingSystem::Linux; // Default fallback
117    }
118    
119    /// Get system-specific fonts for the "serif" generic family
120    /// Prioritizes fonts based on Unicode range coverage
121    pub fn get_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
122        let has_cjk = unicode_ranges.iter().any(|r| {
123            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
124            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
125            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
126            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
127        });
128        
129        let has_arabic = unicode_ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF);
130        let _has_cyrillic = unicode_ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF);
131        
132        match self {
133            OperatingSystem::Windows => {
134                let mut fonts = Vec::new();
135                if has_cjk {
136                    fonts.extend_from_slice(&["MS Mincho", "SimSun", "MingLiU"]);
137                }
138                if has_arabic {
139                    fonts.push("Traditional Arabic");
140                }
141                fonts.push("Times New Roman");
142                fonts.iter().map(|s| s.to_string()).collect()
143            }
144            OperatingSystem::Linux => {
145                let mut fonts = Vec::new();
146                if has_cjk {
147                    fonts.extend_from_slice(&["Noto Serif CJK SC", "Noto Serif CJK JP", "Noto Serif CJK KR"]);
148                }
149                if has_arabic {
150                    fonts.push("Noto Serif Arabic");
151                }
152                fonts.extend_from_slice(&[
153                    "Times", "Times New Roman", "DejaVu Serif", "Free Serif", 
154                    "Noto Serif", "Bitstream Vera Serif", "Roman", "Regular"
155                ]);
156                fonts.iter().map(|s| s.to_string()).collect()
157            }
158            OperatingSystem::MacOS => {
159                let mut fonts = Vec::new();
160                if has_cjk {
161                    fonts.extend_from_slice(&["Hiragino Mincho ProN", "STSong", "AppleMyungjo"]);
162                }
163                if has_arabic {
164                    fonts.push("Geeza Pro");
165                }
166                fonts.extend_from_slice(&["Times", "New York", "Palatino"]);
167                fonts.iter().map(|s| s.to_string()).collect()
168            }
169            OperatingSystem::Wasm => Vec::new(),
170        }
171    }
172    
173    /// Get system-specific fonts for the "sans-serif" generic family
174    /// Prioritizes fonts based on Unicode range coverage
175    pub fn get_sans_serif_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
176        let has_cjk = unicode_ranges.iter().any(|r| {
177            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
178            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
179            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
180            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
181        });
182        
183        let has_arabic = unicode_ranges.iter().any(|r| r.start >= 0x0600 && r.start <= 0x06FF);
184        let _has_cyrillic = unicode_ranges.iter().any(|r| r.start >= 0x0400 && r.start <= 0x04FF);
185        let has_hebrew = unicode_ranges.iter().any(|r| r.start >= 0x0590 && r.start <= 0x05FF);
186        let has_thai = unicode_ranges.iter().any(|r| r.start >= 0x0E00 && r.start <= 0x0E7F);
187        
188        match self {
189            OperatingSystem::Windows => {
190                let mut fonts = Vec::new();
191                if has_cjk {
192                    fonts.extend_from_slice(&["Microsoft YaHei", "MS Gothic", "Malgun Gothic", "SimHei"]);
193                }
194                if has_arabic {
195                    fonts.push("Segoe UI Arabic");
196                }
197                if has_hebrew {
198                    fonts.push("Segoe UI Hebrew");
199                }
200                if has_thai {
201                    fonts.push("Leelawadee UI");
202                }
203                fonts.extend_from_slice(&["Segoe UI", "Tahoma", "Microsoft Sans Serif", "MS Sans Serif", "Helv"]);
204                fonts.iter().map(|s| s.to_string()).collect()
205            }
206            OperatingSystem::Linux => {
207                let mut fonts = Vec::new();
208                if has_cjk {
209                    fonts.extend_from_slice(&[
210                        "Noto Sans CJK SC", "Noto Sans CJK JP", "Noto Sans CJK KR",
211                        "WenQuanYi Micro Hei", "Droid Sans Fallback"
212                    ]);
213                }
214                if has_arabic {
215                    fonts.push("Noto Sans Arabic");
216                }
217                if has_hebrew {
218                    fonts.push("Noto Sans Hebrew");
219                }
220                if has_thai {
221                    fonts.push("Noto Sans Thai");
222                }
223                fonts.extend_from_slice(&["Ubuntu", "Arial", "DejaVu Sans", "Noto Sans", "Liberation Sans"]);
224                fonts.iter().map(|s| s.to_string()).collect()
225            }
226            OperatingSystem::MacOS => {
227                let mut fonts = Vec::new();
228                if has_cjk {
229                    fonts.extend_from_slice(&[
230                        "Hiragino Sans", "Hiragino Kaku Gothic ProN", 
231                        "PingFang SC", "PingFang TC", "Apple SD Gothic Neo"
232                    ]);
233                }
234                if has_arabic {
235                    fonts.push("Geeza Pro");
236                }
237                if has_hebrew {
238                    fonts.push("Arial Hebrew");
239                }
240                if has_thai {
241                    fonts.push("Thonburi");
242                }
243                fonts.extend_from_slice(&["San Francisco", "Helvetica Neue", "Lucida Grande"]);
244                fonts.iter().map(|s| s.to_string()).collect()
245            }
246            OperatingSystem::Wasm => Vec::new(),
247        }
248    }
249    
250    /// Get system-specific fonts for the "monospace" generic family
251    /// Prioritizes fonts based on Unicode range coverage
252    pub fn get_monospace_fonts(&self, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
253        let has_cjk = unicode_ranges.iter().any(|r| {
254            (r.start >= 0x4E00 && r.start <= 0x9FFF) || // CJK Unified Ideographs
255            (r.start >= 0x3040 && r.start <= 0x309F) || // Hiragana
256            (r.start >= 0x30A0 && r.start <= 0x30FF) || // Katakana
257            (r.start >= 0xAC00 && r.start <= 0xD7AF)    // Hangul
258        });
259        
260        match self {
261            OperatingSystem::Windows => {
262                let mut fonts = Vec::new();
263                if has_cjk {
264                    fonts.extend_from_slice(&["MS Gothic", "SimHei"]);
265                }
266                fonts.extend_from_slice(&["Segoe UI Mono", "Courier New", "Cascadia Code", "Cascadia Mono", "Consolas"]);
267                fonts.iter().map(|s| s.to_string()).collect()
268            }
269            OperatingSystem::Linux => {
270                let mut fonts = Vec::new();
271                if has_cjk {
272                    fonts.extend_from_slice(&["Noto Sans Mono CJK SC", "Noto Sans Mono CJK JP", "WenQuanYi Zen Hei Mono"]);
273                }
274                fonts.extend_from_slice(&[
275                    "Source Code Pro", "Cantarell", "DejaVu Sans Mono", 
276                    "Roboto Mono", "Ubuntu Monospace", "Droid Sans Mono"
277                ]);
278                fonts.iter().map(|s| s.to_string()).collect()
279            }
280            OperatingSystem::MacOS => {
281                let mut fonts = Vec::new();
282                if has_cjk {
283                    fonts.extend_from_slice(&["Hiragino Sans", "PingFang SC"]);
284                }
285                fonts.extend_from_slice(&["SF Mono", "Menlo", "Monaco", "Courier", "Oxygen Mono", "Source Code Pro", "Fira Mono"]);
286                fonts.iter().map(|s| s.to_string()).collect()
287            }
288            OperatingSystem::Wasm => Vec::new(),
289        }
290    }
291    
292    /// Expand a generic CSS font family to system-specific font names
293    /// Returns the original name if not a generic family
294    /// Prioritizes fonts based on Unicode range coverage
295    pub fn expand_generic_family(&self, family: &str, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
296        match family.to_lowercase().as_str() {
297            "serif" => self.get_serif_fonts(unicode_ranges),
298            "sans-serif" => self.get_sans_serif_fonts(unicode_ranges),
299            "monospace" => self.get_monospace_fonts(unicode_ranges),
300            "cursive" | "fantasy" | "system-ui" => {
301                // Use sans-serif as fallback for these
302                self.get_sans_serif_fonts(unicode_ranges)
303            }
304            _ => vec![family.to_string()],
305        }
306    }
307}
308
309/// Expand a CSS font-family stack with generic families resolved to OS-specific fonts
310/// Prioritizes fonts based on Unicode range coverage
311/// Example: ["Arial", "sans-serif"] on macOS with CJK ranges -> ["Arial", "PingFang SC", "Hiragino Sans", ...]
312pub fn expand_font_families(families: &[String], os: OperatingSystem, unicode_ranges: &[UnicodeRange]) -> Vec<String> {
313    let mut expanded = Vec::new();
314    
315    for family in families {
316        expanded.extend(os.expand_generic_family(family, unicode_ranges));
317    }
318    
319    expanded
320}
321
322/// UUID to identify a font (collections are broken up into separate fonts)
323#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
324pub struct FontId(pub u128);
325
326impl core::fmt::Debug for FontId {
327    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
328        core::fmt::Display::fmt(self, f)
329    }
330}
331
332impl core::fmt::Display for FontId {
333    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
334        let id = self.0;
335        write!(
336            f,
337            "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
338            (id >> 96) & 0xFFFFFFFF,
339            (id >> 80) & 0xFFFF,
340            (id >> 64) & 0xFFFF,
341            (id >> 48) & 0xFFFF,
342            id & 0xFFFFFFFFFFFF
343        )
344    }
345}
346
347impl FontId {
348    /// Generate a new pseudo-UUID without external dependencies
349    pub fn new() -> Self {
350        #[cfg(feature = "std")]
351        {
352            use std::time::{SystemTime, UNIX_EPOCH};
353            let now = SystemTime::now()
354                .duration_since(UNIX_EPOCH)
355                .unwrap_or_default();
356
357            let time_part = now.as_nanos();
358            let random_part = {
359                // Simple PRNG based on time
360                let seed = now.as_secs() as u64;
361                let a = 6364136223846793005u64;
362                let c = 1442695040888963407u64;
363                let r = a.wrapping_mul(seed).wrapping_add(c);
364                r as u64
365            };
366
367            // Combine time and random parts
368            let id = (time_part & 0xFFFFFFFFFFFFFFFFu128) | ((random_part as u128) << 64);
369            FontId(id)
370        }
371
372        #[cfg(not(feature = "std"))]
373        {
374            // For no_std contexts, just use a counter
375            static mut COUNTER: u128 = 0;
376            let id = unsafe {
377                COUNTER += 1;
378                COUNTER
379            };
380            FontId(id)
381        }
382    }
383}
384
385/// Whether a field is required to match (yes / no / don't care)
386#[derive(Debug, Default, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
387#[repr(C)]
388pub enum PatternMatch {
389    /// Default: don't particularly care whether the requirement matches
390    #[default]
391    DontCare,
392    /// Requirement has to be true for the selected font
393    True,
394    /// Requirement has to be false for the selected font
395    False,
396}
397
398impl PatternMatch {
399    fn needs_to_match(&self) -> bool {
400        matches!(self, PatternMatch::True | PatternMatch::False)
401    }
402
403    fn matches(&self, other: &PatternMatch) -> bool {
404        match (self, other) {
405            (PatternMatch::DontCare, _) => true,
406            (_, PatternMatch::DontCare) => true,
407            (a, b) => a == b,
408        }
409    }
410}
411
412/// Font weight values as defined in CSS specification
413#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
414#[repr(C)]
415pub enum FcWeight {
416    Thin = 100,
417    ExtraLight = 200,
418    Light = 300,
419    Normal = 400,
420    Medium = 500,
421    SemiBold = 600,
422    Bold = 700,
423    ExtraBold = 800,
424    Black = 900,
425}
426
427impl FcWeight {
428    pub fn from_u16(weight: u16) -> Self {
429        match weight {
430            0..=149 => FcWeight::Thin,
431            150..=249 => FcWeight::ExtraLight,
432            250..=349 => FcWeight::Light,
433            350..=449 => FcWeight::Normal,
434            450..=549 => FcWeight::Medium,
435            550..=649 => FcWeight::SemiBold,
436            650..=749 => FcWeight::Bold,
437            750..=849 => FcWeight::ExtraBold,
438            _ => FcWeight::Black,
439        }
440    }
441
442    pub fn find_best_match(&self, available: &[FcWeight]) -> Option<FcWeight> {
443        if available.is_empty() {
444            return None;
445        }
446
447        // Exact match
448        if available.contains(self) {
449            return Some(*self);
450        }
451
452        // Get numeric value
453        let self_value = *self as u16;
454
455        match *self {
456            FcWeight::Normal => {
457                // For Normal (400), try Medium (500) first
458                if available.contains(&FcWeight::Medium) {
459                    return Some(FcWeight::Medium);
460                }
461                // Then try lighter weights
462                for weight in &[FcWeight::Light, FcWeight::ExtraLight, FcWeight::Thin] {
463                    if available.contains(weight) {
464                        return Some(*weight);
465                    }
466                }
467                // Last, try heavier weights
468                for weight in &[
469                    FcWeight::SemiBold,
470                    FcWeight::Bold,
471                    FcWeight::ExtraBold,
472                    FcWeight::Black,
473                ] {
474                    if available.contains(weight) {
475                        return Some(*weight);
476                    }
477                }
478            }
479            FcWeight::Medium => {
480                // For Medium (500), try Normal (400) first
481                if available.contains(&FcWeight::Normal) {
482                    return Some(FcWeight::Normal);
483                }
484                // Then try lighter weights
485                for weight in &[FcWeight::Light, FcWeight::ExtraLight, FcWeight::Thin] {
486                    if available.contains(weight) {
487                        return Some(*weight);
488                    }
489                }
490                // Last, try heavier weights
491                for weight in &[
492                    FcWeight::SemiBold,
493                    FcWeight::Bold,
494                    FcWeight::ExtraBold,
495                    FcWeight::Black,
496                ] {
497                    if available.contains(weight) {
498                        return Some(*weight);
499                    }
500                }
501            }
502            FcWeight::Thin | FcWeight::ExtraLight | FcWeight::Light => {
503                // For lightweight fonts (<400), first try lighter or equal weights
504                let mut best_match = None;
505                let mut smallest_diff = u16::MAX;
506
507                // Find the closest lighter weight
508                for weight in available {
509                    let weight_value = *weight as u16;
510                    // Only consider weights <= self (per test expectation)
511                    if weight_value <= self_value {
512                        let diff = self_value - weight_value;
513                        if diff < smallest_diff {
514                            smallest_diff = diff;
515                            best_match = Some(*weight);
516                        }
517                    }
518                }
519
520                if best_match.is_some() {
521                    return best_match;
522                }
523
524                // If no lighter weight, find the closest heavier weight
525                best_match = None;
526                smallest_diff = u16::MAX;
527
528                for weight in available {
529                    let weight_value = *weight as u16;
530                    if weight_value > self_value {
531                        let diff = weight_value - self_value;
532                        if diff < smallest_diff {
533                            smallest_diff = diff;
534                            best_match = Some(*weight);
535                        }
536                    }
537                }
538
539                return best_match;
540            }
541            FcWeight::SemiBold | FcWeight::Bold | FcWeight::ExtraBold | FcWeight::Black => {
542                // For heavyweight fonts (>500), first try heavier or equal weights
543                let mut best_match = None;
544                let mut smallest_diff = u16::MAX;
545
546                // Find the closest heavier weight
547                for weight in available {
548                    let weight_value = *weight as u16;
549                    // Only consider weights >= self
550                    if weight_value >= self_value {
551                        let diff = weight_value - self_value;
552                        if diff < smallest_diff {
553                            smallest_diff = diff;
554                            best_match = Some(*weight);
555                        }
556                    }
557                }
558
559                if best_match.is_some() {
560                    return best_match;
561                }
562
563                // If no heavier weight, find the closest lighter weight
564                best_match = None;
565                smallest_diff = u16::MAX;
566
567                for weight in available {
568                    let weight_value = *weight as u16;
569                    if weight_value < self_value {
570                        let diff = self_value - weight_value;
571                        if diff < smallest_diff {
572                            smallest_diff = diff;
573                            best_match = Some(*weight);
574                        }
575                    }
576                }
577
578                return best_match;
579            }
580        }
581
582        // If nothing matches by now, return the first available weight
583        Some(available[0])
584    }
585}
586
587impl Default for FcWeight {
588    fn default() -> Self {
589        FcWeight::Normal
590    }
591}
592
593/// CSS font-stretch values
594#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
595#[repr(C)]
596pub enum FcStretch {
597    UltraCondensed = 1,
598    ExtraCondensed = 2,
599    Condensed = 3,
600    SemiCondensed = 4,
601    Normal = 5,
602    SemiExpanded = 6,
603    Expanded = 7,
604    ExtraExpanded = 8,
605    UltraExpanded = 9,
606}
607
608impl FcStretch {
609    pub fn is_condensed(&self) -> bool {
610        use self::FcStretch::*;
611        match self {
612            UltraCondensed => true,
613            ExtraCondensed => true,
614            Condensed => true,
615            SemiCondensed => true,
616            Normal => false,
617            SemiExpanded => false,
618            Expanded => false,
619            ExtraExpanded => false,
620            UltraExpanded => false,
621        }
622    }
623    pub fn from_u16(width_class: u16) -> Self {
624        match width_class {
625            1 => FcStretch::UltraCondensed,
626            2 => FcStretch::ExtraCondensed,
627            3 => FcStretch::Condensed,
628            4 => FcStretch::SemiCondensed,
629            5 => FcStretch::Normal,
630            6 => FcStretch::SemiExpanded,
631            7 => FcStretch::Expanded,
632            8 => FcStretch::ExtraExpanded,
633            9 => FcStretch::UltraExpanded,
634            _ => FcStretch::Normal,
635        }
636    }
637
638    /// Follows CSS spec for stretch matching
639    pub fn find_best_match(&self, available: &[FcStretch]) -> Option<FcStretch> {
640        if available.is_empty() {
641            return None;
642        }
643
644        if available.contains(self) {
645            return Some(*self);
646        }
647
648        // For 'normal' or condensed values, narrower widths are checked first, then wider values
649        if *self <= FcStretch::Normal {
650            // Find narrower values first
651            let mut closest_narrower = None;
652            for stretch in available.iter() {
653                if *stretch < *self
654                    && (closest_narrower.is_none() || *stretch > closest_narrower.unwrap())
655                {
656                    closest_narrower = Some(*stretch);
657                }
658            }
659
660            if closest_narrower.is_some() {
661                return closest_narrower;
662            }
663
664            // Otherwise, find wider values
665            let mut closest_wider = None;
666            for stretch in available.iter() {
667                if *stretch > *self
668                    && (closest_wider.is_none() || *stretch < closest_wider.unwrap())
669                {
670                    closest_wider = Some(*stretch);
671                }
672            }
673
674            return closest_wider;
675        } else {
676            // For expanded values, wider values are checked first, then narrower values
677            let mut closest_wider = None;
678            for stretch in available.iter() {
679                if *stretch > *self
680                    && (closest_wider.is_none() || *stretch < closest_wider.unwrap())
681                {
682                    closest_wider = Some(*stretch);
683                }
684            }
685
686            if closest_wider.is_some() {
687                return closest_wider;
688            }
689
690            // Otherwise, find narrower values
691            let mut closest_narrower = None;
692            for stretch in available.iter() {
693                if *stretch < *self
694                    && (closest_narrower.is_none() || *stretch > closest_narrower.unwrap())
695                {
696                    closest_narrower = Some(*stretch);
697                }
698            }
699
700            return closest_narrower;
701        }
702    }
703}
704
705impl Default for FcStretch {
706    fn default() -> Self {
707        FcStretch::Normal
708    }
709}
710
711/// Unicode range representation for font matching
712#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
713pub struct UnicodeRange {
714    pub start: u32,
715    pub end: u32,
716}
717
718impl UnicodeRange {
719    pub fn contains(&self, c: char) -> bool {
720        let c = c as u32;
721        c >= self.start && c <= self.end
722    }
723
724    pub fn overlaps(&self, other: &UnicodeRange) -> bool {
725        self.start <= other.end && other.start <= self.end
726    }
727
728    pub fn is_subset_of(&self, other: &UnicodeRange) -> bool {
729        self.start >= other.start && self.end <= other.end
730    }
731}
732
733/// Log levels for trace messages
734#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
735pub enum TraceLevel {
736    Debug,
737    Info,
738    Warning,
739    Error,
740}
741
742/// Reason for font matching failure or success
743#[derive(Debug, Clone, PartialEq, Eq, Hash)]
744pub enum MatchReason {
745    NameMismatch {
746        requested: Option<String>,
747        found: Option<String>,
748    },
749    FamilyMismatch {
750        requested: Option<String>,
751        found: Option<String>,
752    },
753    StyleMismatch {
754        property: &'static str,
755        requested: String,
756        found: String,
757    },
758    WeightMismatch {
759        requested: FcWeight,
760        found: FcWeight,
761    },
762    StretchMismatch {
763        requested: FcStretch,
764        found: FcStretch,
765    },
766    UnicodeRangeMismatch {
767        character: char,
768        ranges: Vec<UnicodeRange>,
769    },
770    Success,
771}
772
773/// Trace message for debugging font matching
774#[derive(Debug, Clone, PartialEq, Eq)]
775pub struct TraceMsg {
776    pub level: TraceLevel,
777    pub path: String,
778    pub reason: MatchReason,
779}
780
781/// Font pattern for matching
782#[derive(Default, Clone, PartialOrd, Ord, PartialEq, Eq)]
783#[repr(C)]
784pub struct FcPattern {
785    // font name
786    pub name: Option<String>,
787    // family name
788    pub family: Option<String>,
789    // "italic" property
790    pub italic: PatternMatch,
791    // "oblique" property
792    pub oblique: PatternMatch,
793    // "bold" property
794    pub bold: PatternMatch,
795    // "monospace" property
796    pub monospace: PatternMatch,
797    // "condensed" property
798    pub condensed: PatternMatch,
799    // font weight
800    pub weight: FcWeight,
801    // font stretch
802    pub stretch: FcStretch,
803    // unicode ranges to match
804    pub unicode_ranges: Vec<UnicodeRange>,
805    // extended font metadata
806    pub metadata: FcFontMetadata,
807}
808
809impl core::fmt::Debug for FcPattern {
810    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
811        let mut d = f.debug_struct("FcPattern");
812
813        if let Some(name) = &self.name {
814            d.field("name", name);
815        }
816
817        if let Some(family) = &self.family {
818            d.field("family", family);
819        }
820
821        if self.italic != PatternMatch::DontCare {
822            d.field("italic", &self.italic);
823        }
824
825        if self.oblique != PatternMatch::DontCare {
826            d.field("oblique", &self.oblique);
827        }
828
829        if self.bold != PatternMatch::DontCare {
830            d.field("bold", &self.bold);
831        }
832
833        if self.monospace != PatternMatch::DontCare {
834            d.field("monospace", &self.monospace);
835        }
836
837        if self.condensed != PatternMatch::DontCare {
838            d.field("condensed", &self.condensed);
839        }
840
841        if self.weight != FcWeight::Normal {
842            d.field("weight", &self.weight);
843        }
844
845        if self.stretch != FcStretch::Normal {
846            d.field("stretch", &self.stretch);
847        }
848
849        if !self.unicode_ranges.is_empty() {
850            d.field("unicode_ranges", &self.unicode_ranges);
851        }
852
853        // Only show non-empty metadata fields
854        let empty_metadata = FcFontMetadata::default();
855        if self.metadata != empty_metadata {
856            d.field("metadata", &self.metadata);
857        }
858
859        d.finish()
860    }
861}
862
863/// Font metadata from the OS/2 table
864#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord)]
865pub struct FcFontMetadata {
866    pub copyright: Option<String>,
867    pub designer: Option<String>,
868    pub designer_url: Option<String>,
869    pub font_family: Option<String>,
870    pub font_subfamily: Option<String>,
871    pub full_name: Option<String>,
872    pub id_description: Option<String>,
873    pub license: Option<String>,
874    pub license_url: Option<String>,
875    pub manufacturer: Option<String>,
876    pub manufacturer_url: Option<String>,
877    pub postscript_name: Option<String>,
878    pub preferred_family: Option<String>,
879    pub preferred_subfamily: Option<String>,
880    pub trademark: Option<String>,
881    pub unique_id: Option<String>,
882    pub version: Option<String>,
883}
884
885impl FcPattern {
886    /// Check if this pattern would match the given character
887    pub fn contains_char(&self, c: char) -> bool {
888        if self.unicode_ranges.is_empty() {
889            return true; // No ranges specified means match all characters
890        }
891
892        for range in &self.unicode_ranges {
893            if range.contains(c) {
894                return true;
895            }
896        }
897
898        false
899    }
900}
901
902/// Font match result with UUID
903#[derive(Debug, Clone, PartialEq, Eq)]
904pub struct FontMatch {
905    pub id: FontId,
906    pub unicode_ranges: Vec<UnicodeRange>,
907    pub fallbacks: Vec<FontMatchNoFallback>,
908}
909
910/// Font match result with UUID (without fallback)
911#[derive(Debug, Clone, PartialEq, Eq)]
912pub struct FontMatchNoFallback {
913    pub id: FontId,
914    pub unicode_ranges: Vec<UnicodeRange>,
915}
916
917/// A run of text that uses the same font
918/// Returned by FontFallbackChain::query_for_text()
919#[derive(Debug, Clone, PartialEq, Eq)]
920pub struct ResolvedFontRun {
921    /// The text content of this run
922    pub text: String,
923    /// Start byte index in the original text
924    pub start_byte: usize,
925    /// End byte index in the original text (exclusive)
926    pub end_byte: usize,
927    /// The font to use for this run (None if no font found)
928    pub font_id: Option<FontId>,
929    /// Which CSS font-family this came from
930    pub css_source: String,
931}
932
933/// Resolved font fallback chain for a CSS font-family stack
934/// This represents the complete chain of fonts to use for rendering text
935#[derive(Debug, Clone, PartialEq, Eq)]
936pub struct FontFallbackChain {
937    /// CSS-based fallbacks: Each CSS font expanded to its system fallbacks
938    /// Example: ["NotoSansJP" -> [Hiragino Sans, PingFang SC], "sans-serif" -> [Helvetica]]
939    pub css_fallbacks: Vec<CssFallbackGroup>,
940    
941    /// Unicode-based fallbacks: Fonts added to cover missing Unicode ranges
942    /// Only populated if css_fallbacks don't cover all requested characters
943    pub unicode_fallbacks: Vec<FontMatch>,
944    
945    /// The original CSS font-family stack that was requested
946    pub original_stack: Vec<String>,
947}
948
949impl FontFallbackChain {
950    /// Resolve which font should be used for a specific character
951    /// Returns (FontId, css_source_name) where css_source_name indicates which CSS font matched
952    /// Returns None if no font in the chain can render this character
953    pub fn resolve_char(&self, cache: &FcFontCache, ch: char) -> Option<(FontId, String)> {
954        let codepoint = ch as u32;
955        
956        // First check CSS fallbacks in order
957        for group in &self.css_fallbacks {
958            for font in &group.fonts {
959                if let Some(meta) = cache.get_metadata_by_id(&font.id) {
960                    // Check if this font's unicode ranges cover the character
961                    if meta.unicode_ranges.is_empty() {
962                        // Font has no unicode range info - skip it, don't assume it covers everything
963                        // This is important because fonts that don't properly declare their ranges
964                        // should not be used as a catch-all
965                        continue;
966                    } else {
967                        // Check if character is in any of the font's ranges
968                        for range in &meta.unicode_ranges {
969                            if codepoint >= range.start && codepoint <= range.end {
970                                return Some((font.id, group.css_name.clone()));
971                            }
972                        }
973                        // Character not in any range - continue to next font
974                    }
975                }
976            }
977        }
978        
979        // If not found in CSS fallbacks, check Unicode fallbacks
980        for font in &self.unicode_fallbacks {
981            if let Some(meta) = cache.get_metadata_by_id(&font.id) {
982                // Check if this font's unicode ranges cover the character
983                for range in &meta.unicode_ranges {
984                    if codepoint >= range.start && codepoint <= range.end {
985                        return Some((font.id, "(unicode-fallback)".to_string()));
986                    }
987                }
988            }
989        }
990        
991        None
992    }
993    
994    /// Resolve all characters in a text string to their fonts
995    /// Returns a vector of (character, FontId, css_source) tuples
996    pub fn resolve_text(&self, cache: &FcFontCache, text: &str) -> Vec<(char, Option<(FontId, String)>)> {
997        text.chars()
998            .map(|ch| (ch, self.resolve_char(cache, ch)))
999            .collect()
1000    }
1001    
1002    /// Query which fonts should be used for a text string, grouped by font
1003    /// Returns runs of consecutive characters that use the same font
1004    /// This is the main API for text shaping - call this to get font runs, then shape each run
1005    pub fn query_for_text(&self, cache: &FcFontCache, text: &str) -> Vec<ResolvedFontRun> {
1006        if text.is_empty() {
1007            return Vec::new();
1008        }
1009        
1010        let mut runs: Vec<ResolvedFontRun> = Vec::new();
1011        let mut current_font: Option<FontId> = None;
1012        let mut current_css_source: Option<String> = None;
1013        let mut current_start_byte: usize = 0;
1014        
1015        for (byte_idx, ch) in text.char_indices() {
1016            let resolved = self.resolve_char(cache, ch);
1017            let (font_id, css_source) = match &resolved {
1018                Some((id, source)) => (Some(*id), Some(source.clone())),
1019                None => (None, None),
1020            };
1021            
1022            // Check if we need to start a new run
1023            let font_changed = font_id != current_font;
1024            
1025            if font_changed && byte_idx > 0 {
1026                // Finalize the current run
1027                let run_text = &text[current_start_byte..byte_idx];
1028                runs.push(ResolvedFontRun {
1029                    text: run_text.to_string(),
1030                    start_byte: current_start_byte,
1031                    end_byte: byte_idx,
1032                    font_id: current_font,
1033                    css_source: current_css_source.clone().unwrap_or_default(),
1034                });
1035                current_start_byte = byte_idx;
1036            }
1037            
1038            current_font = font_id;
1039            current_css_source = css_source;
1040        }
1041        
1042        // Finalize the last run
1043        if current_start_byte < text.len() {
1044            let run_text = &text[current_start_byte..];
1045            runs.push(ResolvedFontRun {
1046                text: run_text.to_string(),
1047                start_byte: current_start_byte,
1048                end_byte: text.len(),
1049                font_id: current_font,
1050                css_source: current_css_source.unwrap_or_default(),
1051            });
1052        }
1053        
1054        runs
1055    }
1056}
1057
1058/// A group of fonts that are fallbacks for a single CSS font-family name
1059#[derive(Debug, Clone, PartialEq, Eq)]
1060pub struct CssFallbackGroup {
1061    /// The CSS font name (e.g., "NotoSansJP", "sans-serif")
1062    pub css_name: String,
1063    
1064    /// System fonts that match this CSS name
1065    /// First font in list is the best match
1066    pub fonts: Vec<FontMatch>,
1067}
1068
1069/// Cache key for font fallback chain queries
1070/// 
1071/// IMPORTANT: This key intentionally does NOT include unicode_ranges.
1072/// Font chains should be cached by CSS properties only, not by text content.
1073/// Different texts with the same CSS font-stack should share the same chain.
1074#[cfg(feature = "std")]
1075#[derive(Debug, Clone, PartialEq, Eq, Hash)]
1076struct FontChainCacheKey {
1077    /// CSS font stack (expanded to OS-specific fonts)
1078    font_families: Vec<String>,
1079    /// Font weight
1080    weight: FcWeight,
1081    /// Font style flags
1082    italic: PatternMatch,
1083    oblique: PatternMatch,
1084}
1085
1086/// Path to a font file
1087#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)]
1088#[repr(C)]
1089pub struct FcFontPath {
1090    pub path: String,
1091    pub font_index: usize,
1092}
1093
1094/// In-memory font data
1095#[derive(Debug, Clone, PartialEq, Eq)]
1096#[repr(C)]
1097pub struct FcFont {
1098    pub bytes: Vec<u8>,
1099    pub font_index: usize,
1100    pub id: String, // For identification in tests
1101}
1102
1103/// Font source enum to represent either disk or memory fonts
1104#[derive(Debug, Clone)]
1105pub enum FontSource<'a> {
1106    /// Font loaded from memory
1107    Memory(&'a FcFont),
1108    /// Font loaded from disk
1109    Disk(&'a FcFontPath),
1110}
1111
1112/// Font cache, initialized at startup
1113#[derive(Debug)]
1114pub struct FcFontCache {
1115    // Pattern to FontId mapping (query index)
1116    patterns: BTreeMap<FcPattern, FontId>,
1117    // On-disk font paths
1118    disk_fonts: BTreeMap<FontId, FcFontPath>,
1119    // In-memory fonts
1120    memory_fonts: BTreeMap<FontId, FcFont>,
1121    // Metadata cache (patterns stored by ID for quick lookup)
1122    metadata: BTreeMap<FontId, FcPattern>,
1123    // Token index: maps lowercase tokens ("noto", "sans", "jp") to sets of FontIds
1124    // This enables fast fuzzy search by intersecting token sets
1125    token_index: BTreeMap<String, alloc::collections::BTreeSet<FontId>>,
1126    // Pre-tokenized font names (lowercase): FontId -> Vec<lowercase tokens>
1127    // Avoids re-tokenization during fuzzy search
1128    font_tokens: BTreeMap<FontId, Vec<String>>,
1129    // Font fallback chain cache (CSS stack + unicode -> resolved chain)
1130    #[cfg(feature = "std")]
1131    chain_cache: std::sync::Mutex<std::collections::HashMap<FontChainCacheKey, FontFallbackChain>>,
1132}
1133
1134impl Clone for FcFontCache {
1135    fn clone(&self) -> Self {
1136        Self {
1137            patterns: self.patterns.clone(),
1138            disk_fonts: self.disk_fonts.clone(),
1139            memory_fonts: self.memory_fonts.clone(),
1140            metadata: self.metadata.clone(),
1141            token_index: self.token_index.clone(),
1142            font_tokens: self.font_tokens.clone(),
1143            #[cfg(feature = "std")]
1144            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()), // Empty cache for cloned instance
1145        }
1146    }
1147}
1148
1149impl Default for FcFontCache {
1150    fn default() -> Self {
1151        Self {
1152            patterns: BTreeMap::new(),
1153            disk_fonts: BTreeMap::new(),
1154            memory_fonts: BTreeMap::new(),
1155            metadata: BTreeMap::new(),
1156            token_index: BTreeMap::new(),
1157            font_tokens: BTreeMap::new(),
1158            #[cfg(feature = "std")]
1159            chain_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
1160        }
1161    }
1162}
1163
1164impl FcFontCache {
1165    /// Helper method to add a font pattern to the token index
1166    fn index_pattern_tokens(&mut self, pattern: &FcPattern, id: FontId) {
1167        // Extract tokens from both name and family
1168        let mut all_tokens = Vec::new();
1169        
1170        if let Some(name) = &pattern.name {
1171            all_tokens.extend(Self::extract_font_name_tokens(name));
1172        }
1173        
1174        if let Some(family) = &pattern.family {
1175            all_tokens.extend(Self::extract_font_name_tokens(family));
1176        }
1177        
1178        // Convert tokens to lowercase and store them
1179        let tokens_lower: Vec<String> = all_tokens.iter().map(|t| t.to_lowercase()).collect();
1180        
1181        // Add each token (lowercase) to the index
1182        for token_lower in &tokens_lower {
1183            self.token_index
1184                .entry(token_lower.clone())
1185                .or_insert_with(alloc::collections::BTreeSet::new)
1186                .insert(id);
1187        }
1188        
1189        // Store pre-tokenized font name for fast lookup (no re-tokenization needed)
1190        self.font_tokens.insert(id, tokens_lower);
1191    }
1192
1193    /// Adds in-memory font files
1194    pub fn with_memory_fonts(&mut self, fonts: Vec<(FcPattern, FcFont)>) -> &mut Self {
1195        for (pattern, font) in fonts {
1196            let id = FontId::new();
1197            self.patterns.insert(pattern.clone(), id);
1198            self.metadata.insert(id, pattern.clone());
1199            self.memory_fonts.insert(id, font);
1200            self.index_pattern_tokens(&pattern, id);
1201        }
1202        self
1203    }
1204
1205    /// Adds a memory font with a specific ID (for testing)
1206    pub fn with_memory_font_with_id(
1207        &mut self,
1208        id: FontId,
1209        pattern: FcPattern,
1210        font: FcFont,
1211    ) -> &mut Self {
1212        self.patterns.insert(pattern.clone(), id);
1213        self.metadata.insert(id, pattern.clone());
1214        self.memory_fonts.insert(id, font);
1215        self.index_pattern_tokens(&pattern, id);
1216        self
1217    }
1218
1219    /// Get font data for a given font ID
1220    pub fn get_font_by_id<'a>(&'a self, id: &FontId) -> Option<FontSource<'a>> {
1221        // Check memory fonts first
1222        if let Some(font) = self.memory_fonts.get(id) {
1223            return Some(FontSource::Memory(font));
1224        }
1225        // Then check disk fonts
1226        if let Some(path) = self.disk_fonts.get(id) {
1227            return Some(FontSource::Disk(path));
1228        }
1229        None
1230    }
1231
1232    /// Get metadata directly from an ID
1233    pub fn get_metadata_by_id(&self, id: &FontId) -> Option<&FcPattern> {
1234        self.metadata.get(id)
1235    }
1236
1237    /// Get font bytes (either from disk or memory)
1238    #[cfg(feature = "std")]
1239    pub fn get_font_bytes(&self, id: &FontId) -> Option<Vec<u8>> {
1240        match self.get_font_by_id(id)? {
1241            FontSource::Memory(font) => {
1242                Some(font.bytes.clone())
1243            }
1244            FontSource::Disk(path) => {
1245                std::fs::read(&path.path).ok()
1246            }
1247        }
1248    }
1249
1250    /// Builds a new font cache
1251    #[cfg(not(all(feature = "std", feature = "parsing")))]
1252    pub fn build() -> Self {
1253        Self::default()
1254    }
1255
1256    /// Builds a new font cache from all fonts discovered on the system
1257    #[cfg(all(feature = "std", feature = "parsing"))]
1258    pub fn build() -> Self {
1259        let mut cache = FcFontCache::default();
1260
1261        #[cfg(target_os = "linux")]
1262        {
1263            if let Some(font_entries) = FcScanDirectories() {
1264                for (pattern, path) in font_entries {
1265                    let id = FontId::new();
1266                    cache.patterns.insert(pattern.clone(), id);
1267                    cache.metadata.insert(id, pattern.clone());
1268                    cache.disk_fonts.insert(id, path);
1269                    cache.index_pattern_tokens(&pattern, id);
1270                }
1271            }
1272        }
1273
1274        #[cfg(target_os = "windows")]
1275        {
1276            // Get the Windows system root directory from environment variable
1277            // Falls back to C:\Windows if not found
1278            let system_root = std::env::var("SystemRoot")
1279                .or_else(|_| std::env::var("WINDIR"))
1280                .unwrap_or_else(|_| "C:\\Windows".to_string());
1281            
1282            // Get user profile directory for user-installed fonts
1283            let user_profile = std::env::var("USERPROFILE")
1284                .unwrap_or_else(|_| "C:\\Users\\Default".to_string());
1285            
1286            let font_dirs = vec![
1287                (None, format!("{}\\Fonts\\", system_root)),
1288                (
1289                    None,
1290                    format!("{}\\AppData\\Local\\Microsoft\\Windows\\Fonts\\", user_profile),
1291                ),
1292            ];
1293
1294            let font_entries = FcScanDirectoriesInner(&font_dirs);
1295            for (pattern, path) in font_entries {
1296                let id = FontId::new();
1297                cache.patterns.insert(pattern.clone(), id);
1298                cache.metadata.insert(id, pattern.clone());
1299                cache.disk_fonts.insert(id, path);
1300                cache.index_pattern_tokens(&pattern, id);
1301            }
1302        }
1303
1304        #[cfg(target_os = "macos")]
1305        {
1306            let font_dirs = vec![
1307                (None, "~/Library/Fonts".to_owned()),
1308                (None, "/System/Library/Fonts".to_owned()),
1309                (None, "/Library/Fonts".to_owned()),
1310                // Scan AssetsV2 for dynamic system fonts (PingFang, SF Pro, etc.)
1311                (None, "/System/Library/AssetsV2".to_owned()),
1312            ];
1313
1314            let font_entries = FcScanDirectoriesInner(&font_dirs);
1315            for (pattern, path) in font_entries {
1316                let id = FontId::new();
1317                cache.patterns.insert(pattern.clone(), id);
1318                cache.metadata.insert(id, pattern.clone());
1319                cache.disk_fonts.insert(id, path);
1320                cache.index_pattern_tokens(&pattern, id);
1321            }
1322        }
1323
1324        cache
1325    }
1326
1327    /// Returns the list of fonts and font patterns
1328    pub fn list(&self) -> Vec<(&FcPattern, FontId)> {
1329        self.patterns
1330            .iter()
1331            .map(|(pattern, id)| (pattern, *id))
1332            .collect()
1333    }
1334
1335    /// Queries a font from the in-memory cache, returns the first found font (early return)
1336    pub fn query(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Option<FontMatch> {
1337        let mut matches = Vec::new();
1338
1339        for (stored_pattern, id) in &self.patterns {
1340            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1341                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1342                
1343                // Calculate Unicode compatibility score
1344                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1345                    // No specific Unicode requirements, use general coverage
1346                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1347                } else {
1348                    // Calculate how well this font covers the requested Unicode ranges
1349                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1350                };
1351                
1352                let style_score = Self::calculate_style_score(pattern, metadata);
1353                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
1354            }
1355        }
1356
1357        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
1358        // This ensures legibility is supreme priority
1359        matches.sort_by(|a, b| {
1360            b.1.cmp(&a.1) // Unicode compatibility (higher is better)
1361                .then_with(|| a.2.cmp(&b.2)) // Style score (lower is better)
1362        });
1363
1364        matches.first().map(|(id, _, _, metadata)| {
1365            FontMatch {
1366                id: *id,
1367                unicode_ranges: metadata.unicode_ranges.clone(),
1368                fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1369            }
1370        })
1371    }
1372
1373    /// Queries all fonts matching a pattern (internal use only)
1374    /// 
1375    /// Note: This function is now private. Use resolve_font_chain() to build a font fallback chain,
1376    /// then call FontFallbackChain::query_for_text() to resolve fonts for specific text.
1377    fn query_internal(&self, pattern: &FcPattern, trace: &mut Vec<TraceMsg>) -> Vec<FontMatch> {
1378        let mut matches = Vec::new();
1379
1380        for (stored_pattern, id) in &self.patterns {
1381            if Self::query_matches_internal(stored_pattern, pattern, trace) {
1382                let metadata = self.metadata.get(id).unwrap_or(stored_pattern);
1383                
1384                // Calculate Unicode compatibility score
1385                let unicode_compatibility = if pattern.unicode_ranges.is_empty() {
1386                    Self::calculate_unicode_coverage(&metadata.unicode_ranges) as i32
1387                } else {
1388                    Self::calculate_unicode_compatibility(&pattern.unicode_ranges, &metadata.unicode_ranges)
1389                };
1390                
1391                let style_score = Self::calculate_style_score(pattern, metadata);
1392                matches.push((*id, unicode_compatibility, style_score, metadata.clone()));
1393            }
1394        }
1395
1396        // Sort by style score (lowest first), THEN by Unicode compatibility (highest first)
1397        // Style matching (weight, italic, etc.) is now the primary criterion
1398        matches.sort_by(|a, b| {
1399            a.2.cmp(&b.2) // Style score (lower is better)
1400                .then_with(|| b.1.cmp(&a.1)) // Unicode compatibility (higher is better)
1401        });
1402
1403        matches
1404            .into_iter()
1405            .map(|(id, _, _, metadata)| {
1406                FontMatch {
1407                    id,
1408                    unicode_ranges: metadata.unicode_ranges.clone(),
1409                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
1410                }
1411            })
1412            .collect()
1413    }
1414
1415    /// Compute fallback fonts for a given font
1416    /// This is a lazy operation that can be expensive - only call when actually needed
1417    /// (e.g., for FFI or debugging, not needed for resolve_char)
1418    pub fn compute_fallbacks(
1419        &self,
1420        font_id: &FontId,
1421        trace: &mut Vec<TraceMsg>,
1422    ) -> Vec<FontMatchNoFallback> {
1423        // Get the pattern for this font
1424        let pattern = match self.metadata.get(font_id) {
1425            Some(p) => p,
1426            None => return Vec::new(),
1427        };
1428        
1429        self.compute_fallbacks_for_pattern(pattern, Some(font_id), trace)
1430    }
1431    
1432    fn compute_fallbacks_for_pattern(
1433        &self,
1434        pattern: &FcPattern,
1435        exclude_id: Option<&FontId>,
1436        _trace: &mut Vec<TraceMsg>,
1437    ) -> Vec<FontMatchNoFallback> {
1438        let mut candidates = Vec::new();
1439
1440        // Collect all potential fallbacks (excluding original pattern)
1441        for (stored_pattern, id) in &self.patterns {
1442            // Skip if this is the original font
1443            if exclude_id.is_some() && exclude_id.unwrap() == id {
1444                continue;
1445            }
1446
1447            // Check if this font supports any of the unicode ranges
1448            if !stored_pattern.unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
1449                // Calculate Unicode compatibility
1450                let unicode_compatibility = Self::calculate_unicode_compatibility(
1451                    &pattern.unicode_ranges,
1452                    &stored_pattern.unicode_ranges
1453                );
1454                
1455                // Only include if there's actual overlap
1456                if unicode_compatibility > 0 {
1457                    let style_score = Self::calculate_style_score(pattern, stored_pattern);
1458                    candidates.push((
1459                        FontMatchNoFallback {
1460                            id: *id,
1461                            unicode_ranges: stored_pattern.unicode_ranges.clone(),
1462                        },
1463                        unicode_compatibility,
1464                        style_score,
1465                        stored_pattern.clone(),
1466                    ));
1467                }
1468            } else if pattern.unicode_ranges.is_empty() && !stored_pattern.unicode_ranges.is_empty() {
1469                // No specific Unicode requirements, use general coverage
1470                let coverage = Self::calculate_unicode_coverage(&stored_pattern.unicode_ranges) as i32;
1471                let style_score = Self::calculate_style_score(pattern, stored_pattern);
1472                candidates.push((
1473                    FontMatchNoFallback {
1474                        id: *id,
1475                        unicode_ranges: stored_pattern.unicode_ranges.clone(),
1476                    },
1477                    coverage,
1478                    style_score,
1479                    stored_pattern.clone(),
1480                ));
1481            }
1482        }
1483
1484        // Sort by Unicode compatibility (highest first), THEN by style score (lowest first)
1485        candidates.sort_by(|a, b| {
1486            b.1.cmp(&a.1)
1487                .then_with(|| a.2.cmp(&b.2))
1488        });
1489
1490        // Deduplicate by keeping only the best match per unique unicode range
1491        let mut seen_ranges = Vec::new();
1492        let mut deduplicated = Vec::new();
1493
1494        for (id, _, _, pattern) in candidates {
1495            let mut is_new_range = false;
1496
1497            for range in &pattern.unicode_ranges {
1498                if !seen_ranges.iter().any(|r: &UnicodeRange| r.overlaps(range)) {
1499                    seen_ranges.push(*range);
1500                    is_new_range = true;
1501                }
1502            }
1503
1504            if is_new_range {
1505                deduplicated.push(id);
1506            }
1507        }
1508
1509        deduplicated
1510    }
1511
1512    /// Get in-memory font data
1513    pub fn get_memory_font(&self, id: &FontId) -> Option<&FcFont> {
1514        self.memory_fonts.get(id)
1515    }
1516
1517    /// Check if a pattern matches the query, with detailed tracing
1518    fn query_matches_internal(
1519        k: &FcPattern,
1520        pattern: &FcPattern,
1521        trace: &mut Vec<TraceMsg>,
1522    ) -> bool {
1523        // Check name - substring match
1524        if let Some(ref name) = pattern.name {
1525            let matches = k
1526                .name
1527                .as_ref()
1528                .map_or(false, |k_name| k_name.contains(name));
1529
1530            if !matches {
1531                trace.push(TraceMsg {
1532                    level: TraceLevel::Info,
1533                    path: k
1534                        .name
1535                        .as_ref()
1536                        .map_or_else(|| "<unknown>".to_string(), Clone::clone),
1537                    reason: MatchReason::NameMismatch {
1538                        requested: pattern.name.clone(),
1539                        found: k.name.clone(),
1540                    },
1541                });
1542                return false;
1543            }
1544        }
1545
1546        // Check family - substring match
1547        if let Some(ref family) = pattern.family {
1548            let matches = k
1549                .family
1550                .as_ref()
1551                .map_or(false, |k_family| k_family.contains(family));
1552
1553            if !matches {
1554                trace.push(TraceMsg {
1555                    level: TraceLevel::Info,
1556                    path: k
1557                        .name
1558                        .as_ref()
1559                        .map_or_else(|| "<unknown>".to_string(), Clone::clone),
1560                    reason: MatchReason::FamilyMismatch {
1561                        requested: pattern.family.clone(),
1562                        found: k.family.clone(),
1563                    },
1564                });
1565                return false;
1566            }
1567        }
1568
1569        // Check style properties
1570        let style_properties = [
1571            (
1572                "italic",
1573                pattern.italic.needs_to_match(),
1574                pattern.italic.matches(&k.italic),
1575            ),
1576            (
1577                "oblique",
1578                pattern.oblique.needs_to_match(),
1579                pattern.oblique.matches(&k.oblique),
1580            ),
1581            (
1582                "bold",
1583                pattern.bold.needs_to_match(),
1584                pattern.bold.matches(&k.bold),
1585            ),
1586            (
1587                "monospace",
1588                pattern.monospace.needs_to_match(),
1589                pattern.monospace.matches(&k.monospace),
1590            ),
1591            (
1592                "condensed",
1593                pattern.condensed.needs_to_match(),
1594                pattern.condensed.matches(&k.condensed),
1595            ),
1596        ];
1597
1598        for (property_name, needs_to_match, matches) in style_properties {
1599            if needs_to_match && !matches {
1600                let (requested, found) = match property_name {
1601                    "italic" => (format!("{:?}", pattern.italic), format!("{:?}", k.italic)),
1602                    "oblique" => (format!("{:?}", pattern.oblique), format!("{:?}", k.oblique)),
1603                    "bold" => (format!("{:?}", pattern.bold), format!("{:?}", k.bold)),
1604                    "monospace" => (
1605                        format!("{:?}", pattern.monospace),
1606                        format!("{:?}", k.monospace),
1607                    ),
1608                    "condensed" => (
1609                        format!("{:?}", pattern.condensed),
1610                        format!("{:?}", k.condensed),
1611                    ),
1612                    _ => (String::new(), String::new()),
1613                };
1614
1615                trace.push(TraceMsg {
1616                    level: TraceLevel::Info,
1617                    path: k
1618                        .name
1619                        .as_ref()
1620                        .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1621                    reason: MatchReason::StyleMismatch {
1622                        property: property_name,
1623                        requested,
1624                        found,
1625                    },
1626                });
1627                return false;
1628            }
1629        }
1630
1631        // Check weight - hard filter if non-normal weight is requested
1632        if pattern.weight != FcWeight::Normal && pattern.weight != k.weight {
1633            trace.push(TraceMsg {
1634                level: TraceLevel::Info,
1635                path: k
1636                    .name
1637                    .as_ref()
1638                    .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1639                reason: MatchReason::WeightMismatch {
1640                    requested: pattern.weight,
1641                    found: k.weight,
1642                },
1643            });
1644            return false;
1645        }
1646
1647        // Check stretch - hard filter if non-normal stretch is requested
1648        if pattern.stretch != FcStretch::Normal && pattern.stretch != k.stretch {
1649            trace.push(TraceMsg {
1650                level: TraceLevel::Info,
1651                path: k
1652                    .name
1653                    .as_ref()
1654                    .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1655                reason: MatchReason::StretchMismatch {
1656                    requested: pattern.stretch,
1657                    found: k.stretch,
1658                },
1659            });
1660            return false;
1661        }
1662
1663        // Check unicode ranges if specified
1664        if !pattern.unicode_ranges.is_empty() {
1665            let mut has_overlap = false;
1666
1667            for p_range in &pattern.unicode_ranges {
1668                for k_range in &k.unicode_ranges {
1669                    if p_range.overlaps(k_range) {
1670                        has_overlap = true;
1671                        break;
1672                    }
1673                }
1674                if has_overlap {
1675                    break;
1676                }
1677            }
1678
1679            if !has_overlap {
1680                trace.push(TraceMsg {
1681                    level: TraceLevel::Info,
1682                    path: k
1683                        .name
1684                        .as_ref()
1685                        .map_or_else(|| "<unknown>".to_string(), |s| s.clone()),
1686                    reason: MatchReason::UnicodeRangeMismatch {
1687                        character: '\0', // No specific character to report
1688                        ranges: k.unicode_ranges.clone(),
1689                    },
1690                });
1691                return false;
1692            }
1693        }
1694
1695        true
1696    }
1697    
1698    /// Resolve a complete font fallback chain for a CSS font-family stack
1699    /// This is the main entry point for font resolution with caching
1700    /// Automatically expands generic CSS families (serif, sans-serif, monospace) to OS-specific fonts
1701    /// 
1702    /// # Arguments
1703    /// * `font_families` - CSS font-family stack (e.g., ["Arial", "sans-serif"])
1704    /// * `text` - The text to render (used to extract Unicode ranges)
1705    /// * `weight` - Font weight
1706    /// * `italic` - Italic style requirement
1707    /// * `oblique` - Oblique style requirement
1708    /// * `trace` - Debug trace messages
1709    /// 
1710    /// # Returns
1711    /// A complete font fallback chain with CSS fallbacks and Unicode fallbacks
1712    /// 
1713    /// # Example
1714    /// ```no_run
1715    /// # use rust_fontconfig::{FcFontCache, FcWeight, PatternMatch};
1716    /// let cache = FcFontCache::build();
1717    /// let families = vec!["Arial".to_string(), "sans-serif".to_string()];
1718    /// let chain = cache.resolve_font_chain(&families, FcWeight::Normal, 
1719    ///                                       PatternMatch::DontCare, PatternMatch::DontCare, 
1720    ///                                       &mut Vec::new());
1721    /// // On macOS: families expanded to ["Arial", "San Francisco", "Helvetica Neue", "Lucida Grande"]
1722    /// ```
1723    #[cfg(feature = "std")]
1724    pub fn resolve_font_chain(
1725        &self,
1726        font_families: &[String],
1727        weight: FcWeight,
1728        italic: PatternMatch,
1729        oblique: PatternMatch,
1730        trace: &mut Vec<TraceMsg>,
1731    ) -> FontFallbackChain {
1732        self.resolve_font_chain_with_os(font_families, weight, italic, oblique, trace, OperatingSystem::current())
1733    }
1734    
1735    /// Resolve font chain with explicit OS specification (useful for testing)
1736    #[cfg(feature = "std")]
1737    pub fn resolve_font_chain_with_os(
1738        &self,
1739        font_families: &[String],
1740        weight: FcWeight,
1741        italic: PatternMatch,
1742        oblique: PatternMatch,
1743        trace: &mut Vec<TraceMsg>,
1744        os: OperatingSystem,
1745    ) -> FontFallbackChain {
1746        // Check cache FIRST - key uses original (unexpanded) families
1747        // This ensures all text nodes with same CSS properties share one chain
1748        let cache_key = FontChainCacheKey {
1749            font_families: font_families.to_vec(),  // Use ORIGINAL families, not expanded
1750            weight,
1751            italic,
1752            oblique,
1753        };
1754        
1755        if let Ok(cache) = self.chain_cache.lock() {
1756            if let Some(cached) = cache.get(&cache_key) {
1757                return cached.clone();
1758            }
1759        }
1760        
1761        // Expand generic CSS families to OS-specific fonts (no unicode ranges needed anymore)
1762        let expanded_families = expand_font_families(font_families, os, &[]);
1763        
1764        // Build the chain
1765        let chain = self.resolve_font_chain_uncached(
1766            &expanded_families,
1767            weight,
1768            italic,
1769            oblique,
1770            trace,
1771        );
1772        
1773        // Cache the result
1774        if let Ok(mut cache) = self.chain_cache.lock() {
1775            cache.insert(cache_key, chain.clone());
1776        }
1777        
1778        chain
1779    }
1780    
1781    /// Internal implementation without caching
1782    /// 
1783    /// Note: This function no longer takes text/unicode_ranges as input.
1784    /// Instead, the returned FontFallbackChain has a query_for_text() method
1785    /// that can be called to resolve which fonts to use for specific text.
1786    #[cfg(feature = "std")]
1787    fn resolve_font_chain_uncached(
1788        &self,
1789        font_families: &[String],
1790        weight: FcWeight,
1791        italic: PatternMatch,
1792        oblique: PatternMatch,
1793        trace: &mut Vec<TraceMsg>,
1794    ) -> FontFallbackChain {
1795        let mut css_fallbacks = Vec::new();
1796        
1797        // Resolve each CSS font-family to its system fallbacks
1798        for (_i, family) in font_families.iter().enumerate() {
1799            // Check if this is a generic font family
1800            let (pattern, is_generic) = if Self::is_generic_family(family) {
1801                // For generic families, don't filter by name, use font properties instead
1802                let pattern = match family.as_str() {
1803                    "sans-serif" => FcPattern {
1804                        name: None,
1805                        weight,
1806                        italic,
1807                        oblique,
1808                        monospace: PatternMatch::False,
1809                        unicode_ranges: Vec::new(),
1810                        ..Default::default()
1811                    },
1812                    "serif" => FcPattern {
1813                        name: None,
1814                        weight,
1815                        italic,
1816                        oblique,
1817                        monospace: PatternMatch::False,
1818                        unicode_ranges: Vec::new(),
1819                        ..Default::default()
1820                    },
1821                    "monospace" => FcPattern {
1822                        name: None,
1823                        weight,
1824                        italic,
1825                        oblique,
1826                        monospace: PatternMatch::True,
1827                        unicode_ranges: Vec::new(),
1828                        ..Default::default()
1829                    },
1830                    _ => FcPattern {
1831                        name: None,
1832                        weight,
1833                        italic,
1834                        oblique,
1835                        unicode_ranges: Vec::new(),
1836                        ..Default::default()
1837                    },
1838                };
1839                (pattern, true)
1840            } else {
1841                // Specific font family name
1842                let pattern = FcPattern {
1843                    name: Some(family.clone()),
1844                    weight,
1845                    italic,
1846                    oblique,
1847                    unicode_ranges: Vec::new(),
1848                    ..Default::default()
1849                };
1850                (pattern, false)
1851            };
1852            
1853            // Use fuzzy matching for specific fonts (fast token-based lookup)
1854            // For generic families, use query (slower but necessary for property matching)
1855            let mut matches = if is_generic {
1856                // Generic families need full pattern matching
1857                self.query_internal(&pattern, trace)
1858            } else {
1859                // Specific font names: use fast token-based fuzzy matching
1860                self.fuzzy_query_by_name(family, weight, italic, oblique, &[], trace)
1861            };
1862            
1863            // For generic families, limit to top 5 fonts to avoid too many matches
1864            if is_generic && matches.len() > 5 {
1865                matches.truncate(5);
1866            }
1867            
1868            // Always add the CSS fallback group to preserve CSS ordering
1869            // even if no fonts were found for this family
1870            css_fallbacks.push(CssFallbackGroup {
1871                css_name: family.clone(),
1872                fonts: matches,
1873            });
1874        }
1875        
1876        // Unicode fallbacks are now resolved lazily in query_for_text()
1877        // This avoids the expensive unicode coverage check during chain building
1878        FontFallbackChain {
1879            css_fallbacks,
1880            unicode_fallbacks: Vec::new(), // Will be populated on-demand
1881            original_stack: font_families.to_vec(),
1882        }
1883    }
1884    
1885    /// Extract Unicode ranges from text
1886    #[allow(dead_code)]
1887    fn extract_unicode_ranges(text: &str) -> Vec<UnicodeRange> {
1888        let mut chars: Vec<char> = text.chars().collect();
1889        chars.sort_unstable();
1890        chars.dedup();
1891        
1892        if chars.is_empty() {
1893            return Vec::new();
1894        }
1895        
1896        let mut ranges = Vec::new();
1897        let mut range_start = chars[0] as u32;
1898        let mut range_end = range_start;
1899        
1900        for &c in &chars[1..] {
1901            let codepoint = c as u32;
1902            if codepoint == range_end + 1 {
1903                range_end = codepoint;
1904            } else {
1905                ranges.push(UnicodeRange { start: range_start, end: range_end });
1906                range_start = codepoint;
1907                range_end = codepoint;
1908            }
1909        }
1910        
1911        ranges.push(UnicodeRange { start: range_start, end: range_end });
1912        ranges
1913    }
1914    
1915    /// Check if a font family name is a generic CSS family
1916    #[cfg(feature = "std")]
1917    fn is_generic_family(family: &str) -> bool {
1918        matches!(
1919            family.to_lowercase().as_str(),
1920            "serif" | "sans-serif" | "monospace" | "cursive" | "fantasy" | "system-ui"
1921        )
1922    }
1923    
1924    /// Fuzzy query for fonts by name when exact match fails
1925    /// Uses intelligent token-based matching with inverted index for speed:
1926    /// 1. Break name into tokens (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
1927    /// 2. Use token_index to find candidate fonts via BTreeSet intersection
1928    /// 3. Score only the candidate fonts (instead of all 800+ patterns)
1929    /// 4. Prioritize fonts matching more tokens + Unicode coverage
1930    #[cfg(feature = "std")]
1931    fn fuzzy_query_by_name(
1932        &self,
1933        requested_name: &str,
1934        weight: FcWeight,
1935        italic: PatternMatch,
1936        oblique: PatternMatch,
1937        unicode_ranges: &[UnicodeRange],
1938        _trace: &mut Vec<TraceMsg>,
1939    ) -> Vec<FontMatch> {
1940        // Extract tokens from the requested name (e.g., "NotoSansJP" -> ["noto", "sans", "jp"])
1941        let tokens = Self::extract_font_name_tokens(requested_name);
1942        
1943        if tokens.is_empty() {
1944            return Vec::new();
1945        }
1946        
1947        // Convert tokens to lowercase for case-insensitive lookup
1948        let tokens_lower: Vec<String> = tokens.iter().map(|t| t.to_lowercase()).collect();
1949        
1950        // Progressive token matching strategy:
1951        // Start with first token, then progressively narrow down with each additional token
1952        // If adding a token results in 0 matches, use the previous (broader) set
1953        // Example: ["Noto"] -> 10 fonts, ["Noto","Sans"] -> 2 fonts, ["Noto","Sans","JP"] -> 0 fonts => use 2 fonts
1954        
1955        // Start with the first token
1956        let first_token = &tokens_lower[0];
1957        let mut candidate_ids = match self.token_index.get(first_token) {
1958            Some(ids) if !ids.is_empty() => ids.clone(),
1959            _ => {
1960                // First token not found - no fonts match, quit immediately
1961                return Vec::new();
1962            }
1963        };
1964        
1965        // Progressively narrow down with each additional token
1966        for token in &tokens_lower[1..] {
1967            if let Some(token_ids) = self.token_index.get(token) {
1968                // Calculate intersection
1969                let intersection: alloc::collections::BTreeSet<FontId> = 
1970                    candidate_ids.intersection(token_ids).copied().collect();
1971                
1972                if intersection.is_empty() {
1973                    // Adding this token results in 0 matches - keep previous set and stop
1974                    break;
1975                } else {
1976                    // Successfully narrowed down - use intersection
1977                    candidate_ids = intersection;
1978                }
1979            } else {
1980                // Token not in index - keep current set and stop
1981                break;
1982            }
1983        }
1984        
1985        // Now score only the candidate fonts (HUGE speedup!)
1986        let mut candidates = Vec::new();
1987        
1988        for id in candidate_ids {
1989            let pattern = match self.metadata.get(&id) {
1990                Some(p) => p,
1991                None => continue,
1992            };
1993            
1994            // Get pre-tokenized font name (already lowercase)
1995            let font_tokens_lower = match self.font_tokens.get(&id) {
1996                Some(tokens) => tokens,
1997                None => continue,
1998            };
1999            
2000            if font_tokens_lower.is_empty() {
2001                continue;
2002            }
2003            
2004            // Calculate token match score (how many requested tokens appear in font name)
2005            // Both tokens_lower and font_tokens_lower are already lowercase, so direct comparison
2006            let token_matches = tokens_lower.iter()
2007                .filter(|req_token| {
2008                    font_tokens_lower.iter().any(|font_token| {
2009                        // Both already lowercase - just check if font token contains request token
2010                        font_token.contains(req_token.as_str())
2011                    })
2012                })
2013                .count();
2014            
2015            // Skip if no tokens match (shouldn't happen due to index, but safety check)
2016            if token_matches == 0 {
2017                continue;
2018            }
2019            
2020            // Calculate token similarity score (0-100)
2021            let token_similarity = (token_matches * 100 / tokens.len()) as i32;
2022            
2023            // Calculate Unicode range similarity
2024            let unicode_similarity = if !unicode_ranges.is_empty() && !pattern.unicode_ranges.is_empty() {
2025                Self::calculate_unicode_compatibility(unicode_ranges, &pattern.unicode_ranges)
2026            } else {
2027                0
2028            };
2029            
2030            // CRITICAL: If we have Unicode requirements, ONLY accept fonts that cover them
2031            // A font with great name match but no Unicode coverage is useless
2032            if !unicode_ranges.is_empty() && unicode_similarity == 0 {
2033                continue;
2034            }
2035            
2036            let style_score = Self::calculate_style_score(&FcPattern {
2037                weight,
2038                italic,
2039                oblique,
2040                ..Default::default()
2041            }, pattern);
2042            
2043            candidates.push((
2044                id,
2045                token_similarity,
2046                unicode_similarity,
2047                style_score,
2048                pattern.clone(),
2049            ));
2050        }
2051        
2052        // Sort by:
2053        // 1. Token matches (more matches = better)
2054        // 2. Unicode compatibility (if ranges provided)
2055        // 3. Style score (lower is better)
2056        candidates.sort_by(|a, b| {
2057            if !unicode_ranges.is_empty() {
2058                // When we have Unicode requirements, prioritize coverage
2059                b.1.cmp(&a.1) // Token similarity (higher is better) - PRIMARY
2060                    .then_with(|| b.2.cmp(&a.2)) // Unicode similarity (higher is better) - SECONDARY
2061                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better) - TERTIARY
2062            } else {
2063                // No Unicode requirements, token similarity is primary
2064                b.1.cmp(&a.1) // Token similarity (higher is better)
2065                    .then_with(|| a.3.cmp(&b.3)) // Style score (lower is better)
2066            }
2067        });
2068        
2069        // Take top 5 matches
2070        candidates.truncate(5);
2071        
2072        // Convert to FontMatch
2073        candidates
2074            .into_iter()
2075            .map(|(id, _token_sim, _unicode_sim, _style, pattern)| {
2076                FontMatch {
2077                    id,
2078                    unicode_ranges: pattern.unicode_ranges.clone(),
2079                    fallbacks: Vec::new(), // Fallbacks computed lazily via compute_fallbacks()
2080                }
2081            })
2082            .collect()
2083    }
2084    
2085    /// Extract tokens from a font name
2086    /// E.g., "NotoSansJP" -> ["Noto", "Sans", "JP"]
2087    /// E.g., "Noto Sans CJK JP" -> ["Noto", "Sans", "CJK", "JP"]
2088    fn extract_font_name_tokens(name: &str) -> Vec<String> {
2089        let mut tokens = Vec::new();
2090        let mut current_token = String::new();
2091        let mut last_was_lower = false;
2092        
2093        for c in name.chars() {
2094            if c.is_whitespace() || c == '-' || c == '_' {
2095                // Word separator
2096                if !current_token.is_empty() {
2097                    tokens.push(current_token.clone());
2098                    current_token.clear();
2099                }
2100                last_was_lower = false;
2101            } else if c.is_uppercase() && last_was_lower && !current_token.is_empty() {
2102                // CamelCase boundary (e.g., "Noto" | "Sans")
2103                tokens.push(current_token.clone());
2104                current_token.clear();
2105                current_token.push(c);
2106                last_was_lower = false;
2107            } else {
2108                current_token.push(c);
2109                last_was_lower = c.is_lowercase();
2110            }
2111        }
2112        
2113        if !current_token.is_empty() {
2114            tokens.push(current_token);
2115        }
2116        
2117        tokens
2118    }
2119    
2120    /// Normalize font name for comparison (remove spaces, lowercase, keep only ASCII alphanumeric)
2121    /// This ensures we only compare Latin-script names, ignoring localized names
2122    #[allow(dead_code)]
2123    fn normalize_font_name(name: &str) -> String {
2124        name.chars()
2125            .filter(|c| c.is_ascii_alphanumeric())
2126            .map(|c| c.to_ascii_lowercase())
2127            .collect()
2128    }
2129    
2130    /// Calculate Levenshtein distance between two strings
2131    #[allow(dead_code)]
2132    fn levenshtein_distance(s1: &str, s2: &str) -> usize {
2133        let len1 = s1.chars().count();
2134        let len2 = s2.chars().count();
2135        
2136        if len1 == 0 {
2137            return len2;
2138        }
2139        if len2 == 0 {
2140            return len1;
2141        }
2142        
2143        let mut prev_row: Vec<usize> = (0..=len2).collect();
2144        let mut curr_row = vec![0; len2 + 1];
2145        
2146        for (i, c1) in s1.chars().enumerate() {
2147            curr_row[0] = i + 1;
2148            
2149            for (j, c2) in s2.chars().enumerate() {
2150                let cost = if c1 == c2 { 0 } else { 1 };
2151                curr_row[j + 1] = (curr_row[j] + 1)
2152                    .min(prev_row[j + 1] + 1)
2153                    .min(prev_row[j] + cost);
2154            }
2155            
2156            core::mem::swap(&mut prev_row, &mut curr_row);
2157        }
2158        
2159        prev_row[len2]
2160    }
2161    
2162    /// Find fonts to cover missing Unicode ranges
2163    /// Uses intelligent matching: prefers fonts with similar names to existing ones
2164    /// Early quits once all Unicode ranges are covered for performance
2165    #[allow(dead_code)]
2166    fn find_unicode_fallbacks(
2167        &self,
2168        unicode_ranges: &[UnicodeRange],
2169        covered_chars: &[bool],
2170        existing_groups: &[CssFallbackGroup],
2171        weight: FcWeight,
2172        italic: PatternMatch,
2173        oblique: PatternMatch,
2174        trace: &mut Vec<TraceMsg>,
2175    ) -> Vec<FontMatch> {
2176        // Extract uncovered ranges
2177        let mut uncovered_ranges = Vec::new();
2178        for (i, &covered) in covered_chars.iter().enumerate() {
2179            if !covered && i < unicode_ranges.len() {
2180                uncovered_ranges.push(unicode_ranges[i].clone());
2181            }
2182        }
2183        
2184        if uncovered_ranges.is_empty() {
2185            return Vec::new();
2186        }
2187        
2188        // Query for fonts that cover these ranges
2189        let pattern = FcPattern {
2190            name: None, // Wildcard - match any font
2191            weight,
2192            italic,
2193            oblique,
2194            unicode_ranges: uncovered_ranges.clone(),
2195            ..Default::default()
2196        };
2197        
2198        let mut candidates = self.query_internal(&pattern, trace);
2199        
2200        // Intelligent sorting: prefer fonts with similar names to existing ones
2201        // Extract font family prefixes from existing fonts (e.g., "Noto Sans" from "Noto Sans JP")
2202        let existing_prefixes: Vec<String> = existing_groups
2203            .iter()
2204            .flat_map(|group| {
2205                group.fonts.iter().filter_map(|font| {
2206                    self.get_metadata_by_id(&font.id)
2207                        .and_then(|meta| meta.family.clone())
2208                        .and_then(|family| {
2209                            // Extract prefix (e.g., "Noto Sans" from "Noto Sans JP")
2210                            family.split_whitespace()
2211                                .take(2)
2212                                .collect::<Vec<_>>()
2213                                .join(" ")
2214                                .into()
2215                        })
2216                })
2217            })
2218            .collect();
2219        
2220        // Sort candidates by:
2221        // 1. Name similarity to existing fonts (highest priority)
2222        // 2. Unicode coverage (secondary)
2223        candidates.sort_by(|a, b| {
2224            let a_meta = self.get_metadata_by_id(&a.id);
2225            let b_meta = self.get_metadata_by_id(&b.id);
2226            
2227            let a_score = Self::calculate_font_similarity_score(a_meta, &existing_prefixes);
2228            let b_score = Self::calculate_font_similarity_score(b_meta, &existing_prefixes);
2229            
2230            b_score.cmp(&a_score) // Higher score = better match
2231                .then_with(|| {
2232                    let a_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &a.unicode_ranges);
2233                    let b_coverage = Self::calculate_unicode_compatibility(&uncovered_ranges, &b.unicode_ranges);
2234                    b_coverage.cmp(&a_coverage)
2235                })
2236        });
2237        
2238        // Early quit optimization: only take fonts until all ranges are covered
2239        let mut result = Vec::new();
2240        let mut remaining_uncovered: Vec<bool> = vec![true; uncovered_ranges.len()];
2241        
2242        for candidate in candidates {
2243            // Check which ranges this font covers
2244            let mut covers_new_range = false;
2245            
2246            for (i, range) in uncovered_ranges.iter().enumerate() {
2247                if remaining_uncovered[i] {
2248                    // Check if this font covers this range
2249                    for font_range in &candidate.unicode_ranges {
2250                        if font_range.overlaps(range) {
2251                            remaining_uncovered[i] = false;
2252                            covers_new_range = true;
2253                            break;
2254                        }
2255                    }
2256                }
2257            }
2258            
2259            // Only add fonts that cover at least one new range
2260            if covers_new_range {
2261                result.push(candidate);
2262                
2263                // Early quit: if all ranges are covered, stop
2264                if remaining_uncovered.iter().all(|&uncovered| !uncovered) {
2265                    break;
2266                }
2267            }
2268        }
2269        
2270        result
2271    }
2272    
2273    /// Calculate similarity score between a font and existing font prefixes
2274    /// Higher score = more similar
2275    #[allow(dead_code)]
2276    fn calculate_font_similarity_score(
2277        font_meta: Option<&FcPattern>,
2278        existing_prefixes: &[String],
2279    ) -> i32 {
2280        let Some(meta) = font_meta else { return 0; };
2281        let Some(family) = &meta.family else { return 0; };
2282        
2283        // Check if this font's family matches any existing prefix
2284        for prefix in existing_prefixes {
2285            if family.starts_with(prefix) {
2286                return 100; // Strong match
2287            }
2288            if family.contains(prefix) {
2289                return 50; // Partial match
2290            }
2291        }
2292        
2293        0 // No match
2294    }
2295    
2296    /// Find fallback fonts for a given pattern
2297    // Helper to calculate total unicode coverage
2298    fn calculate_unicode_coverage(ranges: &[UnicodeRange]) -> u64 {
2299        ranges
2300            .iter()
2301            .map(|range| (range.end - range.start + 1) as u64)
2302            .sum()
2303    }
2304
2305    /// Calculate how well a font's Unicode ranges cover the requested ranges
2306    /// Returns a compatibility score (higher is better, 0 means no overlap)
2307    fn calculate_unicode_compatibility(
2308        requested: &[UnicodeRange],
2309        available: &[UnicodeRange],
2310    ) -> i32 {
2311        if requested.is_empty() {
2312            // No specific requirements, return total coverage
2313            return Self::calculate_unicode_coverage(available) as i32;
2314        }
2315        
2316        let mut total_coverage = 0u32;
2317        
2318        for req_range in requested {
2319            for avail_range in available {
2320                // Calculate overlap between requested and available ranges
2321                let overlap_start = req_range.start.max(avail_range.start);
2322                let overlap_end = req_range.end.min(avail_range.end);
2323                
2324                if overlap_start <= overlap_end {
2325                    // There is overlap
2326                    let overlap_size = overlap_end - overlap_start + 1;
2327                    total_coverage += overlap_size;
2328                }
2329            }
2330        }
2331        
2332        total_coverage as i32
2333    }
2334
2335    fn calculate_style_score(original: &FcPattern, candidate: &FcPattern) -> i32 {
2336
2337        let mut score = 0_i32;
2338
2339        // Weight calculation with special handling for bold property
2340        if (original.bold == PatternMatch::True && candidate.weight == FcWeight::Bold)
2341            || (original.bold == PatternMatch::False && candidate.weight != FcWeight::Bold)
2342        {
2343            // No weight penalty when bold is requested and font has Bold weight
2344            // No weight penalty when non-bold is requested and font has non-Bold weight
2345        } else {
2346            // Apply normal weight difference penalty
2347            let weight_diff = (original.weight as i32 - candidate.weight as i32).abs();
2348            score += weight_diff as i32;
2349        }
2350
2351        // Stretch calculation with special handling for condensed property
2352        if (original.condensed == PatternMatch::True && candidate.stretch.is_condensed())
2353            || (original.condensed == PatternMatch::False && !candidate.stretch.is_condensed())
2354        {
2355            // No stretch penalty when condensed is requested and font has condensed stretch
2356            // No stretch penalty when non-condensed is requested and font has non-condensed stretch
2357        } else {
2358            // Apply normal stretch difference penalty
2359            let stretch_diff = (original.stretch as i32 - candidate.stretch as i32).abs();
2360            score += (stretch_diff * 100) as i32;
2361        }
2362
2363        // Handle style properties with standard penalties and bonuses
2364        let style_props = [
2365            (original.italic, candidate.italic, 300, 150),
2366            (original.oblique, candidate.oblique, 200, 100),
2367            (original.bold, candidate.bold, 300, 150),
2368            (original.monospace, candidate.monospace, 100, 50),
2369            (original.condensed, candidate.condensed, 100, 50),
2370        ];
2371
2372        for (orig, cand, mismatch_penalty, dontcare_penalty) in style_props {
2373            if orig.needs_to_match() {
2374                if !orig.matches(&cand) {
2375                    if cand == PatternMatch::DontCare {
2376                        score += dontcare_penalty;
2377                    } else {
2378                        score += mismatch_penalty;
2379                    }
2380                } else if orig == PatternMatch::True && cand == PatternMatch::True {
2381                    // Give bonus for exact True match to solve the test case
2382                    score -= 20;
2383                }
2384            }
2385        }
2386
2387        score
2388    }
2389}
2390
2391#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
2392fn FcScanDirectories() -> Option<Vec<(FcPattern, FcFontPath)>> {
2393    use std::fs;
2394    use std::path::Path;
2395
2396    const BASE_FONTCONFIG_PATH: &str = "/etc/fonts/fonts.conf";
2397
2398    if !Path::new(BASE_FONTCONFIG_PATH).exists() {
2399        return None;
2400    }
2401
2402    let mut font_paths = Vec::with_capacity(32);
2403    let mut paths_to_visit = vec![(None, PathBuf::from(BASE_FONTCONFIG_PATH))];
2404
2405    while let Some((prefix, path_to_visit)) = paths_to_visit.pop() {
2406        let path = match process_path(&prefix, path_to_visit, true) {
2407            Some(path) => path,
2408            None => continue,
2409        };
2410
2411        let metadata = match fs::metadata(&path) {
2412            Ok(metadata) => metadata,
2413            Err(_) => continue,
2414        };
2415
2416        if metadata.is_file() {
2417            let xml_utf8 = match fs::read_to_string(&path) {
2418                Ok(xml_utf8) => xml_utf8,
2419                Err(_) => continue,
2420            };
2421
2422            if ParseFontsConf(&xml_utf8, &mut paths_to_visit, &mut font_paths).is_none() {
2423                continue;
2424            }
2425        } else if metadata.is_dir() {
2426            let dir_entries = match fs::read_dir(&path) {
2427                Ok(dir_entries) => dir_entries,
2428                Err(_) => continue,
2429            };
2430
2431            for entry_result in dir_entries {
2432                let entry = match entry_result {
2433                    Ok(entry) => entry,
2434                    Err(_) => continue,
2435                };
2436
2437                let entry_path = entry.path();
2438
2439                // `fs::metadata` traverses symbolic links
2440                let entry_metadata = match fs::metadata(&entry_path) {
2441                    Ok(metadata) => metadata,
2442                    Err(_) => continue,
2443                };
2444
2445                if !entry_metadata.is_file() {
2446                    continue;
2447                }
2448
2449                let file_name = match entry_path.file_name() {
2450                    Some(name) => name,
2451                    None => continue,
2452                };
2453
2454                let file_name_str = file_name.to_string_lossy();
2455                if file_name_str.starts_with(|c: char| c.is_ascii_digit())
2456                    && file_name_str.ends_with(".conf")
2457                {
2458                    paths_to_visit.push((None, entry_path));
2459                }
2460            }
2461        }
2462    }
2463
2464    if font_paths.is_empty() {
2465        return None;
2466    }
2467
2468    Some(FcScanDirectoriesInner(&font_paths))
2469}
2470
2471// Parses the fonts.conf file
2472#[cfg(all(feature = "std", feature = "parsing", target_os = "linux"))]
2473fn ParseFontsConf(
2474    input: &str,
2475    paths_to_visit: &mut Vec<(Option<String>, PathBuf)>,
2476    font_paths: &mut Vec<(Option<String>, String)>,
2477) -> Option<()> {
2478    use xmlparser::Token::*;
2479    use xmlparser::Tokenizer;
2480
2481    const TAG_INCLUDE: &str = "include";
2482    const TAG_DIR: &str = "dir";
2483    const ATTRIBUTE_PREFIX: &str = "prefix";
2484
2485    let mut current_prefix: Option<&str> = None;
2486    let mut current_path: Option<&str> = None;
2487    let mut is_in_include = false;
2488    let mut is_in_dir = false;
2489
2490    for token_result in Tokenizer::from(input) {
2491        let token = match token_result {
2492            Ok(token) => token,
2493            Err(_) => return None,
2494        };
2495
2496        match token {
2497            ElementStart { local, .. } => {
2498                if is_in_include || is_in_dir {
2499                    return None; /* error: nested tags */
2500                }
2501
2502                match local.as_str() {
2503                    TAG_INCLUDE => {
2504                        is_in_include = true;
2505                    }
2506                    TAG_DIR => {
2507                        is_in_dir = true;
2508                    }
2509                    _ => continue,
2510                }
2511
2512                current_path = None;
2513            }
2514            Text { text, .. } => {
2515                let text = text.as_str().trim();
2516                if text.is_empty() {
2517                    continue;
2518                }
2519                if is_in_include || is_in_dir {
2520                    current_path = Some(text);
2521                }
2522            }
2523            Attribute { local, value, .. } => {
2524                if !is_in_include && !is_in_dir {
2525                    continue;
2526                }
2527                // attribute on <include> or <dir> node
2528                if local.as_str() == ATTRIBUTE_PREFIX {
2529                    current_prefix = Some(value.as_str());
2530                }
2531            }
2532            ElementEnd { end, .. } => {
2533                let end_tag = match end {
2534                    xmlparser::ElementEnd::Close(_, a) => a,
2535                    _ => continue,
2536                };
2537
2538                match end_tag.as_str() {
2539                    TAG_INCLUDE => {
2540                        if !is_in_include {
2541                            continue;
2542                        }
2543
2544                        if let Some(current_path) = current_path.as_ref() {
2545                            paths_to_visit.push((
2546                                current_prefix.map(ToOwned::to_owned),
2547                                PathBuf::from(*current_path),
2548                            ));
2549                        }
2550                    }
2551                    TAG_DIR => {
2552                        if !is_in_dir {
2553                            continue;
2554                        }
2555
2556                        if let Some(current_path) = current_path.as_ref() {
2557                            font_paths.push((
2558                                current_prefix.map(ToOwned::to_owned),
2559                                (*current_path).to_owned(),
2560                            ));
2561                        }
2562                    }
2563                    _ => continue,
2564                }
2565
2566                is_in_include = false;
2567                is_in_dir = false;
2568                current_path = None;
2569                current_prefix = None;
2570            }
2571            _ => {}
2572        }
2573    }
2574
2575    Some(())
2576}
2577
2578// Remaining implementation for font scanning, parsing, etc.
2579#[cfg(all(feature = "std", feature = "parsing"))]
2580fn FcParseFont(filepath: &PathBuf) -> Option<Vec<(FcPattern, FcFontPath)>> {
2581    use allsorts::{
2582        binary::read::ReadScope,
2583        font_data::FontData,
2584        get_name::fontcode_get_name,
2585        post::PostTable,
2586        tables::{
2587            os2::Os2, FontTableProvider, HeadTable, HheaTable, HmtxTable, MaxpTable, NameTable,
2588        },
2589        tag,
2590    };
2591    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
2592    use mmapio::MmapOptions;
2593    use std::collections::BTreeSet;
2594    use std::fs::File;
2595
2596    const FONT_SPECIFIER_NAME_ID: u16 = 4;
2597    const FONT_SPECIFIER_FAMILY_ID: u16 = 1;
2598
2599    // Try parsing the font file and see if the postscript name matches
2600    let file = File::open(filepath).ok()?;
2601
2602    #[cfg(all(not(target_family = "wasm"), feature = "std"))]
2603    let font_bytes = unsafe { MmapOptions::new().map(&file).ok()? };
2604
2605    #[cfg(not(all(not(target_family = "wasm"), feature = "std")))]
2606    let font_bytes = std::fs::read(filepath).ok()?;
2607
2608    let max_fonts = if font_bytes.len() >= 12 && &font_bytes[0..4] == b"ttcf" {
2609        // Read numFonts from TTC header (offset 8, 4 bytes)
2610        let num_fonts =
2611            u32::from_be_bytes([font_bytes[8], font_bytes[9], font_bytes[10], font_bytes[11]]);
2612        // Cap at a reasonable maximum as a safety measure
2613        std::cmp::min(num_fonts as usize, 100)
2614    } else {
2615        // Not a collection, just one font
2616        1
2617    };
2618
2619    let scope = ReadScope::new(&font_bytes[..]);
2620    let font_file = scope.read::<FontData<'_>>().ok()?;
2621
2622    // Handle collections properly by iterating through all fonts
2623    let mut results = Vec::new();
2624
2625    for font_index in 0..max_fonts {
2626        let provider = font_file.table_provider(font_index).ok()?;
2627        let head_data = provider.table_data(tag::HEAD).ok()??.into_owned();
2628        let head_table = ReadScope::new(&head_data).read::<HeadTable>().ok()?;
2629
2630        let is_bold = head_table.is_bold();
2631        let is_italic = head_table.is_italic();
2632        let mut detected_monospace = None;
2633
2634        let post_data = provider.table_data(tag::POST).ok()??;
2635        if let Ok(post_table) = ReadScope::new(&post_data).read::<PostTable>() {
2636            // isFixedPitch here - https://learn.microsoft.com/en-us/typography/opentype/spec/post#header
2637            detected_monospace = Some(post_table.header.is_fixed_pitch != 0);
2638        }
2639
2640        // Get font properties from OS/2 table
2641        let os2_data = provider.table_data(tag::OS_2).ok()??;
2642        let os2_table = ReadScope::new(&os2_data)
2643            .read_dep::<Os2>(os2_data.len())
2644            .ok()?;
2645
2646        // Extract additional style information
2647        let is_oblique = os2_table
2648            .fs_selection
2649            .contains(allsorts::tables::os2::FsSelection::OBLIQUE);
2650        let weight = FcWeight::from_u16(os2_table.us_weight_class);
2651        let stretch = FcStretch::from_u16(os2_table.us_width_class);
2652
2653        // Extract unicode ranges from OS/2 table (fast, but may be inaccurate)
2654        // These are hints about what the font *should* support
2655        // For actual glyph coverage verification, query the font file directly
2656        let mut unicode_ranges = Vec::new();
2657
2658        // Process the 4 Unicode range bitfields from OS/2 table
2659        let ranges = [
2660            os2_table.ul_unicode_range1,
2661            os2_table.ul_unicode_range2,
2662            os2_table.ul_unicode_range3,
2663            os2_table.ul_unicode_range4,
2664        ];
2665
2666        // Unicode range bit positions to actual ranges
2667        // Based on OpenType spec: https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ur
2668        let range_mappings = [
2669            // ulUnicodeRange1 (bits 0-31)
2670            (0, 0x0000, 0x007F), // Basic Latin
2671            (1, 0x0080, 0x00FF), // Latin-1 Supplement
2672            (2, 0x0100, 0x017F), // Latin Extended-A
2673            (3, 0x0180, 0x024F), // Latin Extended-B
2674            (4, 0x0250, 0x02AF), // IPA Extensions
2675            (5, 0x02B0, 0x02FF), // Spacing Modifier Letters
2676            (6, 0x0300, 0x036F), // Combining Diacritical Marks
2677            (7, 0x0370, 0x03FF), // Greek and Coptic
2678            (8, 0x2C80, 0x2CFF), // Coptic
2679            (9, 0x0400, 0x04FF), // Cyrillic
2680            (10, 0x0530, 0x058F), // Armenian
2681            (11, 0x0590, 0x05FF), // Hebrew
2682            (12, 0x0600, 0x06FF), // Arabic
2683            (13, 0x0700, 0x074F), // Syriac
2684            (14, 0x0780, 0x07BF), // Thaana
2685            (15, 0x0900, 0x097F), // Devanagari
2686            (16, 0x0980, 0x09FF), // Bengali
2687            (17, 0x0A00, 0x0A7F), // Gurmukhi
2688            (18, 0x0A80, 0x0AFF), // Gujarati
2689            (19, 0x0B00, 0x0B7F), // Oriya
2690            (20, 0x0B80, 0x0BFF), // Tamil
2691            (21, 0x0C00, 0x0C7F), // Telugu
2692            (22, 0x0C80, 0x0CFF), // Kannada
2693            (23, 0x0D00, 0x0D7F), // Malayalam
2694            (24, 0x0E00, 0x0E7F), // Thai
2695            (25, 0x0E80, 0x0EFF), // Lao
2696            (26, 0x10A0, 0x10FF), // Georgian
2697            (27, 0x1B00, 0x1B7F), // Balinese
2698            (28, 0x1100, 0x11FF), // Hangul Jamo
2699            (29, 0x1E00, 0x1EFF), // Latin Extended Additional
2700            (30, 0x1F00, 0x1FFF), // Greek Extended
2701            (31, 0x2000, 0x206F), // General Punctuation
2702            
2703            // ulUnicodeRange2 (bits 32-63)
2704            (32, 0x2070, 0x209F), // Superscripts And Subscripts
2705            (33, 0x20A0, 0x20CF), // Currency Symbols
2706            (34, 0x20D0, 0x20FF), // Combining Diacritical Marks For Symbols
2707            (35, 0x2100, 0x214F), // Letterlike Symbols
2708            (36, 0x2150, 0x218F), // Number Forms
2709            (37, 0x2190, 0x21FF), // Arrows
2710            (38, 0x2200, 0x22FF), // Mathematical Operators
2711            (39, 0x2300, 0x23FF), // Miscellaneous Technical
2712            (40, 0x2400, 0x243F), // Control Pictures
2713            (41, 0x2440, 0x245F), // Optical Character Recognition
2714            (42, 0x2460, 0x24FF), // Enclosed Alphanumerics
2715            (43, 0x2500, 0x257F), // Box Drawing
2716            (44, 0x2580, 0x259F), // Block Elements
2717            (45, 0x25A0, 0x25FF), // Geometric Shapes
2718            (46, 0x2600, 0x26FF), // Miscellaneous Symbols
2719            (47, 0x2700, 0x27BF), // Dingbats
2720            (48, 0x3000, 0x303F), // CJK Symbols And Punctuation
2721            (49, 0x3040, 0x309F), // Hiragana
2722            (50, 0x30A0, 0x30FF), // Katakana
2723            (51, 0x3100, 0x312F), // Bopomofo
2724            (52, 0x3130, 0x318F), // Hangul Compatibility Jamo
2725            (53, 0x3190, 0x319F), // Kanbun
2726            (54, 0x31A0, 0x31BF), // Bopomofo Extended
2727            (55, 0x31C0, 0x31EF), // CJK Strokes
2728            (56, 0x31F0, 0x31FF), // Katakana Phonetic Extensions
2729            (57, 0x3200, 0x32FF), // Enclosed CJK Letters And Months
2730            (58, 0x3300, 0x33FF), // CJK Compatibility
2731            (59, 0x4E00, 0x9FFF), // CJK Unified Ideographs
2732            (60, 0xA000, 0xA48F), // Yi Syllables
2733            (61, 0xA490, 0xA4CF), // Yi Radicals
2734            (62, 0xAC00, 0xD7AF), // Hangul Syllables
2735            (63, 0xD800, 0xDFFF), // Non-Plane 0 (note: surrogates, not directly usable)
2736            
2737            // ulUnicodeRange3 (bits 64-95)
2738            (64, 0x10000, 0x10FFFF), // Phoenician and other non-BMP (bit 64 indicates non-BMP support)
2739            (65, 0xF900, 0xFAFF), // CJK Compatibility Ideographs
2740            (66, 0xFB00, 0xFB4F), // Alphabetic Presentation Forms
2741            (67, 0xFB50, 0xFDFF), // Arabic Presentation Forms-A
2742            (68, 0xFE00, 0xFE0F), // Variation Selectors
2743            (69, 0xFE10, 0xFE1F), // Vertical Forms
2744            (70, 0xFE20, 0xFE2F), // Combining Half Marks
2745            (71, 0xFE30, 0xFE4F), // CJK Compatibility Forms
2746            (72, 0xFE50, 0xFE6F), // Small Form Variants
2747            (73, 0xFE70, 0xFEFF), // Arabic Presentation Forms-B
2748            (74, 0xFF00, 0xFFEF), // Halfwidth And Fullwidth Forms
2749            (75, 0xFFF0, 0xFFFF), // Specials
2750            (76, 0x0F00, 0x0FFF), // Tibetan
2751            (77, 0x0700, 0x074F), // Syriac
2752            (78, 0x0780, 0x07BF), // Thaana
2753            (79, 0x0D80, 0x0DFF), // Sinhala
2754            (80, 0x1000, 0x109F), // Myanmar
2755            (81, 0x1200, 0x137F), // Ethiopic
2756            (82, 0x13A0, 0x13FF), // Cherokee
2757            (83, 0x1400, 0x167F), // Unified Canadian Aboriginal Syllabics
2758            (84, 0x1680, 0x169F), // Ogham
2759            (85, 0x16A0, 0x16FF), // Runic
2760            (86, 0x1780, 0x17FF), // Khmer
2761            (87, 0x1800, 0x18AF), // Mongolian
2762            (88, 0x2800, 0x28FF), // Braille Patterns
2763            (89, 0xA000, 0xA48F), // Yi Syllables
2764            (90, 0x1680, 0x169F), // Ogham
2765            (91, 0x16A0, 0x16FF), // Runic
2766            (92, 0x1700, 0x171F), // Tagalog
2767            (93, 0x1720, 0x173F), // Hanunoo
2768            (94, 0x1740, 0x175F), // Buhid
2769            (95, 0x1760, 0x177F), // Tagbanwa
2770            
2771            // ulUnicodeRange4 (bits 96-127)
2772            (96, 0x1900, 0x194F), // Limbu
2773            (97, 0x1950, 0x197F), // Tai Le
2774            (98, 0x1980, 0x19DF), // New Tai Lue
2775            (99, 0x1A00, 0x1A1F), // Buginese
2776            (100, 0x2C00, 0x2C5F), // Glagolitic
2777            (101, 0x2D30, 0x2D7F), // Tifinagh
2778            (102, 0x4DC0, 0x4DFF), // Yijing Hexagram Symbols
2779            (103, 0xA800, 0xA82F), // Syloti Nagri
2780            (104, 0x10000, 0x1007F), // Linear B Syllabary
2781            (105, 0x10080, 0x100FF), // Linear B Ideograms
2782            (106, 0x10100, 0x1013F), // Aegean Numbers
2783            (107, 0x10140, 0x1018F), // Ancient Greek Numbers
2784            (108, 0x10300, 0x1032F), // Old Italic
2785            (109, 0x10330, 0x1034F), // Gothic
2786            (110, 0x10380, 0x1039F), // Ugaritic
2787            (111, 0x103A0, 0x103DF), // Old Persian
2788            (112, 0x10400, 0x1044F), // Deseret
2789            (113, 0x10450, 0x1047F), // Shavian
2790            (114, 0x10480, 0x104AF), // Osmanya
2791            (115, 0x10800, 0x1083F), // Cypriot Syllabary
2792            (116, 0x10A00, 0x10A5F), // Kharoshthi
2793            (117, 0x1D000, 0x1D0FF), // Byzantine Musical Symbols
2794            (118, 0x1D100, 0x1D1FF), // Musical Symbols
2795            (119, 0x1D200, 0x1D24F), // Ancient Greek Musical Notation
2796            (120, 0x1D300, 0x1D35F), // Tai Xuan Jing Symbols
2797            (121, 0x1D400, 0x1D7FF), // Mathematical Alphanumeric Symbols
2798            (122, 0x1F000, 0x1F02F), // Mahjong Tiles
2799            (123, 0x1F030, 0x1F09F), // Domino Tiles
2800            (124, 0x1F300, 0x1F9FF), // Miscellaneous Symbols And Pictographs (Emoji)
2801            (125, 0x1F680, 0x1F6FF), // Transport And Map Symbols
2802            (126, 0x1F700, 0x1F77F), // Alchemical Symbols
2803            (127, 0x1F900, 0x1F9FF), // Supplemental Symbols and Pictographs
2804        ];
2805
2806        for (range_idx, bit_pos, start, end) in range_mappings.iter().map(|&(bit, start, end)| {
2807            let range_idx = bit / 32;
2808            let bit_pos = bit % 32;
2809            (range_idx, bit_pos, start, end)
2810        }) {
2811            if range_idx < 4 && (ranges[range_idx] & (1 << bit_pos)) != 0 {
2812                unicode_ranges.push(UnicodeRange { start, end });
2813            }
2814        }
2815        
2816        // Verify OS/2 reported ranges against actual CMAP support
2817        // OS/2 ulUnicodeRange bits can be unreliable - fonts may claim support
2818        // for ranges they don't actually have glyphs for
2819        unicode_ranges = verify_unicode_ranges_with_cmap(&provider, unicode_ranges);
2820        
2821        // If still empty (OS/2 had no ranges or all were invalid), do full CMAP analysis
2822        if unicode_ranges.is_empty() {
2823            if let Some(cmap_ranges) = analyze_cmap_coverage(&provider) {
2824                unicode_ranges = cmap_ranges;
2825            }
2826        }
2827
2828        // If no monospace detection yet, check using hmtx
2829        if detected_monospace.is_none() {
2830            // Try using PANOSE classification
2831            if os2_table.panose[0] == 2 {
2832                // 2 = Latin Text
2833                detected_monospace = Some(os2_table.panose[3] == 9); // 9 = Monospaced
2834            } else {
2835                let hhea_data = provider.table_data(tag::HHEA).ok()??;
2836                let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
2837                let maxp_data = provider.table_data(tag::MAXP).ok()??;
2838                let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
2839                let hmtx_data = provider.table_data(tag::HMTX).ok()??;
2840                let hmtx_table = ReadScope::new(&hmtx_data)
2841                    .read_dep::<HmtxTable<'_>>((
2842                        usize::from(maxp_table.num_glyphs),
2843                        usize::from(hhea_table.num_h_metrics),
2844                    ))
2845                    .ok()?;
2846
2847                let mut monospace = true;
2848                let mut last_advance = 0;
2849                for i in 0..hhea_table.num_h_metrics as usize {
2850                    let advance = hmtx_table.h_metrics.read_item(i).ok()?.advance_width;
2851                    if i > 0 && advance != last_advance {
2852                        monospace = false;
2853                        break;
2854                    }
2855                    last_advance = advance;
2856                }
2857
2858                detected_monospace = Some(monospace);
2859            }
2860        }
2861
2862        let is_monospace = detected_monospace.unwrap_or(false);
2863
2864        let name_data = provider.table_data(tag::NAME).ok()??.into_owned();
2865        let name_table = ReadScope::new(&name_data).read::<NameTable>().ok()?;
2866
2867        // One font can support multiple patterns
2868        let mut f_family = None;
2869
2870        let patterns = name_table
2871            .name_records
2872            .iter()
2873            .filter_map(|name_record| {
2874                let name_id = name_record.name_id;
2875                if name_id == FONT_SPECIFIER_FAMILY_ID {
2876                    let family = fontcode_get_name(&name_data, FONT_SPECIFIER_FAMILY_ID).ok()??;
2877                    f_family = Some(family);
2878                    None
2879                } else if name_id == FONT_SPECIFIER_NAME_ID {
2880                    let family = f_family.as_ref()?;
2881                    let name = fontcode_get_name(&name_data, FONT_SPECIFIER_NAME_ID).ok()??;
2882                    if name.to_bytes().is_empty() {
2883                        None
2884                    } else {
2885                        // Initialize metadata structure
2886                        let mut metadata = FcFontMetadata::default();
2887
2888                        const NAME_ID_COPYRIGHT: u16 = 0;
2889                        const NAME_ID_FAMILY: u16 = 1;
2890                        const NAME_ID_SUBFAMILY: u16 = 2;
2891                        const NAME_ID_UNIQUE_ID: u16 = 3;
2892                        const NAME_ID_FULL_NAME: u16 = 4;
2893                        const NAME_ID_VERSION: u16 = 5;
2894                        const NAME_ID_POSTSCRIPT_NAME: u16 = 6;
2895                        const NAME_ID_TRADEMARK: u16 = 7;
2896                        const NAME_ID_MANUFACTURER: u16 = 8;
2897                        const NAME_ID_DESIGNER: u16 = 9;
2898                        const NAME_ID_DESCRIPTION: u16 = 10;
2899                        const NAME_ID_VENDOR_URL: u16 = 11;
2900                        const NAME_ID_DESIGNER_URL: u16 = 12;
2901                        const NAME_ID_LICENSE: u16 = 13;
2902                        const NAME_ID_LICENSE_URL: u16 = 14;
2903                        const NAME_ID_PREFERRED_FAMILY: u16 = 16;
2904                        const NAME_ID_PREFERRED_SUBFAMILY: u16 = 17;
2905
2906                        // Extract metadata from name table
2907                        metadata.copyright = get_name_string(&name_data, NAME_ID_COPYRIGHT);
2908                        metadata.font_family = get_name_string(&name_data, NAME_ID_FAMILY);
2909                        metadata.font_subfamily = get_name_string(&name_data, NAME_ID_SUBFAMILY);
2910                        metadata.full_name = get_name_string(&name_data, NAME_ID_FULL_NAME);
2911                        metadata.unique_id = get_name_string(&name_data, NAME_ID_UNIQUE_ID);
2912                        metadata.version = get_name_string(&name_data, NAME_ID_VERSION);
2913                        metadata.postscript_name =
2914                            get_name_string(&name_data, NAME_ID_POSTSCRIPT_NAME);
2915                        metadata.trademark = get_name_string(&name_data, NAME_ID_TRADEMARK);
2916                        metadata.manufacturer = get_name_string(&name_data, NAME_ID_MANUFACTURER);
2917                        metadata.designer = get_name_string(&name_data, NAME_ID_DESIGNER);
2918                        metadata.id_description = get_name_string(&name_data, NAME_ID_DESCRIPTION);
2919                        metadata.designer_url = get_name_string(&name_data, NAME_ID_DESIGNER_URL);
2920                        metadata.manufacturer_url = get_name_string(&name_data, NAME_ID_VENDOR_URL);
2921                        metadata.license = get_name_string(&name_data, NAME_ID_LICENSE);
2922                        metadata.license_url = get_name_string(&name_data, NAME_ID_LICENSE_URL);
2923                        metadata.preferred_family =
2924                            get_name_string(&name_data, NAME_ID_PREFERRED_FAMILY);
2925                        metadata.preferred_subfamily =
2926                            get_name_string(&name_data, NAME_ID_PREFERRED_SUBFAMILY);
2927
2928                        let mut name = String::from_utf8_lossy(name.to_bytes()).to_string();
2929                        let mut family = String::from_utf8_lossy(family.as_bytes()).to_string();
2930                        if name.starts_with(".") {
2931                            name = name[1..].to_string();
2932                        }
2933                        if family.starts_with(".") {
2934                            family = family[1..].to_string();
2935                        }
2936                        Some((
2937                            FcPattern {
2938                                name: Some(name),
2939                                family: Some(family),
2940                                bold: if is_bold {
2941                                    PatternMatch::True
2942                                } else {
2943                                    PatternMatch::False
2944                                },
2945                                italic: if is_italic {
2946                                    PatternMatch::True
2947                                } else {
2948                                    PatternMatch::False
2949                                },
2950                                oblique: if is_oblique {
2951                                    PatternMatch::True
2952                                } else {
2953                                    PatternMatch::False
2954                                },
2955                                monospace: if is_monospace {
2956                                    PatternMatch::True
2957                                } else {
2958                                    PatternMatch::False
2959                                },
2960                                condensed: if stretch <= FcStretch::Condensed {
2961                                    PatternMatch::True
2962                                } else {
2963                                    PatternMatch::False
2964                                },
2965                                weight,
2966                                stretch,
2967                                unicode_ranges: unicode_ranges.clone(),
2968                                metadata,
2969                            },
2970                            font_index,
2971                        ))
2972                    }
2973                } else {
2974                    None
2975                }
2976            })
2977            .collect::<BTreeSet<_>>();
2978
2979        results.extend(patterns.into_iter().map(|(pat, index)| {
2980            (
2981                pat,
2982                FcFontPath {
2983                    path: filepath.to_string_lossy().to_string(),
2984                    font_index: index,
2985                },
2986            )
2987        }));
2988    }
2989
2990    if results.is_empty() {
2991        None
2992    } else {
2993        Some(results)
2994    }
2995}
2996
2997#[cfg(all(feature = "std", feature = "parsing"))]
2998fn FcScanDirectoriesInner(paths: &[(Option<String>, String)]) -> Vec<(FcPattern, FcFontPath)> {
2999    #[cfg(feature = "multithreading")]
3000    {
3001        use rayon::prelude::*;
3002
3003        // scan directories in parallel
3004        paths
3005            .par_iter()
3006            .filter_map(|(prefix, p)| {
3007                if let Some(path) = process_path(prefix, PathBuf::from(p), false) {
3008                    Some(FcScanSingleDirectoryRecursive(path))
3009                } else {
3010                    None
3011                }
3012            })
3013            .flatten()
3014            .collect()
3015    }
3016    #[cfg(not(feature = "multithreading"))]
3017    {
3018        paths
3019            .iter()
3020            .filter_map(|(prefix, p)| {
3021                if let Some(path) = process_path(prefix, PathBuf::from(p), false) {
3022                    Some(FcScanSingleDirectoryRecursive(path))
3023                } else {
3024                    None
3025                }
3026            })
3027            .flatten()
3028            .collect()
3029    }
3030}
3031
3032#[cfg(all(feature = "std", feature = "parsing"))]
3033fn FcScanSingleDirectoryRecursive(dir: PathBuf) -> Vec<(FcPattern, FcFontPath)> {
3034    let mut files_to_parse = Vec::new();
3035    let mut dirs_to_parse = vec![dir];
3036
3037    'outer: loop {
3038        let mut new_dirs_to_parse = Vec::new();
3039
3040        'inner: for dir in dirs_to_parse.clone() {
3041            let dir = match std::fs::read_dir(dir) {
3042                Ok(o) => o,
3043                Err(_) => continue 'inner,
3044            };
3045
3046            for (path, pathbuf) in dir.filter_map(|entry| {
3047                let entry = entry.ok()?;
3048                let path = entry.path();
3049                let pathbuf = path.to_path_buf();
3050                Some((path, pathbuf))
3051            }) {
3052                if path.is_dir() {
3053                    new_dirs_to_parse.push(pathbuf);
3054                } else {
3055                    files_to_parse.push(pathbuf);
3056                }
3057            }
3058        }
3059
3060        if new_dirs_to_parse.is_empty() {
3061            break 'outer;
3062        } else {
3063            dirs_to_parse = new_dirs_to_parse;
3064        }
3065    }
3066
3067    FcParseFontFiles(&files_to_parse)
3068}
3069
3070#[cfg(all(feature = "std", feature = "parsing"))]
3071fn FcParseFontFiles(files_to_parse: &[PathBuf]) -> Vec<(FcPattern, FcFontPath)> {
3072    let result = {
3073        #[cfg(feature = "multithreading")]
3074        {
3075            use rayon::prelude::*;
3076
3077            files_to_parse
3078                .par_iter()
3079                .filter_map(|file| FcParseFont(file))
3080                .collect::<Vec<Vec<_>>>()
3081        }
3082        #[cfg(not(feature = "multithreading"))]
3083        {
3084            files_to_parse
3085                .iter()
3086                .filter_map(|file| FcParseFont(file))
3087                .collect::<Vec<Vec<_>>>()
3088        }
3089    };
3090
3091    result.into_iter().flat_map(|f| f.into_iter()).collect()
3092}
3093
3094#[cfg(all(feature = "std", feature = "parsing"))]
3095/// Takes a path & prefix and resolves them to a usable path, or `None` if they're unsupported/unavailable.
3096///
3097/// Behaviour is based on: https://www.freedesktop.org/software/fontconfig/fontconfig-user.html
3098fn process_path(
3099    prefix: &Option<String>,
3100    mut path: PathBuf,
3101    is_include_path: bool,
3102) -> Option<PathBuf> {
3103    use std::env::var;
3104
3105    const HOME_SHORTCUT: &str = "~";
3106    const CWD_PATH: &str = ".";
3107
3108    const HOME_ENV_VAR: &str = "HOME";
3109    const XDG_CONFIG_HOME_ENV_VAR: &str = "XDG_CONFIG_HOME";
3110    const XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX: &str = ".config";
3111    const XDG_DATA_HOME_ENV_VAR: &str = "XDG_DATA_HOME";
3112    const XDG_DATA_HOME_DEFAULT_PATH_SUFFIX: &str = ".local/share";
3113
3114    const PREFIX_CWD: &str = "cwd";
3115    const PREFIX_DEFAULT: &str = "default";
3116    const PREFIX_XDG: &str = "xdg";
3117
3118    // These three could, in theory, be cached, but the work required to do so outweighs the minor benefits
3119    fn get_home_value() -> Option<PathBuf> {
3120        var(HOME_ENV_VAR).ok().map(PathBuf::from)
3121    }
3122    fn get_xdg_config_home_value() -> Option<PathBuf> {
3123        var(XDG_CONFIG_HOME_ENV_VAR)
3124            .ok()
3125            .map(PathBuf::from)
3126            .or_else(|| {
3127                get_home_value()
3128                    .map(|home_path| home_path.join(XDG_CONFIG_HOME_DEFAULT_PATH_SUFFIX))
3129            })
3130    }
3131    fn get_xdg_data_home_value() -> Option<PathBuf> {
3132        var(XDG_DATA_HOME_ENV_VAR)
3133            .ok()
3134            .map(PathBuf::from)
3135            .or_else(|| {
3136                get_home_value().map(|home_path| home_path.join(XDG_DATA_HOME_DEFAULT_PATH_SUFFIX))
3137            })
3138    }
3139
3140    // Resolve the tilde character in the path, if present
3141    if path.starts_with(HOME_SHORTCUT) {
3142        if let Some(home_path) = get_home_value() {
3143            path = home_path.join(
3144                path.strip_prefix(HOME_SHORTCUT)
3145                    .expect("already checked that it starts with the prefix"),
3146            );
3147        } else {
3148            return None;
3149        }
3150    }
3151
3152    // Resolve prefix values
3153    match prefix {
3154        Some(prefix) => match prefix.as_str() {
3155            PREFIX_CWD | PREFIX_DEFAULT => {
3156                let mut new_path = PathBuf::from(CWD_PATH);
3157                new_path.push(path);
3158
3159                Some(new_path)
3160            }
3161            PREFIX_XDG => {
3162                if is_include_path {
3163                    get_xdg_config_home_value()
3164                        .map(|xdg_config_home_path| xdg_config_home_path.join(path))
3165                } else {
3166                    get_xdg_data_home_value()
3167                        .map(|xdg_data_home_path| xdg_data_home_path.join(path))
3168                }
3169            }
3170            _ => None, // Unsupported prefix
3171        },
3172        None => Some(path),
3173    }
3174}
3175
3176// Helper function to extract a string from the name table
3177#[cfg(all(feature = "std", feature = "parsing"))]
3178fn get_name_string(name_data: &[u8], name_id: u16) -> Option<String> {
3179    fontcode_get_name(name_data, name_id)
3180        .ok()
3181        .flatten()
3182        .map(|name| String::from_utf8_lossy(name.to_bytes()).to_string())
3183}
3184
3185/// Representative test codepoints for each Unicode block.
3186/// These are carefully chosen to be actual script characters (not punctuation/symbols)
3187/// that a font claiming to support this script should definitely have.
3188#[cfg(all(feature = "std", feature = "parsing"))]
3189fn get_verification_codepoints(start: u32, end: u32) -> Vec<u32> {
3190    match start {
3191        // Basic Latin - test uppercase, lowercase, and digits
3192        0x0000 => vec!['A' as u32, 'M' as u32, 'Z' as u32, 'a' as u32, 'm' as u32, 'z' as u32],
3193        // Latin-1 Supplement - common accented letters
3194        0x0080 => vec![0x00C0, 0x00C9, 0x00D1, 0x00E0, 0x00E9, 0x00F1], // À É Ñ à é ñ
3195        // Latin Extended-A
3196        0x0100 => vec![0x0100, 0x0110, 0x0141, 0x0152, 0x0160], // Ā Đ Ł Œ Š
3197        // Latin Extended-B
3198        0x0180 => vec![0x0180, 0x01A0, 0x01B0, 0x01CD], // ƀ Ơ ư Ǎ
3199        // IPA Extensions
3200        0x0250 => vec![0x0250, 0x0259, 0x026A, 0x0279], // ɐ ə ɪ ɹ
3201        // Greek and Coptic
3202        0x0370 => vec![0x0391, 0x0392, 0x0393, 0x03B1, 0x03B2, 0x03C9], // Α Β Γ α β ω
3203        // Cyrillic
3204        0x0400 => vec![0x0410, 0x0411, 0x0412, 0x0430, 0x0431, 0x042F], // А Б В а б Я
3205        // Armenian
3206        0x0530 => vec![0x0531, 0x0532, 0x0533, 0x0561, 0x0562], // Ա Բ Գ ա բ
3207        // Hebrew
3208        0x0590 => vec![0x05D0, 0x05D1, 0x05D2, 0x05E9, 0x05EA], // א ב ג ש ת
3209        // Arabic
3210        0x0600 => vec![0x0627, 0x0628, 0x062A, 0x062C, 0x0645], // ا ب ت ج م
3211        // Syriac
3212        0x0700 => vec![0x0710, 0x0712, 0x0713, 0x0715], // ܐ ܒ ܓ ܕ
3213        // Devanagari
3214        0x0900 => vec![0x0905, 0x0906, 0x0915, 0x0916, 0x0939], // अ आ क ख ह
3215        // Bengali
3216        0x0980 => vec![0x0985, 0x0986, 0x0995, 0x0996], // অ আ ক খ
3217        // Gurmukhi
3218        0x0A00 => vec![0x0A05, 0x0A06, 0x0A15, 0x0A16], // ਅ ਆ ਕ ਖ
3219        // Gujarati
3220        0x0A80 => vec![0x0A85, 0x0A86, 0x0A95, 0x0A96], // અ આ ક ખ
3221        // Oriya
3222        0x0B00 => vec![0x0B05, 0x0B06, 0x0B15, 0x0B16], // ଅ ଆ କ ଖ
3223        // Tamil
3224        0x0B80 => vec![0x0B85, 0x0B86, 0x0B95, 0x0BA4], // அ ஆ க த
3225        // Telugu
3226        0x0C00 => vec![0x0C05, 0x0C06, 0x0C15, 0x0C16], // అ ఆ క ఖ
3227        // Kannada
3228        0x0C80 => vec![0x0C85, 0x0C86, 0x0C95, 0x0C96], // ಅ ಆ ಕ ಖ
3229        // Malayalam
3230        0x0D00 => vec![0x0D05, 0x0D06, 0x0D15, 0x0D16], // അ ആ ക ഖ
3231        // Thai
3232        0x0E00 => vec![0x0E01, 0x0E02, 0x0E04, 0x0E07, 0x0E40], // ก ข ค ง เ
3233        // Lao
3234        0x0E80 => vec![0x0E81, 0x0E82, 0x0E84, 0x0E87], // ກ ຂ ຄ ງ
3235        // Myanmar
3236        0x1000 => vec![0x1000, 0x1001, 0x1002, 0x1010, 0x1019], // က ခ ဂ တ မ
3237        // Georgian
3238        0x10A0 => vec![0x10D0, 0x10D1, 0x10D2, 0x10D3], // ა ბ გ დ
3239        // Hangul Jamo
3240        0x1100 => vec![0x1100, 0x1102, 0x1103, 0x1161, 0x1162], // ᄀ ᄂ ᄃ ᅡ ᅢ
3241        // Ethiopic
3242        0x1200 => vec![0x1200, 0x1208, 0x1210, 0x1218], // ሀ ለ ሐ መ
3243        // Cherokee
3244        0x13A0 => vec![0x13A0, 0x13A1, 0x13A2, 0x13A3], // Ꭰ Ꭱ Ꭲ Ꭳ
3245        // Khmer
3246        0x1780 => vec![0x1780, 0x1781, 0x1782, 0x1783], // ក ខ គ ឃ
3247        // Mongolian
3248        0x1800 => vec![0x1820, 0x1821, 0x1822, 0x1823], // ᠠ ᠡ ᠢ ᠣ
3249        // Hiragana
3250        0x3040 => vec![0x3042, 0x3044, 0x3046, 0x304B, 0x304D, 0x3093], // あ い う か き ん
3251        // Katakana
3252        0x30A0 => vec![0x30A2, 0x30A4, 0x30A6, 0x30AB, 0x30AD, 0x30F3], // ア イ ウ カ キ ン
3253        // Bopomofo
3254        0x3100 => vec![0x3105, 0x3106, 0x3107, 0x3108], // ㄅ ㄆ ㄇ ㄈ
3255        // CJK Unified Ideographs - common characters
3256        0x4E00 => vec![0x4E00, 0x4E2D, 0x4EBA, 0x5927, 0x65E5, 0x6708], // 一 中 人 大 日 月
3257        // Hangul Syllables
3258        0xAC00 => vec![0xAC00, 0xAC01, 0xAC04, 0xB098, 0xB2E4], // 가 각 간 나 다
3259        // CJK Compatibility Ideographs
3260        0xF900 => vec![0xF900, 0xF901, 0xF902], // 豈 更 車
3261        // Arabic Presentation Forms-A
3262        0xFB50 => vec![0xFB50, 0xFB51, 0xFB52, 0xFB56], // ﭐ ﭑ ﭒ ﭖ
3263        // Arabic Presentation Forms-B
3264        0xFE70 => vec![0xFE70, 0xFE72, 0xFE74, 0xFE76], // ﹰ ﹲ ﹴ ﹶ
3265        // Halfwidth and Fullwidth Forms
3266        0xFF00 => vec![0xFF01, 0xFF21, 0xFF41, 0xFF61], // ! A a 。
3267        // Default: sample at regular intervals
3268        _ => {
3269            let range_size = end - start;
3270            if range_size > 20 {
3271                vec![
3272                    start + range_size / 5,
3273                    start + 2 * range_size / 5,
3274                    start + 3 * range_size / 5,
3275                    start + 4 * range_size / 5,
3276                ]
3277            } else {
3278                vec![start, start + range_size / 2]
3279            }
3280        }
3281    }
3282}
3283
3284/// Verify OS/2 reported Unicode ranges against actual CMAP support.
3285/// Returns only ranges that are actually supported by the font's CMAP table.
3286#[cfg(all(feature = "std", feature = "parsing"))]
3287fn verify_unicode_ranges_with_cmap(
3288    provider: &impl FontTableProvider, 
3289    os2_ranges: Vec<UnicodeRange>
3290) -> Vec<UnicodeRange> {
3291    use allsorts::tables::cmap::{Cmap, CmapSubtable, PlatformId, EncodingId};
3292    
3293    if os2_ranges.is_empty() {
3294        return Vec::new();
3295    }
3296    
3297    // Try to get CMAP subtable
3298    let cmap_data = match provider.table_data(tag::CMAP) {
3299        Ok(Some(data)) => data,
3300        _ => return os2_ranges, // Can't verify, trust OS/2
3301    };
3302    
3303    let cmap = match ReadScope::new(&cmap_data).read::<Cmap<'_>>() {
3304        Ok(c) => c,
3305        Err(_) => return os2_ranges,
3306    };
3307    
3308    // Find the best Unicode subtable
3309    let encoding_record = cmap.find_subtable(PlatformId::UNICODE, EncodingId(3))
3310        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(4)))
3311        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
3312        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
3313        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
3314        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)));
3315    
3316    let encoding_record = match encoding_record {
3317        Some(r) => r,
3318        None => return os2_ranges, // No suitable subtable, trust OS/2
3319    };
3320    
3321    let cmap_subtable = match ReadScope::new(&cmap_data)
3322        .offset(encoding_record.offset as usize)
3323        .read::<CmapSubtable<'_>>() 
3324    {
3325        Ok(st) => st,
3326        Err(_) => return os2_ranges,
3327    };
3328    
3329    // Verify each range
3330    let mut verified_ranges = Vec::new();
3331    
3332    for range in os2_ranges {
3333        let test_codepoints = get_verification_codepoints(range.start, range.end);
3334        
3335        // Require at least 50% of test codepoints to have valid glyphs
3336        // This is stricter than before to avoid false positives
3337        let required_hits = (test_codepoints.len() + 1) / 2; // ceil(len/2)
3338        let mut hits = 0;
3339        
3340        for cp in test_codepoints {
3341            if cp >= range.start && cp <= range.end {
3342                if let Ok(Some(gid)) = cmap_subtable.map_glyph(cp) {
3343                    if gid != 0 {
3344                        hits += 1;
3345                        if hits >= required_hits {
3346                            break;
3347                        }
3348                    }
3349                }
3350            }
3351        }
3352        
3353        if hits >= required_hits {
3354            verified_ranges.push(range);
3355        }
3356    }
3357    
3358    verified_ranges
3359}
3360
3361/// Analyze CMAP table to discover font coverage when OS/2 provides no info.
3362/// This is the fallback when OS/2 ulUnicodeRange bits are all zero.
3363#[cfg(all(feature = "std", feature = "parsing"))]
3364fn analyze_cmap_coverage(provider: &impl FontTableProvider) -> Option<Vec<UnicodeRange>> {
3365    use allsorts::tables::cmap::{Cmap, CmapSubtable, PlatformId, EncodingId};
3366    
3367    let cmap_data = provider.table_data(tag::CMAP).ok()??;
3368    let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().ok()?;
3369    
3370    let encoding_record = cmap.find_subtable(PlatformId::UNICODE, EncodingId(3))
3371        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(4)))
3372        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(1)))
3373        .or_else(|| cmap.find_subtable(PlatformId::WINDOWS, EncodingId(10)))
3374        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(0)))
3375        .or_else(|| cmap.find_subtable(PlatformId::UNICODE, EncodingId(1)))?;
3376    
3377    let cmap_subtable = ReadScope::new(&cmap_data)
3378        .offset(encoding_record.offset as usize)
3379        .read::<CmapSubtable<'_>>()
3380        .ok()?;
3381    
3382    // Standard Unicode blocks to probe
3383    let blocks_to_check: &[(u32, u32)] = &[
3384        (0x0000, 0x007F), // Basic Latin
3385        (0x0080, 0x00FF), // Latin-1 Supplement
3386        (0x0100, 0x017F), // Latin Extended-A
3387        (0x0180, 0x024F), // Latin Extended-B
3388        (0x0250, 0x02AF), // IPA Extensions
3389        (0x0300, 0x036F), // Combining Diacritical Marks
3390        (0x0370, 0x03FF), // Greek and Coptic
3391        (0x0400, 0x04FF), // Cyrillic
3392        (0x0500, 0x052F), // Cyrillic Supplement
3393        (0x0530, 0x058F), // Armenian
3394        (0x0590, 0x05FF), // Hebrew
3395        (0x0600, 0x06FF), // Arabic
3396        (0x0700, 0x074F), // Syriac
3397        (0x0900, 0x097F), // Devanagari
3398        (0x0980, 0x09FF), // Bengali
3399        (0x0A00, 0x0A7F), // Gurmukhi
3400        (0x0A80, 0x0AFF), // Gujarati
3401        (0x0B00, 0x0B7F), // Oriya
3402        (0x0B80, 0x0BFF), // Tamil
3403        (0x0C00, 0x0C7F), // Telugu
3404        (0x0C80, 0x0CFF), // Kannada
3405        (0x0D00, 0x0D7F), // Malayalam
3406        (0x0E00, 0x0E7F), // Thai
3407        (0x0E80, 0x0EFF), // Lao
3408        (0x1000, 0x109F), // Myanmar
3409        (0x10A0, 0x10FF), // Georgian
3410        (0x1100, 0x11FF), // Hangul Jamo
3411        (0x1200, 0x137F), // Ethiopic
3412        (0x13A0, 0x13FF), // Cherokee
3413        (0x1780, 0x17FF), // Khmer
3414        (0x1800, 0x18AF), // Mongolian
3415        (0x2000, 0x206F), // General Punctuation
3416        (0x20A0, 0x20CF), // Currency Symbols
3417        (0x2100, 0x214F), // Letterlike Symbols
3418        (0x2190, 0x21FF), // Arrows
3419        (0x2200, 0x22FF), // Mathematical Operators
3420        (0x2500, 0x257F), // Box Drawing
3421        (0x25A0, 0x25FF), // Geometric Shapes
3422        (0x2600, 0x26FF), // Miscellaneous Symbols
3423        (0x3000, 0x303F), // CJK Symbols and Punctuation
3424        (0x3040, 0x309F), // Hiragana
3425        (0x30A0, 0x30FF), // Katakana
3426        (0x3100, 0x312F), // Bopomofo
3427        (0x3130, 0x318F), // Hangul Compatibility Jamo
3428        (0x4E00, 0x9FFF), // CJK Unified Ideographs
3429        (0xAC00, 0xD7AF), // Hangul Syllables
3430        (0xF900, 0xFAFF), // CJK Compatibility Ideographs
3431        (0xFB50, 0xFDFF), // Arabic Presentation Forms-A
3432        (0xFE70, 0xFEFF), // Arabic Presentation Forms-B
3433        (0xFF00, 0xFFEF), // Halfwidth and Fullwidth Forms
3434    ];
3435    
3436    let mut ranges = Vec::new();
3437    
3438    for &(start, end) in blocks_to_check {
3439        let test_codepoints = get_verification_codepoints(start, end);
3440        let required_hits = (test_codepoints.len() + 1) / 2;
3441        let mut hits = 0;
3442        
3443        for cp in test_codepoints {
3444            if let Ok(Some(gid)) = cmap_subtable.map_glyph(cp) {
3445                if gid != 0 {
3446                    hits += 1;
3447                    if hits >= required_hits {
3448                        break;
3449                    }
3450                }
3451            }
3452        }
3453        
3454        if hits >= required_hits {
3455            ranges.push(UnicodeRange { start, end });
3456        }
3457    }
3458    
3459    if ranges.is_empty() {
3460        None
3461    } else {
3462        Some(ranges)
3463    }
3464}
3465
3466// Helper function to extract unicode ranges (unused, kept for reference)
3467#[cfg(feature = "parsing")]
3468#[allow(dead_code)]
3469fn extract_unicode_ranges(os2_table: &Os2) -> Vec<UnicodeRange> {
3470    let mut unicode_ranges = Vec::new();
3471
3472    // Process the 4 Unicode range bitfields from OS/2 table
3473    let ranges = [
3474        os2_table.ul_unicode_range1,
3475        os2_table.ul_unicode_range2,
3476        os2_table.ul_unicode_range3,
3477        os2_table.ul_unicode_range4,
3478    ];
3479
3480    // Unicode range bit positions to actual ranges
3481    // Based on OpenType spec
3482    let range_mappings = [
3483        (0, 0x0000, 0x007F),  // Basic Latin
3484        (1, 0x0080, 0x00FF),  // Latin-1 Supplement
3485        (2, 0x0100, 0x017F),  // Latin Extended-A
3486        (7, 0x0370, 0x03FF),  // Greek and Coptic
3487        (9, 0x0400, 0x04FF),  // Cyrillic
3488        (29, 0x2000, 0x206F), // General Punctuation
3489        (57, 0x4E00, 0x9FFF), // CJK Unified Ideographs
3490                              // Add more ranges as needed
3491    ];
3492
3493    for (bit, start, end) in &range_mappings {
3494        let range_idx = bit / 32;
3495        let bit_pos = bit % 32;
3496
3497        if range_idx < 4 && (ranges[range_idx] & (1 << bit_pos)) != 0 {
3498            unicode_ranges.push(UnicodeRange {
3499                start: *start,
3500                end: *end,
3501            });
3502        }
3503    }
3504
3505    unicode_ranges
3506}
3507
3508// Helper function to detect if a font is monospace
3509#[cfg(feature = "parsing")]
3510#[allow(dead_code)]
3511fn detect_monospace(
3512    provider: &impl FontTableProvider,
3513    os2_table: &Os2,
3514    detected_monospace: Option<bool>,
3515) -> Option<bool> {
3516    if let Some(is_monospace) = detected_monospace {
3517        return Some(is_monospace);
3518    }
3519
3520    // Try using PANOSE classification
3521    if os2_table.panose[0] == 2 {
3522        // 2 = Latin Text
3523        return Some(os2_table.panose[3] == 9); // 9 = Monospaced
3524    }
3525
3526    // Check glyph widths in hmtx table
3527    let hhea_data = provider.table_data(tag::HHEA).ok()??;
3528    let hhea_table = ReadScope::new(&hhea_data).read::<HheaTable>().ok()?;
3529    let maxp_data = provider.table_data(tag::MAXP).ok()??;
3530    let maxp_table = ReadScope::new(&maxp_data).read::<MaxpTable>().ok()?;
3531    let hmtx_data = provider.table_data(tag::HMTX).ok()??;
3532    let hmtx_table = ReadScope::new(&hmtx_data)
3533        .read_dep::<HmtxTable<'_>>((
3534            usize::from(maxp_table.num_glyphs),
3535            usize::from(hhea_table.num_h_metrics),
3536        ))
3537        .ok()?;
3538
3539    let mut monospace = true;
3540    let mut last_advance = 0;
3541
3542    // Check if all advance widths are the same
3543    for i in 0..hhea_table.num_h_metrics as usize {
3544        let advance = hmtx_table.h_metrics.read_item(i).ok()?.advance_width;
3545        if i > 0 && advance != last_advance {
3546            monospace = false;
3547            break;
3548        }
3549        last_advance = advance;
3550    }
3551
3552    Some(monospace)
3553}