Skip to main content

pdf_xfa/
font_bridge.rs

1//! # Font Resolution Pipeline
2//!
3//! This module resolves fonts for XFA rendering. The pipeline:
4//!
5//! 1. **Extract** — `extract_embedded_fonts()` in flatten.rs reads font
6//!    programs and /Widths arrays from PDF font dictionaries
7//! 2. **Store** — `store_font_data()` saves font bytes + widths keyed by name
8//! 3. **Resolve** — `XfaFontResolver::resolve()` matches XFA font names to
9//!    stored fonts, with fallbacks (alias, family, system)
10//! 4. **Inject** — `inject_resolved_metrics()` pushes resolved widths into
11//!    FontMetrics for the layout engine
12//! 5. **Measure** — `FontMetrics::measure_width()` uses the widths for text
13//!    wrapping calculations
14//!
15//! ## /Widths Handling
16//!
17//! PDF /Widths arrays start at FirstChar (typically 32). For simple fonts we
18//! project those code-indexed widths onto Unicode scalar values so the layout
19//! engine can measure wrapped XFA text using the same character semantics as
20//! the PDF font encoding.
21//!
22//! ## Known Limitations
23//!
24//! - CID-to-Unicode mapping (ToUnicode CMap) is not yet parsed
25//! - System font fallback may have different metrics than the PDF's embedded font
26
27use crate::error::{Result, XfaError};
28use lopdf::ObjectId;
29use std::collections::HashMap;
30use std::path::PathBuf;
31use std::sync::OnceLock;
32
/// Reference to a reusable simple-font object already present in the source PDF.
///
/// The struct is `Copy`, so it is passed by value wherever a resolved font
/// needs to remember which existing PDF object it came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PdfSourceFont {
    /// `lopdf` object identifier of the referenced font object.
    pub object_id: ObjectId,
}
39
/// Embedded font record extracted from the source PDF.
///
/// A list of these records is the input of `XfaFontResolver::new`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedFontData {
    /// Font name; `XfaFontResolver::new` registers the font under the
    /// lower-cased name and under its `normalize_font_name` form.
    pub name: String,
    /// Raw font program bytes. When `parse_font_data_with_widths` cannot
    /// build a font from them, `XfaFontResolver::new` falls back to
    /// `build_pdf_only_font`.
    pub data: Vec<u8>,
    /// PDF `/Widths` data as `(first_char_code, widths)`, if present.
    pub pdf_widths: Option<(u16, Vec<u16>)>,
    /// Simple-font `/Encoding` (base encoding plus differences), if present.
    pub pdf_encoding: Option<PdfSimpleEncoding>,
    /// Existing simple-font object that can be reused during flattening.
    pub pdf_source_font: Option<PdfSourceFont>,
}
54
55/// Base encodings for simple PDF fonts.
56#[derive(Debug, Clone, Copy, PartialEq, Eq)]
57pub enum PdfBaseEncoding {
58    /// WinAnsi.
59    WinAnsi,
60    /// Standard.
61    Standard,
62    /// MacRoman.
63    MacRoman,
64}
65
66impl PdfBaseEncoding {
67    /// from_pdf_name.
68    pub fn from_pdf_name(name: &[u8]) -> Option<Self> {
69        match name {
70            b"WinAnsiEncoding" => Some(Self::WinAnsi),
71            b"StandardEncoding" => Some(Self::Standard),
72            b"MacRomanEncoding" => Some(Self::MacRoman),
73            _ => None,
74        }
75    }
76
77    fn code_to_unicode_table(self) -> &'static [Option<u16>; 256] {
78        match self {
79            Self::WinAnsi => &base_encoding_tables().win_ansi,
80            Self::Standard => &base_encoding_tables().standard,
81            Self::MacRoman => &base_encoding_tables().mac_roman,
82        }
83    }
84}
85
86/// Simple-font encoding overrides parsed from a PDF `/Encoding` dictionary.
87#[derive(Debug, Clone, PartialEq, Eq)]
88pub struct PdfSimpleEncoding {
89    /// base_encoding.
90    pub base_encoding: PdfBaseEncoding,
91    /// differences.
92    pub differences: Vec<(u8, u16)>,
93}
94
95impl PdfSimpleEncoding {
96    /// code_to_unicode_table.
97    pub fn code_to_unicode_table(&self) -> Vec<Option<u16>> {
98        let mut table = self.base_encoding.code_to_unicode_table().to_vec();
99        for (code, unicode) in &self.differences {
100            table[*code as usize] = Some(*unicode);
101        }
102        table
103    }
104}
105
/// PDF width information remembered per font name by the resolver
/// (the value type of `XfaFontResolver::embedded_pdf_widths`).
#[derive(Debug, Clone, PartialEq, Eq)]
struct PdfWidthData {
    /// `/Widths` data; copied into `ResolvedFont::pdf_widths` by
    /// `attach_pdf_widths`, so presumably `(first_char_code, widths)` too.
    widths: (u16, Vec<u16>),
    /// `/Encoding` information belonging to `widths`, if any.
    encoding: Option<PdfSimpleEncoding>,
    /// Source PDF font object the widths were taken from, if any.
    source_font: Option<PdfSourceFont>,
}
112
/// One 256-entry code-to-Unicode table per supported base encoding.
///
/// `PdfBaseEncoding::code_to_unicode_table` hands out references to these
/// fields. Entries are `Option<u16>`; presumably `None` marks a code with no
/// mapping in that encoding (the table contents are built elsewhere).
#[derive(Debug)]
struct PdfBaseEncodingTables {
    /// Table used for `PdfBaseEncoding::WinAnsi`.
    win_ansi: [Option<u16>; 256],
    /// Table used for `PdfBaseEncoding::Standard`.
    standard: [Option<u16>; 256],
    /// Table used for `PdfBaseEncoding::MacRoman`.
    mac_roman: [Option<u16>; 256],
}
119
/// A resolved font ready for use in PDF rendering.
///
/// Metrics can come from two places: the PDF `/Widths` data (`pdf_widths`,
/// optionally remapped through `pdf_encoding`) and the font program in
/// `data`. The measuring methods prefer the PDF widths when they are present.
#[derive(Debug, Clone)]
pub struct ResolvedFont {
    /// PostScript name or family name.
    pub name: String,
    /// Raw font data (TTF/OTF/CFF bytes). May be empty; `XfaFontResolver::resolve`
    /// treats an empty buffer plus a `pdf_source_font` as a PDF-only font.
    pub data: Vec<u8>,
    /// Font face index within a collection (TTC/OTC).
    pub face_index: u32,
    /// Units per em from the font's head table. A value of 0 makes the
    /// line-height/ascender/descender helpers fall back to fixed ratios.
    pub units_per_em: u16,
    /// Ascender in font units.
    pub ascender: i16,
    /// Descender in font units (negative).
    pub descender: i16,
    /// PDF /Widths array for glyph metrics: (first_char_code, widths).
    /// `measure_string` divides these values by 1000 and scales by font size.
    pub pdf_widths: Option<(u16, Vec<u16>)>,
    /// Optional `/Encoding` differences for the PDF width table.
    pub pdf_encoding: Option<PdfSimpleEncoding>,
    /// Existing simple-font object reused to preserve the PDF's original metrics.
    pub pdf_source_font: Option<PdfSourceFont>,
}
142
143impl ResolvedFont {
144    /// Measure the approximate width of a string in points at the given font size.
145    pub fn measure_string(&self, text: &str, font_size: f64) -> f64 {
146        if let Some(widths) = self.pdf_unicode_widths() {
147            let mut total = 0.0;
148            for ch in text.chars() {
149                let code = ch as usize;
150                if code < widths.len() && widths[code] != 0 {
151                    total += widths[code] as f64;
152                } else {
153                    total += self.measure_char_fallback(ch);
154                }
155            }
156            return total * font_size / 1000.0;
157        }
158        if let Ok(face) = ttf_parser::Face::parse(&self.data, self.face_index) {
159            let upem = face.units_per_em() as f64;
160            let scale = font_size / upem;
161            let mut width = 0.0;
162            for ch in text.chars() {
163                if let Some(gid) = face.glyph_index(ch) {
164                    width += face.glyph_hor_advance(gid).unwrap_or(0) as f64 * scale;
165                } else {
166                    width += font_size * 0.5;
167                }
168            }
169            width
170        } else {
171            text.len() as f64 * font_size * 0.5
172        }
173    }
174
175    fn measure_char_fallback(&self, ch: char) -> f64 {
176        if let Ok(face) = ttf_parser::Face::parse(&self.data, self.face_index) {
177            if let Some(gid) = face.glyph_index(ch) {
178                let upem = face.units_per_em() as f64;
179                let scale = 1.0 / upem;
180                face.glyph_hor_advance(gid).unwrap_or(0) as f64 * scale * 1000.0
181            } else {
182                500.0
183            }
184        } else {
185            500.0
186        }
187    }
188
189    /// Line height in points at the given font size.
190    pub fn line_height(&self, font_size: f64) -> f64 {
191        let upem = self.units_per_em as f64;
192        if upem > 0.0 {
193            (self.ascender as f64 - self.descender as f64) / upem * font_size
194        } else {
195            font_size * 1.2
196        }
197    }
198
199    /// Ascender in points at the given font size.
200    pub fn ascender_pt(&self, font_size: f64) -> f64 {
201        let upem = self.units_per_em as f64;
202        if upem > 0.0 {
203            self.ascender as f64 / upem * font_size
204        } else {
205            font_size * 0.8
206        }
207    }
208
209    /// Descender in points at the given font size (negative value).
210    pub fn descender_pt(&self, font_size: f64) -> f64 {
211        let upem = self.units_per_em as f64;
212        if upem > 0.0 {
213            self.descender as f64 / upem * font_size
214        } else {
215            font_size * -0.2
216        }
217    }
218
219    /// Generate a Unicode-indexed width table from PDF `/Widths`.
220    ///
221    /// WHY: Custom encodings via `/Differences` are essential for correct glyph
222    /// width mapping. Without this, widths are indexed by the wrong characters
223    /// for fonts that deviate from WinAnsi.
224    ///
225    /// SPEC: PDF spec §9.6.6.1 defines `/Differences` as a code-to-glyph-name
226    /// override table layered on top of a base encoding.
227    ///
228    /// LIMITATION: CID fonts (`/Type0`) use `/W` arrays and CMaps rather than
229    /// simple-font `/Widths`; that path is not handled here.
230    pub fn pdf_glyph_widths(&self) -> (u16, Vec<u16>) {
231        if let Some(widths) = self.pdf_unicode_widths() {
232            return (0, widths);
233        }
234        if let Ok(face) = ttf_parser::Face::parse(&self.data, self.face_index) {
235            let upem = face.units_per_em() as f64;
236            let scale = 1000.0 / upem;
237            let mut widths = Vec::with_capacity(256);
238            for code in 0u16..256 {
239                let w = if let Some(gid) = face.glyph_index(char::from(code as u8)) {
240                    (face.glyph_hor_advance(gid).unwrap_or(0) as f64 * scale) as u16
241                } else {
242                    0
243                };
244                widths.push(w);
245            }
246            (0, widths)
247        } else {
248            (0, vec![500; 256])
249        }
250    }
251
252    fn pdf_unicode_widths(&self) -> Option<Vec<u16>> {
253        let (first_char, widths) = self.pdf_widths.as_ref()?;
254        Some(unicode_widths_from_pdf_widths(
255            *first_char,
256            widths,
257            self.pdf_encoding.as_ref(),
258        ))
259    }
260
261    /// Generate CID font data for Identity-H encoding.
262    ///
263    /// Returns glyph widths indexed by GID and a GID→Unicode mapping for
264    /// the ToUnicode CMap.
265    pub fn cid_font_info(&self) -> Option<CidFontInfo> {
266        let face = ttf_parser::Face::parse(&self.data, self.face_index).ok()?;
267        let upem = face.units_per_em() as f64;
268        let scale = 1000.0 / upem;
269        let num_glyphs = face.number_of_glyphs();
270
271        let mut widths = Vec::with_capacity(num_glyphs as usize);
272        for gid_val in 0..num_glyphs {
273            let w = face
274                .glyph_hor_advance(ttf_parser::GlyphId(gid_val))
275                .map(|a| (a as f64 * scale) as u16)
276                .unwrap_or(0);
277            widths.push(w);
278        }
279
280        let mut gid_to_unicode = Vec::new();
281        for cp in 0x0020u32..=0xFFFDu32 {
282            if let Some(ch) = char::from_u32(cp) {
283                if let Some(gid) = face.glyph_index(ch) {
284                    gid_to_unicode.push((gid.0, ch));
285                }
286            }
287        }
288
289        Some(CidFontInfo {
290            widths,
291            gid_to_unicode,
292        })
293    }
294}
295
/// Data needed for CIDFont (Identity-H) embedding.
///
/// Produced by `ResolvedFont::cid_font_info`. Derives the same basic traits
/// as the other public types in this module so values can be logged, cloned
/// and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CidFontInfo {
    /// Width in 1/1000 units for each glyph, indexed by GID.
    pub widths: Vec<u16>,
    /// Mapping from glyph ID to Unicode codepoint (for ToUnicode CMap).
    pub gid_to_unicode: Vec<(u16, char)>,
}
303
/// Compose the map key that identifies one typeface variant.
///
/// The key is `<typeface><weight tag><posture tag>`, where the weight tag is
/// `_Bold` only for `Some("bold")` and the posture tag is `_Italic` only for
/// `Some("italic")`; every other value (including `None`) yields `_Normal`.
/// This keeps e.g. "Arial" regular and "Arial" bold apart in font maps and
/// metrics data.
pub fn font_variant_key(typeface: &str, weight: Option<&str>, posture: Option<&str>) -> String {
    let weight_tag = if weight == Some("bold") {
        "_Bold"
    } else {
        "_Normal"
    };
    let posture_tag = if posture == Some("italic") {
        "_Italic"
    } else {
        "_Normal"
    };

    let mut key = String::with_capacity(typeface.len() + weight_tag.len() + posture_tag.len());
    key.push_str(typeface);
    key.push_str(weight_tag);
    key.push_str(posture_tag);
    key
}
319
/// XFA Spec 3.3 §17 (p716) — genericFamily attribute on the font element.
///
/// Used as a fallback when the requested typeface cannot be found.
/// XFA Spec 3.3 §28.2 (p1246) — Font mapping step 4: genericFamily mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GenericFamily {
    /// Attribute value `serif`.
    Serif,
    /// Attribute value `sansSerif`.
    SansSerif,
    /// Attribute value `monospaced`.
    Monospaced,
    /// Attribute value `decorative`.
    Decorative,
    /// Attribute value `fantasy`.
    Fantasy,
    /// Attribute value `cursive`.
    Cursive,
}

impl GenericFamily {
    /// Parse the XFA genericFamily attribute value.
    ///
    /// Matching is exact and case-sensitive; unknown values give `None`.
    pub fn parse(s: &str) -> Option<Self> {
        const ATTRIBUTE_VALUES: [(&str, GenericFamily); 6] = [
            ("serif", GenericFamily::Serif),
            ("sansSerif", GenericFamily::SansSerif),
            ("monospaced", GenericFamily::Monospaced),
            ("decorative", GenericFamily::Decorative),
            ("fantasy", GenericFamily::Fantasy),
            ("cursive", GenericFamily::Cursive),
        ];

        ATTRIBUTE_VALUES
            .iter()
            .find(|(value, _)| *value == s)
            .map(|&(_, family)| family)
    }
}
354
/// XFA font specification from the template.
///
/// Built from raw attribute strings by `XfaFontSpec::from_xfa_attrs` and
/// consumed by `XfaFontResolver::resolve`.
#[derive(Debug, Clone)]
pub struct XfaFontSpec {
    /// Requested typeface name, stored exactly as given.
    pub typeface: String,
    /// Requested weight (normal or bold).
    pub weight: FontWeight,
    /// Requested posture (normal or italic).
    pub posture: FontPosture,
    /// Font size in points.
    pub size_pt: f64,
    /// XFA Spec 3.3 §17 (p716) — genericFamily fallback hint.
    pub generic_family: Option<GenericFamily>,
}
/// Font weight requested by an XFA font specification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FontWeight {
    /// Regular weight; `from_xfa_attrs` uses it for anything other than `"bold"`.
    Normal,
    /// Bold weight; selected by the attribute value `"bold"`.
    Bold,
}
/// Font posture requested by an XFA font specification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FontPosture {
    /// Upright posture; `from_xfa_attrs` uses it for anything other than `"italic"`.
    Normal,
    /// Italic posture; selected by the attribute value `"italic"`.
    Italic,
}
387
388impl XfaFontSpec {
389    /// Parse a font specification from XFA template attributes.
390    pub fn from_xfa_attrs(
391        typeface: &str,
392        weight: Option<&str>,
393        posture: Option<&str>,
394        size: Option<&str>,
395        generic_family: Option<&str>,
396    ) -> Self {
397        Self {
398            typeface: typeface.to_string(),
399            weight: match weight {
400                Some("bold") => FontWeight::Bold,
401                _ => FontWeight::Normal,
402            },
403            posture: match posture {
404                Some("italic") => FontPosture::Italic,
405                _ => FontPosture::Normal,
406            },
407            size_pt: size
408                .and_then(|s| s.strip_suffix("pt").or(Some(s)))
409                .and_then(|s| s.parse::<f64>().ok())
410                .unwrap_or(10.0),
411            generic_family: generic_family.and_then(GenericFamily::parse),
412        }
413    }
414}
415
/// Resolves XFA font specifications to actual font data.
///
/// All lookup maps are queried with lower-cased names.
pub struct XfaFontResolver {
    /// Fonts built from the PDF's embedded font records, keyed by the
    /// lower-cased name and, when different, the normalized name.
    embedded: HashMap<String, ResolvedFont>,
    /// PDF `/Widths` data remembered per font name via `remember_pdf_widths`
    /// and attached to resolved fonts by `attach_pdf_widths`.
    embedded_pdf_widths: HashMap<String, PdfWidthData>,
    /// Font files found by `scan_system_fonts`, keyed by name.
    system_fonts: HashMap<String, PathBuf>,
    /// Results of earlier `resolve` calls, keyed by typeface, weight and posture.
    cache: HashMap<String, ResolvedFont>,
    /// Optional directory of pre-extracted font programs (from oracle PDFs).
    /// Fonts are looked up by PostScript name before falling back to system fonts.
    font_cache_fonts: HashMap<String, PathBuf>,
}
426
/// Reduce a font name to its lower-case base form.
///
/// Steps, in order:
/// 1. Drop a subset tag — exactly six ASCII uppercase letters followed by
///    `+` and at least one more byte: "ABCDEF+Arial" -> "Arial".
/// 2. Drop one trailing PostScript marker, preferring "PSMT" over "MT":
///    "TimesNewRomanPSMT" -> "TimesNewRoman", "Arial-BoldMT" -> "Arial-Bold".
///    The match is case-sensitive.
/// 3. Lowercase the result.
fn normalize_font_name(name: &str) -> String {
    let bytes = name.as_bytes();
    let has_subset_tag = bytes.len() > 7
        && bytes[6] == b'+'
        && bytes[..6].iter().all(u8::is_ascii_uppercase);
    // Byte 6 is the ASCII '+', so offset 7 is always a char boundary.
    let untagged = if has_subset_tag { &name[7..] } else { name };

    // "PSMT" must be tried first; otherwise "…PSMT" would lose only "MT".
    let base = ["PSMT", "MT"]
        .iter()
        .find_map(|suffix| untagged.strip_suffix(*suffix))
        .unwrap_or(untagged);

    base.to_lowercase()
}
456
457/// Aggressive canonical key used for /Widths matching.
458///
459/// The `/Widths` table in a PDF may be registered under many spellings for
460/// the same logical font: `Arial-Bold`, `Arial,Bold`, `Arial Bold`,
461/// `ArialBold`, `Arial-BoldMT`, `ABCDEF+Arial-Bold`. All of these should
462/// map to a single lookup key so that an XFA font spec resolves the same
463/// widths regardless of the separator convention used by the producer.
464///
465/// The canonical form strips:
466/// * the subset prefix (`ABCDEF+`)
467/// * the trailing `PSMT`/`MT` markers
468/// * every non-alphanumeric character (`-`, `,`, space, `_`, `.`)
469///
470/// and lowercases the rest. Empty strings map to `None` so callers can
471/// skip the lookup safely.
472fn canonical_font_key(name: &str) -> Option<String> {
473    let normalized = normalize_font_name(name);
474    let canonical: String = normalized
475        .chars()
476        .filter(|c| c.is_ascii_alphanumeric())
477        .collect();
478    if canonical.is_empty() {
479        None
480    } else {
481        Some(canonical)
482    }
483}
484
/// Look up substitute family names for a common font family.
///
/// `name` must already be lower-case; matching is exact. The table maps
/// Windows/macOS names to Linux metric-compatible families and also carries
/// the reverse direction for the Liberation fonts. Unknown names yield an
/// empty slice.
fn font_family_aliases(name: &str) -> &'static [&'static str] {
    // (accepted spellings, substitutes in preference order)
    const ALIAS_TABLE: &[(&[&str], &[&str])] = &[
        (&["arial", "arialmt"], &["liberationsans", "arimo", "freesans"]),
        (
            &["times new roman", "timesnewroman", "timesnewromanpsmt"],
            &["liberationserif", "tinos", "freeserif"],
        ),
        (
            &["courier new", "couriernew", "couriernewpsmt"],
            &["liberationmono", "cousine", "freemono"],
        ),
        (&["helvetica"], &["liberationsans", "arimo", "arial"]),
        (
            &["myriad pro", "myriadpro"],
            &["liberationsans", "arimo", "dejavusans"],
        ),
        // Reverse mappings: Linux fonts -> common equivalents
        (
            &["liberationsans", "liberation sans"],
            &["arial", "arimo", "freesans", "helvetica"],
        ),
        (
            &["liberationserif", "liberation serif"],
            &["times new roman", "tinos", "freeserif"],
        ),
        (
            &["liberationmono", "liberation mono"],
            &["courier new", "cousine", "freemono"],
        ),
    ];

    let entry = ALIAS_TABLE
        .iter()
        .find(|(spellings, _)| spellings.iter().any(|spelling| *spelling == name));
    match entry {
        Some(&(_, substitutes)) => substitutes,
        None => &[],
    }
}
506
/// Font classification for family-aware fallback.
///
/// Produced by `classify_font_family` and consumed by `family_fallback_chain`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FontFamily {
    /// Name looks like a serif face.
    Serif,
    /// Name looks like a sans-serif face.
    SansSerif,
    /// Name looks like a fixed-pitch face.
    Monospace,
    /// No keyword matched; `family_fallback_chain` treats this like sans-serif.
    Unknown,
}
515
516/// Classify a font name into a font family category.
517fn classify_font_family(name: &str) -> FontFamily {
518    let lower = name.to_lowercase();
519
520    // Monospace indicators
521    if lower.contains("mono")
522        || lower.contains("courier")
523        || lower.contains("consolas")
524        || lower.contains("menlo")
525        || lower.contains("fixed")
526        || lower.contains("code")
527    {
528        return FontFamily::Monospace;
529    }
530
531    // Serif indicators (check before sans-serif since "sans" contains checks come after)
532    if lower.contains("serif") && !lower.contains("sans") {
533        return FontFamily::Serif;
534    }
535    if lower.contains("times")
536        || lower.contains("garamond")
537        || lower.contains("georgia")
538        || lower.contains("palatino")
539        || lower.contains("bodoni")
540        || lower.contains("cambria")
541        || lower.contains("tinos")
542    {
543        return FontFamily::Serif;
544    }
545
546    // Sans-serif indicators
547    if lower.contains("sans")
548        || lower.contains("arial")
549        || lower.contains("helvetica")
550        || lower.contains("verdana")
551        || lower.contains("tahoma")
552        || lower.contains("calibri")
553        || lower.contains("arimo")
554        || lower.contains("myriad")
555        || lower.contains("segoe")
556    {
557        return FontFamily::SansSerif;
558    }
559
560    FontFamily::Unknown
561}
562
563/// Return family-aware fallback chain for the given font family.
564fn family_fallback_chain(family: FontFamily) -> &'static [&'static str] {
565    match family {
566        FontFamily::SansSerif | FontFamily::Unknown => &[
567            "liberationsans",
568            "arimo",
569            "dejavusans",
570            "freesans",
571            "helvetica",
572            "arial",
573        ],
574        FontFamily::Serif => &["liberationserif", "tinos", "dejavuserif", "freeserif"],
575        FontFamily::Monospace => &["liberationmono", "cousine", "dejavusansmono", "freemono"],
576    }
577}
578
579fn font_name_substring_match(requested: &str, available: &str) -> bool {
580    let requested_lower = requested.to_lowercase();
581    let available_lower = available.to_lowercase();
582
583    if requested_lower == available_lower {
584        return true;
585    }
586
587    if requested_lower.contains(&available_lower) || available_lower.contains(&requested_lower) {
588        return true;
589    }
590
591    let requested_stripped = normalize_font_name(requested);
592    let available_stripped = normalize_font_name(available);
593
594    if requested_stripped == available_stripped {
595        return true;
596    }
597
598    if requested_stripped.contains(&available_stripped)
599        || available_stripped.contains(&requested_stripped)
600    {
601        return true;
602    }
603
604    false
605}
606
607// XFA Spec 3.3 §28.2 (p1246) — Font mapping step 4: genericFamily mapping.
608// Maps genericFamily values to concrete font fallback chains.
609fn generic_family_fallback_chain(gf: GenericFamily) -> &'static [&'static str] {
610    match gf {
611        GenericFamily::Serif | GenericFamily::Decorative => &[
612            "liberationserif",
613            "tinos",
614            "dejavuserif",
615            "freeserif",
616            "times new roman",
617            "times",
618        ],
619        GenericFamily::SansSerif | GenericFamily::Fantasy => &[
620            "liberationsans",
621            "arimo",
622            "dejavusans",
623            "freesans",
624            "helvetica",
625            "arial",
626        ],
627        GenericFamily::Monospaced => &[
628            "liberationmono",
629            "cousine",
630            "dejavusansmono",
631            "freemono",
632            "courier new",
633            "courier",
634        ],
635        GenericFamily::Cursive => {
636            // Cursive maps to best available serif-italic; fall back to serif fonts.
637            &[
638                "liberationserif",
639                "tinos",
640                "dejavuserif",
641                "freeserif",
642                "times new roman",
643            ]
644        }
645    }
646}
647
648impl XfaFontResolver {
649    /// Create a new resolver with embedded fonts extracted from the PDF.
650    pub fn new(embedded_fonts: Vec<EmbeddedFontData>) -> Self {
651        let mut embedded = HashMap::new();
652        let mut embedded_pdf_widths = HashMap::new();
653        for font_data in embedded_fonts {
654            let EmbeddedFontData {
655                name,
656                data,
657                pdf_widths,
658                pdf_encoding,
659                pdf_source_font,
660            } = font_data;
661            if let Some(ref widths) = pdf_widths {
662                remember_pdf_widths(
663                    &mut embedded_pdf_widths,
664                    &name,
665                    widths,
666                    pdf_encoding.clone(),
667                    pdf_source_font,
668                );
669            }
670            if let Some(font) = parse_font_data_with_widths(
671                &name,
672                &data,
673                pdf_widths.clone(),
674                pdf_encoding.clone(),
675                pdf_source_font,
676            )
677            .or_else(|| build_pdf_only_font(&name, pdf_widths, pdf_encoding, pdf_source_font))
678            {
679                let normalized = normalize_font_name(&name);
680                embedded.insert(name.to_lowercase(), font.clone());
681                if normalized != name.to_lowercase() {
682                    embedded.insert(normalized, font);
683                }
684            }
685        }
686        let system_fonts = scan_system_fonts();
687        let font_cache_fonts = scan_font_cache_dir();
688        Self {
689            embedded,
690            embedded_pdf_widths,
691            system_fonts,
692            cache: HashMap::new(),
693            font_cache_fonts,
694        }
695    }
696
697    /// Set a custom font cache directory (overrides `XFA_FONT_CACHE` env var).
698    pub fn with_font_cache(mut self, dir: &std::path::Path) -> Self {
699        self.font_cache_fonts = scan_font_dir(dir);
700        self
701    }
702
    /// Resolve a font specification to a usable font.
    ///
    /// XFA Spec 3.3 §28.2 (p1246) — Font mapping: Adobe uses a 5-step algorithm:
    /// 1) direct match, 2) equate, 3) locale, 4) genericFamily, 5) default.
    ///    We implement steps 1, 4, 5 (equate and locale are config-dependent).
    ///
    /// When weight is Bold and/or posture is Italic, variant-specific font
    /// names are tried first (e.g. "Arial-Bold", "ArialBold", "Arial Bold")
    /// in both embedded and system font lookups. This ensures that bold/italic
    /// text gets the correct font metrics (wider glyphs, different ascender/
    /// descender) instead of silently falling back to the regular weight.
    ///
    /// Successful results are cached per typeface/weight/posture, so a
    /// repeated request returns a clone of the earlier result.
    ///
    /// # Errors
    ///
    /// Returns `XfaError::FontError` when no lookup stage — including the
    /// system default chain — yields a font.
    pub fn resolve(&mut self, spec: &XfaFontSpec) -> Result<ResolvedFont> {
        // NOTE(review): the key does not include `spec.generic_family`, so two
        // specs that differ only in that hint share one cache entry — confirm
        // this is intended.
        let cache_key = format!("{}_{:?}_{:?}", spec.typeface, spec.weight, spec.posture);
        if let Some(cached) = self.cache.get(&cache_key) {
            return Ok(cached.clone());
        }
        let normalized = normalize_font_name(&spec.typeface);

        // Build variant suffixes based on weight/posture.
        let variant_names = build_variant_names(&spec.typeface, spec.weight, spec.posture);

        // Step 1: Try variant-specific names first (bold/italic variants).
        // Each variant is looked up as given (embedded, font cache, system)
        // and then again in normalized form.
        let font = variant_names
            .iter()
            .find_map(|vn| {
                self.try_embedded(vn)
                    .or_else(|| self.try_font_cache(vn))
                    .or_else(|| self.try_system(vn))
                    .or_else(|| {
                        let norm = normalize_font_name(vn);
                        self.try_embedded(&norm)
                            .or_else(|| self.try_font_cache(&norm))
                            .or_else(|| self.try_system(&norm))
                    })
            })
            // Then try the base name as before.
            .or_else(|| self.try_embedded(&spec.typeface))
            .or_else(|| self.try_embedded(&normalized))
            .or_else(|| self.try_font_cache(&spec.typeface))
            .or_else(|| self.try_font_cache(&normalized))
            .or_else(|| self.try_system(&spec.typeface))
            .or_else(|| self.try_system(&normalized))
            // Progressively looser matching: style markers removed, known
            // aliases, then substring comparison against available fonts.
            .or_else(|| self.try_base_name(&spec.typeface))
            .or_else(|| self.try_aliases(&spec.typeface))
            .or_else(|| self.try_substring_match(&spec.typeface))
            // Step 4: genericFamily fallback (XFA §28.2 step 4).
            .or_else(|| self.try_generic_family_fallback(spec.generic_family))
            .or_else(|| self.try_family_fallback(&spec.typeface))
            // Step 5: system default.
            .or_else(|| self.try_fallbacks())
            .ok_or_else(|| {
                XfaError::FontError(format!("cannot resolve font: {}", spec.typeface))
            })?;
        // Attach PDF width data recorded for any of the candidate names.
        let mut font = self.attach_pdf_widths(font, spec, &variant_names);
        // #858: When a PDF-only font (from AcroForm /DR) has no embedded data,
        // the Identity-H encoding path is unavailable and non-WinAnsi characters
        // (e.g. Polish ł,ś,ć) become '?'. Augment with system font data so
        // embed_font_in_pdf can create a Type0/Identity-H font.
        if font.data.is_empty() && font.pdf_source_font.is_some() {
            let sys = self
                .try_system(&font.name)
                .or_else(|| self.try_system(&spec.typeface))
                .or_else(|| self.try_aliases(&spec.typeface));
            if let Some(sys_font) = sys {
                // Only the font program and face index are borrowed; name,
                // metrics and PDF width data of `font` stay untouched.
                font.data = sys_font.data;
                font.face_index = sys_font.face_index;
            }
        }
        self.cache.insert(cache_key, font.clone());
        Ok(font)
    }
774
775    fn try_embedded(&self, name: &str) -> Option<ResolvedFont> {
776        self.embedded.get(&name.to_lowercase()).cloned()
777    }
778
779    fn try_font_cache(&self, name: &str) -> Option<ResolvedFont> {
780        let path = self.font_cache_fonts.get(&name.to_lowercase())?;
781        load_system_font(path, name)
782    }
783
784    fn try_system(&self, name: &str) -> Option<ResolvedFont> {
785        let path = self.system_fonts.get(&name.to_lowercase())?;
786        load_system_font(path, name)
787    }
788
789    fn try_base_name(&self, name: &str) -> Option<ResolvedFont> {
790        let base = name
791            .replace("-Bold", "")
792            .replace("-Italic", "")
793            .replace("-BoldItalic", "")
794            .replace(",Bold", "")
795            .replace(",Italic", "");
796        if base != name {
797            let normalized_base = normalize_font_name(&base);
798            self.try_embedded(&base)
799                .or_else(|| self.try_embedded(&normalized_base))
800                .or_else(|| self.try_system(&base))
801                .or_else(|| self.try_system(&normalized_base))
802        } else {
803            None
804        }
805    }
806
807    /// Try font family aliases: map common font names to Linux equivalents.
808    fn try_aliases(&self, name: &str) -> Option<ResolvedFont> {
809        let normalized = normalize_font_name(name);
810
811        for lookup in &[name.to_lowercase(), normalized] {
812            let no_spaces = lookup.replace(' ', "");
813            for candidate in [lookup.as_str(), no_spaces.as_str()] {
814                let aliases = font_family_aliases(candidate);
815                for alias in aliases {
816                    if let Some(font) = self.try_system(alias) {
817                        return Some(font);
818                    }
819                }
820            }
821        }
822        None
823    }
824
825    fn try_substring_match(&self, name: &str) -> Option<ResolvedFont> {
826        let name_lower = name.to_lowercase();
827        let name_normalized = normalize_font_name(name);
828
829        for (available_key, font) in &self.embedded {
830            if font_name_substring_match(&name_lower, available_key)
831                || font_name_substring_match(&name_normalized, available_key)
832            {
833                return Some(font.clone());
834            }
835        }
836
837        for (available_key, path) in &self.system_fonts {
838            if font_name_substring_match(&name_lower, available_key)
839                || font_name_substring_match(&name_normalized, available_key)
840            {
841                if let Some(font) = load_system_font(path, available_key) {
842                    return Some(font);
843                }
844            }
845        }
846
847        None
848    }
849
850    /// XFA Spec 3.3 §28.2 step 4 — genericFamily fallback.
851    fn try_generic_family_fallback(&self, gf: Option<GenericFamily>) -> Option<ResolvedFont> {
852        let gf = gf?;
853        let chain = generic_family_fallback_chain(gf);
854        for candidate in chain {
855            if let Some(font) = self.try_system(candidate) {
856                return Some(font);
857            }
858        }
859        None
860    }
861
862    /// Family-aware fallback: classify the font and try appropriate chain.
863    fn try_family_fallback(&self, name: &str) -> Option<ResolvedFont> {
864        let family = classify_font_family(name);
865        let chain = family_fallback_chain(family);
866        for candidate in chain {
867            if let Some(font) = self.try_system(candidate) {
868                return Some(font);
869            }
870        }
871        None
872    }
873
874    fn try_fallbacks(&self) -> Option<ResolvedFont> {
875        #[cfg(target_os = "macos")]
876        let fallback_chain = ["Arial", "Helvetica.ttc", "DejaVuSans", "LiberationSans"];
877        #[cfg(not(target_os = "macos"))]
878        let fallback_chain = [
879            "LiberationSans",
880            "DejaVuSans",
881            "Arial",
882            "Helvetica",
883            "FreeSans",
884        ];
885
886        for name in &fallback_chain {
887            if let Some(font) = self.try_system(name) {
888                return Some(font);
889            }
890        }
891        None
892    }
893
894    fn attach_pdf_widths(
895        &self,
896        mut font: ResolvedFont,
897        spec: &XfaFontSpec,
898        variant_names: &[String],
899    ) -> ResolvedFont {
900        if font.pdf_widths.is_some() && font.pdf_source_font.is_some() {
901            return font;
902        }
903
904        let mut matched = false;
905        for name in variant_names
906            .iter()
907            .map(String::as_str)
908            .chain([spec.typeface.as_str(), font.name.as_str()])
909        {
910            if let Some(width_data) = lookup_pdf_widths(&self.embedded_pdf_widths, name) {
911                if font.pdf_widths.is_none() {
912                    font.pdf_widths = Some(width_data.widths);
913                }
914                if font.pdf_encoding.is_none() {
915                    font.pdf_encoding = width_data.encoding;
916                }
917                if font.pdf_source_font.is_none() {
918                    font.pdf_source_font = width_data.source_font;
919                }
920                matched = true;
921                break;
922            }
923        }
924
925        // When no /Widths are attached but the PDF does ship width tables,
926        // the font will silently fall back to AFM metrics and wrap text at
927        // the wrong offsets. Surface these misses behind an env flag so
928        // diagnostic runs can see which XFA fonts failed to bind.
929        if !matched
930            && font.pdf_widths.is_none()
931            && !self.embedded_pdf_widths.is_empty()
932            && std::env::var("XFA_FONT_BRIDGE_DEBUG").is_ok()
933        {
934            eprintln!(
935                "attach_pdf_widths: no /Widths match for typeface '{}' (resolved='{}', variants={:?}); available keys (first 20): {:?}",
936                spec.typeface,
937                font.name,
938                variant_names,
939                self.embedded_pdf_widths
940                    .keys()
941                    .take(20)
942                    .collect::<Vec<_>>(),
943            );
944        }
945
946        font
947    }
948}
949
950fn remember_pdf_widths(
951    widths_map: &mut HashMap<String, PdfWidthData>,
952    name: &str,
953    widths: &(u16, Vec<u16>),
954    encoding: Option<PdfSimpleEncoding>,
955    source_font: Option<PdfSourceFont>,
956) {
957    let record = PdfWidthData {
958        widths: widths.clone(),
959        encoding,
960        source_font,
961    };
962    let lower = name.to_lowercase();
963    widths_map.insert(lower.clone(), record.clone());
964
965    let normalized = normalize_font_name(name);
966    if normalized != lower {
967        widths_map.insert(normalized.clone(), record.clone());
968    }
969
970    let no_spaces = lower.replace(' ', "");
971    if no_spaces != lower {
972        widths_map.insert(no_spaces, record.clone());
973    }
974
975    // Canonical form (no separators, no MT/PSMT, no subset prefix) — lets
976    // "Arial-BoldMT", "Arial,Bold", "Arial Bold" etc. collapse to the same
977    // key so variants with different separator conventions all resolve.
978    if let Some(canonical) = canonical_font_key(name) {
979        widths_map.entry(canonical).or_insert(record);
980    }
981}
982
983fn lookup_pdf_widths(
984    widths_map: &HashMap<String, PdfWidthData>,
985    name: &str,
986) -> Option<PdfWidthData> {
987    let lower = name.to_lowercase();
988    if let Some(hit) = widths_map.get(&lower) {
989        return Some(hit.clone());
990    }
991    let normalized = normalize_font_name(name);
992    if let Some(hit) = widths_map.get(&normalized) {
993        return Some(hit.clone());
994    }
995    let no_spaces = lower.replace(' ', "");
996    if let Some(hit) = widths_map.get(&no_spaces) {
997        return Some(hit.clone());
998    }
999    // Canonical key fallback handles separator mismatches ("Arial-BoldMT"
1000    // vs "Arial,Bold") and subset prefixes ("ABCDEF+Arial-Bold").
1001    if let Some(canonical) = canonical_font_key(name) {
1002        if let Some(hit) = widths_map.get(&canonical) {
1003            return Some(hit.clone());
1004        }
1005        // Last-resort substring match on canonical keys in both directions.
1006        // Prefer the longest matching stored key so "arialbold" wins over
1007        // "arial" when both are present.
1008        let mut best: Option<(&String, &PdfWidthData)> = None;
1009        for (key, data) in widths_map {
1010            if (key.contains(&canonical) || canonical.contains(key.as_str()))
1011                && best.is_none_or(|(b, _)| key.len() > b.len())
1012            {
1013                best = Some((key, data));
1014            }
1015        }
1016        if let Some((_, data)) = best {
1017            return Some(data.clone());
1018        }
1019    }
1020    None
1021}
1022
1023/// Build variant-specific font names for bold/italic lookup.
1024///
1025/// Given a base typeface name ("Arial") and weight/posture, produces names
1026/// like "Arial-Bold", "ArialBold", "Arial Bold" etc. Returns an empty vec
1027/// when both weight and posture are Normal.
1028fn build_variant_names(typeface: &str, weight: FontWeight, posture: FontPosture) -> Vec<String> {
1029    let suffix = match (weight, posture) {
1030        (FontWeight::Bold, FontPosture::Italic) => "BoldItalic",
1031        (FontWeight::Bold, FontPosture::Normal) => "Bold",
1032        (FontWeight::Normal, FontPosture::Italic) => "Italic",
1033        (FontWeight::Normal, FontPosture::Normal) => return Vec::new(),
1034    };
1035
1036    let mut names = Vec::with_capacity(6);
1037    // "{Name}-{Suffix}" e.g. "Arial-Bold"
1038    names.push(format!("{}-{}", typeface, suffix));
1039    // "{Name}{Suffix}" e.g. "ArialBold"
1040    names.push(format!("{}{}", typeface, suffix));
1041    // "{Name} {Suffix}" e.g. "Arial Bold"
1042    names.push(format!("{} {}", typeface, suffix));
1043    // Comma-separated: "{Name},{Suffix}" e.g. "Arial,Bold"
1044    names.push(format!("{},{}", typeface, suffix));
1045
1046    // For BoldItalic, also try the two-suffix patterns:
1047    // "{Name}-Bold Italic", "{Name} Bold Italic"
1048    if weight == FontWeight::Bold && posture == FontPosture::Italic {
1049        names.push(format!("{}-Bold Italic", typeface));
1050        names.push(format!("{} Bold Italic", typeface));
1051    }
1052
1053    names
1054}
1055
1056fn _parse_font_data(name: &str, data: &[u8]) -> Option<ResolvedFont> {
1057    let face = ttf_parser::Face::parse(data, 0).ok()?;
1058    Some(ResolvedFont {
1059        name: name.to_string(),
1060        data: data.to_vec(),
1061        face_index: 0,
1062        units_per_em: face.units_per_em(),
1063        ascender: face.ascender(),
1064        descender: face.descender(),
1065        pdf_widths: None,
1066        pdf_encoding: None,
1067        pdf_source_font: None,
1068    })
1069}
1070
1071fn parse_font_data_with_widths(
1072    name: &str,
1073    data: &[u8],
1074    pdf_widths: Option<(u16, Vec<u16>)>,
1075    pdf_encoding: Option<PdfSimpleEncoding>,
1076    pdf_source_font: Option<PdfSourceFont>,
1077) -> Option<ResolvedFont> {
1078    let face = ttf_parser::Face::parse(data, 0).ok()?;
1079    Some(ResolvedFont {
1080        name: name.to_string(),
1081        data: data.to_vec(),
1082        face_index: 0,
1083        units_per_em: face.units_per_em(),
1084        ascender: face.ascender(),
1085        descender: face.descender(),
1086        pdf_widths,
1087        pdf_encoding,
1088        pdf_source_font,
1089    })
1090}
1091
1092fn build_pdf_only_font(
1093    name: &str,
1094    pdf_widths: Option<(u16, Vec<u16>)>,
1095    pdf_encoding: Option<PdfSimpleEncoding>,
1096    pdf_source_font: Option<PdfSourceFont>,
1097) -> Option<ResolvedFont> {
1098    let pdf_widths = pdf_widths?;
1099    let pdf_source_font = pdf_source_font?;
1100    Some(ResolvedFont {
1101        name: name.to_string(),
1102        data: Vec::new(),
1103        face_index: 0,
1104        // fix(#811): XFA 3.3 §11.7.1 expects field fitting to follow the
1105        // actual font metrics seen by the target renderer. When the PDF already
1106        // carries a simple font object with authoritative /Widths, reuse that
1107        // object instead of synthesizing a system fallback with drifted widths.
1108        // These defaults only cover vertical metrics when no parseable font
1109        // program exists; horizontal measurement comes from the PDF /Widths.
1110        units_per_em: 1000,
1111        ascender: 800,
1112        descender: -200,
1113        pdf_widths: Some(pdf_widths),
1114        pdf_encoding,
1115        pdf_source_font: Some(pdf_source_font),
1116    })
1117}
1118
1119fn load_system_font(path: &PathBuf, name: &str) -> Option<ResolvedFont> {
1120    let data = std::fs::read(path).ok()?;
1121    let num_fonts = ttf_parser::fonts_in_collection(&data).unwrap_or(1);
1122    for idx in 0..num_fonts {
1123        if let Ok(face) = ttf_parser::Face::parse(&data, idx) {
1124            let name_id_matches = |name_id: u16| {
1125                name_id == ttf_parser::name_id::FULL_NAME
1126                    || name_id == ttf_parser::name_id::POST_SCRIPT_NAME
1127                    || name_id == ttf_parser::name_id::FAMILY
1128            };
1129            let matches = face.names().into_iter().any(|n| {
1130                name_id_matches(n.name_id)
1131                    && n.to_string().is_some_and(|s| s.eq_ignore_ascii_case(name))
1132            });
1133            if matches || idx == 0 {
1134                return Some(ResolvedFont {
1135                    name: name.to_string(),
1136                    data: data.clone(),
1137                    face_index: idx,
1138                    units_per_em: face.units_per_em(),
1139                    ascender: face.ascender(),
1140                    descender: face.descender(),
1141                    pdf_widths: None,
1142                    pdf_encoding: None,
1143                    pdf_source_font: None,
1144                });
1145            }
1146        }
1147    }
1148    None
1149}
1150
1151fn unicode_widths_from_pdf_widths(
1152    first_char: u16,
1153    widths: &[u16],
1154    encoding: Option<&PdfSimpleEncoding>,
1155) -> Vec<u16> {
1156    let code_to_unicode = encoding
1157        .map(PdfSimpleEncoding::code_to_unicode_table)
1158        .unwrap_or_else(|| PdfBaseEncoding::WinAnsi.code_to_unicode_table().to_vec());
1159
1160    let mut max_codepoint = 255usize;
1161    for (offset, _) in widths.iter().enumerate() {
1162        let code = first_char as usize + offset;
1163        if code >= 256 {
1164            break;
1165        }
1166        if let Some(unicode) = code_to_unicode[code] {
1167            max_codepoint = max_codepoint.max(unicode as usize);
1168        }
1169    }
1170
1171    let mut projected = vec![0u16; max_codepoint + 1];
1172    for (offset, &width) in widths.iter().enumerate() {
1173        let code = first_char as usize + offset;
1174        if code >= 256 {
1175            break;
1176        }
1177        if let Some(unicode) = code_to_unicode[code] {
1178            projected[unicode as usize] = width;
1179        }
1180    }
1181    projected
1182}
1183
1184pub(crate) fn pdf_glyph_name_to_unicode(name: &str) -> Option<u16> {
1185    if let Some(base) = name.split('.').next() {
1186        if base != name {
1187            return pdf_glyph_name_to_unicode(base);
1188        }
1189    }
1190
1191    if let Some(cp) = glyph_name_map().get(name) {
1192        return Some(*cp);
1193    }
1194
1195    if let Some(hex) = name.strip_prefix("uni") {
1196        if hex.len() == 4 && hex.chars().all(|ch| ch.is_ascii_hexdigit()) {
1197            return u16::from_str_radix(hex, 16).ok();
1198        }
1199    }
1200
1201    if let Some(hex) = name.strip_prefix('u') {
1202        if (4..=6).contains(&hex.len()) && hex.chars().all(|ch| ch.is_ascii_hexdigit()) {
1203            let codepoint = u32::from_str_radix(hex, 16).ok()?;
1204            return u16::try_from(codepoint).ok();
1205        }
1206    }
1207
1208    None
1209}
1210
1211fn glyph_name_map() -> &'static HashMap<&'static str, u16> {
1212    static MAP: OnceLock<HashMap<&'static str, u16>> = OnceLock::new();
1213    MAP.get_or_init(|| {
1214        let mut map = HashMap::new();
1215        for line in include_str!("encodings/glyphnames.rs").lines() {
1216            let line = line.trim();
1217            let Some(rest) = line.strip_prefix("pub const ") else {
1218                continue;
1219            };
1220            let Some((name, hex)) = rest.split_once(": u16 = 0x") else {
1221                continue;
1222            };
1223            let Some(hex) = hex.strip_suffix(';') else {
1224                continue;
1225            };
1226            if let Ok(codepoint) = u16::from_str_radix(hex, 16) {
1227                map.insert(name, codepoint);
1228            }
1229        }
1230        map
1231    })
1232}
1233
1234fn base_encoding_tables() -> &'static PdfBaseEncodingTables {
1235    static TABLES: OnceLock<PdfBaseEncodingTables> = OnceLock::new();
1236    TABLES.get_or_init(|| PdfBaseEncodingTables {
1237        win_ansi: parse_base_encoding_table("WIN_ANSI_ENCODING"),
1238        standard: parse_base_encoding_table("STANDARD_ENCODING"),
1239        mac_roman: parse_base_encoding_table("MAC_ROMAN_ENCODING"),
1240    })
1241}
1242
1243fn parse_base_encoding_table(const_name: &str) -> [Option<u16>; 256] {
1244    let src = include_str!("encodings/mappings.rs");
1245    let marker = format!("pub const {const_name}: CodedCharacterSet = [");
1246    let start = src
1247        .find(&marker)
1248        .unwrap_or_else(|| panic!("missing {const_name} in lopdf mappings"));
1249    let body = &src[start + marker.len()..];
1250    let end = body
1251        .find("];")
1252        .unwrap_or_else(|| panic!("unterminated {const_name} in lopdf mappings"));
1253
1254    let mut entries = Vec::with_capacity(256);
1255    for line in body[..end].lines() {
1256        let line = line.trim();
1257        if line.is_empty() {
1258            continue;
1259        }
1260        if line == "None," {
1261            entries.push(None);
1262            continue;
1263        }
1264        if let Some(name) = line
1265            .strip_prefix("Some(Glyph::")
1266            .and_then(|rest| rest.strip_suffix("),"))
1267        {
1268            entries.push(pdf_glyph_name_to_unicode(name));
1269        }
1270    }
1271
1272    let entry_count = entries.len();
1273    entries
1274        .try_into()
1275        .unwrap_or_else(|_| panic!("expected 256 entries in {const_name}, got {entry_count}"))
1276}
1277
1278fn scan_system_fonts() -> HashMap<String, PathBuf> {
1279    let mut fonts = HashMap::new();
1280    let mut font_files = Vec::new();
1281
1282    // Collect all font files, including subdirectories (Linux stores fonts in
1283    // subdirectories like /usr/share/fonts/truetype/liberation/).
1284    for dir in system_font_dirs() {
1285        collect_font_files(&dir, &mut font_files, 0);
1286    }
1287
1288    for path in &font_files {
1289        // Always register by filename stem (existing behavior)
1290        if let Some(name) = path.file_stem().and_then(|s| s.to_str()) {
1291            fonts.insert(name.to_lowercase(), path.clone());
1292        }
1293
1294        // Also register by TrueType name table entries
1295        if let Ok(data) = std::fs::read(path) {
1296            let num_faces = ttf_parser::fonts_in_collection(&data).unwrap_or(1);
1297            for face_idx in 0..num_faces {
1298                if let Ok(face) = ttf_parser::Face::parse(&data, face_idx) {
1299                    for name_record in face.names() {
1300                        // Register under family name (ID 1), full name (ID 4),
1301                        // and PostScript name (ID 6).
1302                        let dominated = matches!(
1303                            name_record.name_id,
1304                            ttf_parser::name_id::FAMILY
1305                                | ttf_parser::name_id::FULL_NAME
1306                                | ttf_parser::name_id::POST_SCRIPT_NAME
1307                        );
1308                        if dominated {
1309                            if let Some(s) = name_record.to_string() {
1310                                let key = s.to_lowercase();
1311                                // Don't overwrite an existing entry — first match wins
1312                                fonts.entry(key).or_insert_with(|| path.clone());
1313
1314                                // Also insert without spaces so "Liberation Sans"
1315                                // can be found as "liberationsans"
1316                                let no_spaces = s.replace(' ', "").to_lowercase();
1317                                if no_spaces != s.to_lowercase() {
1318                                    fonts.entry(no_spaces).or_insert_with(|| path.clone());
1319                                }
1320                            }
1321                        }
1322                    }
1323                }
1324            }
1325        }
1326    }
1327
1328    fonts
1329}
1330
/// Recursively collect font files from a directory (up to 3 levels deep).
///
/// Files whose extension is `ttf`, `otf`, `ttc` or `otc` (compared
/// case-insensitively) are appended to `out`. `depth` is the nesting level
/// of `dir`; directories at a level above 3 are not read. Directories and
/// entries that cannot be read are skipped silently.
fn collect_font_files(dir: &std::path::Path, out: &mut Vec<PathBuf>, depth: u32) {
    const MAX_DEPTH: u32 = 3;
    if depth > MAX_DEPTH {
        return;
    }
    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    for path in listing.flatten().map(|entry| entry.path()) {
        if path.is_dir() {
            collect_font_files(&path, out, depth + 1);
            continue;
        }
        let is_font = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_lowercase)
            .is_some_and(|ext| matches!(ext.as_str(), "ttf" | "otf" | "ttc" | "otc"));
        if is_font {
            out.push(path);
        }
    }
}
1354
/// List the directories searched for system fonts on the current platform.
///
/// macOS and Linux use fixed system locations followed by per-user
/// locations under `$HOME` (when that variable is set). Windows derives its
/// locations from `%WINDIR%` and `%LOCALAPPDATA%`. On any other platform the
/// list is empty. The directories are not checked for existence.
fn system_font_dirs() -> Vec<PathBuf> {
    let mut dirs = Vec::new();
    #[cfg(target_os = "macos")]
    {
        dirs.extend(["/System/Library/Fonts", "/Library/Fonts"].map(PathBuf::from));
        if let Ok(home) = std::env::var("HOME") {
            dirs.push(PathBuf::from(format!("{home}/Library/Fonts")));
        }
    }
    #[cfg(target_os = "linux")]
    {
        dirs.extend(["/usr/share/fonts", "/usr/local/share/fonts"].map(PathBuf::from));
        if let Ok(home) = std::env::var("HOME") {
            let per_user = [
                format!("{home}/.local/share/fonts"),
                format!("{home}/.fonts"),
            ];
            dirs.extend(per_user.map(PathBuf::from));
        }
    }
    #[cfg(target_os = "windows")]
    {
        let from_env = [
            std::env::var("WINDIR").map(|windir| format!("{windir}\\Fonts")),
            std::env::var("LOCALAPPDATA")
                .map(|local| format!("{local}\\Microsoft\\Windows\\Fonts")),
        ];
        dirs.extend(from_env.into_iter().flatten().map(PathBuf::from));
    }
    dirs
}
1385
1386/// Scan a single directory of font files (flat, no recursion).
1387///
1388/// Returns a map from lowercase PostScript name / family name / filename stem
1389/// to the font file path — same name-table logic as `scan_system_fonts`.
1390fn scan_font_dir(dir: &std::path::Path) -> HashMap<String, PathBuf> {
1391    let mut fonts = HashMap::new();
1392    let entries = match std::fs::read_dir(dir) {
1393        Ok(e) => e,
1394        Err(_) => return fonts,
1395    };
1396    for entry in entries.flatten() {
1397        let path = entry.path();
1398        if path.is_dir() {
1399            continue;
1400        }
1401        let ext = path
1402            .extension()
1403            .and_then(|e| e.to_str())
1404            .unwrap_or("")
1405            .to_lowercase();
1406        if !matches!(ext.as_str(), "ttf" | "otf" | "ttc" | "otc" | "cff") {
1407            continue;
1408        }
1409        // Register by filename stem
1410        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
1411            fonts
1412                .entry(stem.to_lowercase())
1413                .or_insert_with(|| path.clone());
1414        }
1415        // Register by name table entries
1416        if let Ok(data) = std::fs::read(&path) {
1417            let num_faces = ttf_parser::fonts_in_collection(&data).unwrap_or(1);
1418            for face_idx in 0..num_faces {
1419                if let Ok(face) = ttf_parser::Face::parse(&data, face_idx) {
1420                    for name_record in face.names() {
1421                        let dominated = matches!(
1422                            name_record.name_id,
1423                            ttf_parser::name_id::FAMILY
1424                                | ttf_parser::name_id::FULL_NAME
1425                                | ttf_parser::name_id::POST_SCRIPT_NAME
1426                        );
1427                        if dominated {
1428                            if let Some(s) = name_record.to_string() {
1429                                let key = s.to_lowercase();
1430                                fonts.entry(key).or_insert_with(|| path.clone());
1431                                let no_spaces = s.replace(' ', "").to_lowercase();
1432                                if no_spaces != s.to_lowercase() {
1433                                    fonts.entry(no_spaces).or_insert_with(|| path.clone());
1434                                }
1435                            }
1436                        }
1437                    }
1438                }
1439            }
1440        }
1441    }
1442    fonts
1443}
1444
1445/// Scan font cache directory from `XFA_FONT_CACHE` env var.
1446fn scan_font_cache_dir() -> HashMap<String, PathBuf> {
1447    match std::env::var("XFA_FONT_CACHE") {
1448        Ok(dir) => {
1449            let path = PathBuf::from(&dir);
1450            if path.is_dir() {
1451                scan_font_dir(&path)
1452            } else {
1453                HashMap::new()
1454            }
1455        }
1456        Err(_) => HashMap::new(),
1457    }
1458}
1459
1460#[cfg(test)]
1461mod tests {
1462    use super::*;
1463
1464    #[test]
1465    fn font_spec_parsing() {
1466        let spec = XfaFontSpec::from_xfa_attrs("Helvetica", Some("bold"), None, Some("12pt"), None);
1467        assert_eq!(spec.typeface, "Helvetica");
1468        assert_eq!(spec.weight, FontWeight::Bold);
1469        assert_eq!(spec.posture, FontPosture::Normal);
1470        assert!((spec.size_pt - 12.0).abs() < 0.001);
1471        assert_eq!(spec.generic_family, None);
1472    }
1473
1474    #[test]
1475    fn font_spec_defaults() {
1476        let spec = XfaFontSpec::from_xfa_attrs("Arial", None, None, None, None);
1477        assert_eq!(spec.weight, FontWeight::Normal);
1478        assert_eq!(spec.posture, FontPosture::Normal);
1479        assert!((spec.size_pt - 10.0).abs() < 0.001);
1480    }
1481
1482    #[test]
1483    fn font_spec_generic_family() {
1484        let spec = XfaFontSpec::from_xfa_attrs("FancyFont", None, None, None, Some("serif"));
1485        assert_eq!(spec.generic_family, Some(GenericFamily::Serif));
1486
1487        let spec = XfaFontSpec::from_xfa_attrs("FancyFont", None, None, None, Some("sansSerif"));
1488        assert_eq!(spec.generic_family, Some(GenericFamily::SansSerif));
1489
1490        let spec = XfaFontSpec::from_xfa_attrs("FancyFont", None, None, None, Some("monospaced"));
1491        assert_eq!(spec.generic_family, Some(GenericFamily::Monospaced));
1492
1493        let spec = XfaFontSpec::from_xfa_attrs("FancyFont", None, None, None, Some("bogus"));
1494        assert_eq!(spec.generic_family, None);
1495    }
1496
1497    #[test]
1498    fn resolver_empty() {
1499        let mut resolver = XfaFontResolver::new(vec![]);
1500        let spec = XfaFontSpec::from_xfa_attrs("NonExistentFont12345", None, None, None, None);
1501        let _ = resolver.resolve(&spec);
1502    }
1503
1504    #[test]
1505    fn system_font_dirs_not_empty() {
1506        let dirs = system_font_dirs();
1507        assert!(!dirs.is_empty());
1508    }
1509
1510    #[test]
1511    fn cid_font_info_with_system_font() {
1512        // Try to resolve a system font and verify cid_font_info works
1513        let mut resolver = XfaFontResolver::new(vec![]);
1514        let spec = XfaFontSpec::from_xfa_attrs("Helvetica", None, None, None, None);
1515        if let Ok(font) = resolver.resolve(&spec) {
1516            let info = font.cid_font_info();
1517            assert!(
1518                info.is_some(),
1519                "cid_font_info should succeed for a valid font"
1520            );
1521            let info = info.unwrap();
1522            assert!(!info.widths.is_empty(), "widths should not be empty");
1523            assert!(
1524                !info.gid_to_unicode.is_empty(),
1525                "gid_to_unicode should not be empty"
1526            );
1527            // Verify that 'A' (U+0041) is mapped
1528            let has_a = info.gid_to_unicode.iter().any(|&(_, ch)| ch == 'A');
1529            assert!(has_a, "font should have a mapping for 'A'");
1530        }
1531    }
1532
1533    #[test]
1534    fn normalize_font_name_strips_subset_prefix() {
1535        assert_eq!(normalize_font_name("ABCDEF+Arial"), "arial");
1536        assert_eq!(
1537            normalize_font_name("XYZABC+TimesNewRomanPSMT"),
1538            "timesnewroman"
1539        );
1540    }
1541
1542    #[test]
1543    fn normalize_font_name_strips_ps_suffixes() {
1544        assert_eq!(normalize_font_name("ArialMT"), "arial");
1545        assert_eq!(normalize_font_name("TimesNewRomanPSMT"), "timesnewroman");
1546        assert_eq!(normalize_font_name("CourierNewPSMT"), "couriernew");
1547    }
1548
1549    #[test]
1550    fn normalize_font_name_preserves_normal_names() {
1551        assert_eq!(normalize_font_name("Helvetica"), "helvetica");
1552        assert_eq!(normalize_font_name("DejaVuSans"), "dejavusans");
1553    }
1554
1555    #[test]
1556    fn normalize_font_name_no_false_prefix_strip() {
1557        // "abcdef+" should not be stripped (not uppercase)
1558        assert_eq!(normalize_font_name("abcdef+Arial"), "abcdef+arial");
1559        // Short prefix should not be stripped
1560        assert_eq!(normalize_font_name("AB+Arial"), "ab+arial");
1561    }
1562
1563    #[test]
1564    fn font_family_aliases_known_fonts() {
1565        assert!(!font_family_aliases("arial").is_empty());
1566        assert!(!font_family_aliases("helvetica").is_empty());
1567        assert!(!font_family_aliases("courier new").is_empty());
1568        assert!(!font_family_aliases("times new roman").is_empty());
1569        assert!(!font_family_aliases("myriad pro").is_empty());
1570    }
1571
1572    #[test]
1573    fn font_family_aliases_unknown_font() {
1574        assert!(font_family_aliases("some_unknown_font_xyz").is_empty());
1575    }
1576
1577    #[test]
1578    fn classify_font_family_sans() {
1579        assert_eq!(classify_font_family("Arial"), FontFamily::SansSerif);
1580        assert_eq!(classify_font_family("Helvetica"), FontFamily::SansSerif);
1581        assert_eq!(classify_font_family("DejaVuSans"), FontFamily::SansSerif);
1582        assert_eq!(
1583            classify_font_family("LiberationSans"),
1584            FontFamily::SansSerif
1585        );
1586    }
1587
1588    #[test]
1589    fn classify_font_family_serif() {
1590        assert_eq!(classify_font_family("Times New Roman"), FontFamily::Serif);
1591        assert_eq!(classify_font_family("Georgia"), FontFamily::Serif);
1592        assert_eq!(classify_font_family("LiberationSerif"), FontFamily::Serif);
1593    }
1594
1595    #[test]
1596    fn classify_font_family_mono() {
1597        assert_eq!(classify_font_family("Courier New"), FontFamily::Monospace);
1598        assert_eq!(
1599            classify_font_family("LiberationMono"),
1600            FontFamily::Monospace
1601        );
1602        assert_eq!(classify_font_family("Consolas"), FontFamily::Monospace);
1603    }
1604
1605    #[test]
1606    fn classify_font_family_unknown() {
1607        assert_eq!(classify_font_family("FancyFont"), FontFamily::Unknown);
1608    }
1609
1610    #[test]
1611    fn scan_system_fonts_has_name_table_entries() {
1612        let fonts = scan_system_fonts();
1613        // On any system with fonts, we should have entries.
1614        // The name-table scanning should produce more entries than just filename stems.
1615        assert!(!fonts.is_empty(), "system fonts map should not be empty");
1616    }
1617
1618    #[test]
1619    fn build_variant_names_normal() {
1620        let names = build_variant_names("Arial", FontWeight::Normal, FontPosture::Normal);
1621        assert!(names.is_empty(), "normal/normal should produce no variants");
1622    }
1623
1624    #[test]
1625    fn build_variant_names_bold() {
1626        let names = build_variant_names("Arial", FontWeight::Bold, FontPosture::Normal);
1627        assert!(names.contains(&"Arial-Bold".to_string()));
1628        assert!(names.contains(&"ArialBold".to_string()));
1629        assert!(names.contains(&"Arial Bold".to_string()));
1630        assert!(names.contains(&"Arial,Bold".to_string()));
1631    }
1632
1633    #[test]
1634    fn build_variant_names_italic() {
1635        let names = build_variant_names("Helvetica", FontWeight::Normal, FontPosture::Italic);
1636        assert!(names.contains(&"Helvetica-Italic".to_string()));
1637        assert!(names.contains(&"HelveticaItalic".to_string()));
1638        assert!(names.contains(&"Helvetica Italic".to_string()));
1639    }
1640
1641    #[test]
1642    fn build_variant_names_bold_italic() {
1643        let names = build_variant_names("Arial", FontWeight::Bold, FontPosture::Italic);
1644        assert!(names.contains(&"Arial-BoldItalic".to_string()));
1645        assert!(names.contains(&"ArialBoldItalic".to_string()));
1646        assert!(names.contains(&"Arial BoldItalic".to_string()));
1647        assert!(names.contains(&"Arial-Bold Italic".to_string()));
1648        assert!(names.contains(&"Arial Bold Italic".to_string()));
1649    }
1650
1651    #[test]
1652    fn font_variant_key_encoding() {
1653        assert_eq!(font_variant_key("Arial", None, None), "Arial_Normal_Normal");
1654        assert_eq!(
1655            font_variant_key("Arial", Some("bold"), None),
1656            "Arial_Bold_Normal"
1657        );
1658        assert_eq!(
1659            font_variant_key("Arial", None, Some("italic")),
1660            "Arial_Normal_Italic"
1661        );
1662        assert_eq!(
1663            font_variant_key("Arial", Some("bold"), Some("italic")),
1664            "Arial_Bold_Italic"
1665        );
1666    }
1667
1668    #[test]
1669    fn resolve_uses_bold_variant_cache_key() {
1670        let mut resolver = XfaFontResolver::new(vec![]);
1671        let spec_normal = XfaFontSpec::from_xfa_attrs("Arial", None, None, None, None);
1672        let spec_bold = XfaFontSpec::from_xfa_attrs("Arial", Some("bold"), None, None, None);
1673        // Both should resolve (or fail) independently — they use different cache keys.
1674        let _ = resolver.resolve(&spec_normal);
1675        let _ = resolver.resolve(&spec_bold);
1676    }
1677
1678    #[test]
1679    fn resolver_preserves_pdf_widths_when_embedded_font_data_is_unparseable() {
1680        let embedded = vec![EmbeddedFontData {
1681            name: "Helvetica".to_string(),
1682            data: vec![0_u8, 1, 2, 3],
1683            pdf_widths: Some((32, vec![278, 333, 444])),
1684            pdf_encoding: Some(PdfSimpleEncoding {
1685                base_encoding: PdfBaseEncoding::WinAnsi,
1686                differences: vec![(32, 0x0020)],
1687            }),
1688            pdf_source_font: None,
1689        }];
1690        let mut resolver = XfaFontResolver::new(embedded);
1691        let spec = XfaFontSpec::from_xfa_attrs("Helvetica", None, None, None, None);
1692        let resolved = resolver
1693            .resolve(&spec)
1694            .expect("resolver should fall back to a system font");
1695        assert_eq!(resolved.pdf_widths, Some((32, vec![278, 333, 444])));
1696        assert_eq!(
1697            resolved.pdf_encoding,
1698            Some(PdfSimpleEncoding {
1699                base_encoding: PdfBaseEncoding::WinAnsi,
1700                differences: vec![(32, 0x0020)],
1701            })
1702        );
1703    }
1704
1705    #[test]
1706    fn canonical_font_key_collapses_variant_separators() {
1707        let keys: Vec<_> = ["Arial-Bold", "Arial,Bold", "Arial Bold", "ArialBold"]
1708            .iter()
1709            .map(|n| canonical_font_key(n).unwrap())
1710            .collect();
1711        // All four spellings must map to the same canonical key.
1712        assert!(keys.iter().all(|k| k == "arialbold"), "got {:?}", keys);
1713    }
1714
1715    #[test]
1716    fn canonical_font_key_strips_mt_and_subset_prefix() {
1717        assert_eq!(
1718            canonical_font_key("Arial-BoldMT").as_deref(),
1719            Some("arialbold")
1720        );
1721        assert_eq!(
1722            canonical_font_key("ABCDEF+Arial-Bold").as_deref(),
1723            Some("arialbold")
1724        );
1725        assert_eq!(
1726            canonical_font_key("TimesNewRomanPSMT").as_deref(),
1727            Some("timesnewroman")
1728        );
1729        assert_eq!(canonical_font_key("").as_deref(), None);
1730        assert_eq!(canonical_font_key("-,_").as_deref(), None);
1731    }
1732
1733    #[test]
1734    fn lookup_pdf_widths_matches_across_separator_variants() {
1735        let mut map = HashMap::new();
1736        let widths = (32_u16, vec![278_u16, 333, 611]);
1737        // Producer registered widths under "Arial,Bold".
1738        remember_pdf_widths(&mut map, "Arial,Bold", &widths, None, None);
1739
1740        // Every common spelling of the same logical font should hit:
1741        for probe in &[
1742            "Arial,Bold",
1743            "Arial-Bold",
1744            "ArialBold",
1745            "Arial Bold",
1746            "Arial-BoldMT",
1747            "ABCDEF+Arial-Bold",
1748        ] {
1749            assert!(
1750                lookup_pdf_widths(&map, probe).is_some(),
1751                "lookup should resolve '{probe}' via the canonical key"
1752            );
1753        }
1754    }
1755
1756    #[test]
1757    fn lookup_pdf_widths_substring_fallback() {
1758        let mut map = HashMap::new();
1759        let widths = (32_u16, vec![500_u16; 3]);
1760        // Producer only stored the plain family name.
1761        remember_pdf_widths(&mut map, "Helvetica", &widths, None, None);
1762
1763        // A more specific request should still find the family as a last resort.
1764        assert!(lookup_pdf_widths(&map, "HelveticaNeue").is_some());
1765    }
1766
1767    #[test]
1768    fn attach_pdf_widths_binds_arial_bold_mt_across_separator_variants() {
1769        // Producer registered widths under "Arial,Bold"; XFA asks for
1770        // "Arial-BoldMT" (a different but equivalent spelling). Before the
1771        // canonical-key fix this dropped the /Widths and fell back to AFM.
1772        let widths = (32_u16, vec![278_u16, 333, 611]);
1773        let embedded = vec![EmbeddedFontData {
1774            name: "Arial,Bold".to_string(),
1775            data: Vec::new(),
1776            pdf_widths: Some(widths.clone()),
1777            pdf_encoding: None,
1778            pdf_source_font: Some(PdfSourceFont { object_id: (7, 0) }),
1779        }];
1780        let mut resolver = XfaFontResolver::new(embedded);
1781        let spec = XfaFontSpec::from_xfa_attrs("Arial-BoldMT", Some("bold"), None, None, None);
1782        let resolved = resolver
1783            .resolve(&spec)
1784            .expect("resolver should bind /Widths for the variant spelling");
1785        assert_eq!(resolved.pdf_widths, Some(widths));
1786        assert_eq!(
1787            resolved.pdf_source_font,
1788            Some(PdfSourceFont { object_id: (7, 0) })
1789        );
1790    }
1791
1792    // On Linux CI runners "Myriad Pro" is absent from the system font set, so the
1793    // augmentation branch (resolve() line ~762) behaves differently than on macOS
1794    // where the font *is* available. The test assertions only check pdf_widths and
1795    // pdf_source_font (not data), so the logic is sound — the platform difference
1796    // is purely environmental. Ignored on Linux to avoid non-deterministic CI failures.
1797    // Tracked in #1475 for a proper bundled-fixture fix.
1798    #[test]
1799    #[cfg_attr(target_os = "linux", ignore)]
1800    fn resolver_prefers_reusable_pdf_font_over_system_fallback() {
1801        let embedded = vec![EmbeddedFontData {
1802            name: "Myriad Pro".to_string(),
1803            data: Vec::new(),
1804            pdf_widths: Some((32, vec![278, 333, 612])),
1805            pdf_encoding: None,
1806            pdf_source_font: Some(PdfSourceFont { object_id: (42, 0) }),
1807        }];
1808        let mut resolver = XfaFontResolver::new(embedded);
1809        let spec = XfaFontSpec::from_xfa_attrs("Myriad Pro", None, None, None, None);
1810        let resolved = resolver
1811            .resolve(&spec)
1812            .expect("resolver should reuse the original PDF font object");
1813
1814        assert_eq!(resolved.pdf_widths, Some((32, vec![278, 333, 612])));
1815        assert_eq!(
1816            resolved.pdf_source_font,
1817            Some(PdfSourceFont { object_id: (42, 0) })
1818        );
1819        // Note: `data` may be non-empty if a matching system font is found (#858 augmentation
1820        // for Identity-H encoding of non-WinAnsi characters). We only assert that the original
1821        // PDF font object (pdf_source_font) and its width table are preserved.
1822    }
1823
1824    #[test]
1825    fn scan_font_dir_empty_dir() {
1826        let dir = std::env::temp_dir().join("xfa_font_cache_test_empty");
1827        let _ = std::fs::create_dir_all(&dir);
1828        let fonts = scan_font_dir(&dir);
1829        // May or may not be empty depending on temp dir state, but should not panic
1830        assert!(fonts.len() < 10000);
1831        let _ = std::fs::remove_dir_all(&dir);
1832    }
1833
1834    #[test]
1835    fn scan_font_dir_nonexistent() {
1836        let fonts = scan_font_dir(std::path::Path::new("/nonexistent/font/cache/dir"));
1837        assert!(fonts.is_empty());
1838    }
1839
1840    #[test]
1841    fn scan_font_cache_dir_unset() {
1842        // When XFA_FONT_CACHE is not set, should return empty map
1843        std::env::remove_var("XFA_FONT_CACHE");
1844        let fonts = scan_font_cache_dir();
1845        assert!(fonts.is_empty());
1846    }
1847
1848    #[test]
1849    fn resolver_with_font_cache_dir() {
1850        let dir = std::env::temp_dir().join("xfa_font_cache_test_resolver");
1851        let _ = std::fs::create_dir_all(&dir);
1852        let resolver = XfaFontResolver::new(vec![]).with_font_cache(&dir);
1853        // Should have the cache fonts map initialized (likely empty for temp dir)
1854        assert!(resolver.font_cache_fonts.len() < 10000);
1855        let _ = std::fs::remove_dir_all(&dir);
1856    }
1857
1858    #[test]
1859    fn font_cache_resolves_before_system_fallback() {
1860        // Create a temp dir with a fake "font" file named after a font
1861        let dir = std::env::temp_dir().join("xfa_font_cache_test_resolve_order");
1862        let _ = std::fs::remove_dir_all(&dir);
1863        std::fs::create_dir_all(&dir).unwrap();
1864
1865        // Copy a real system font (if available) to the cache under a test name.
1866        // On macOS, Helvetica.ttc exists; on Linux, try LiberationSans.
1867        let source = {
1868            #[cfg(target_os = "macos")]
1869            {
1870                PathBuf::from("/System/Library/Fonts/Helvetica.ttc")
1871            }
1872            #[cfg(not(target_os = "macos"))]
1873            {
1874                PathBuf::from("/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf")
1875            }
1876        };
1877        if !source.exists() {
1878            // Skip test if no system font available
1879            let _ = std::fs::remove_dir_all(&dir);
1880            return;
1881        }
1882        let dest = dir.join("TestCacheFont.ttf");
1883        std::fs::copy(&source, &dest).unwrap();
1884
1885        let resolver = XfaFontResolver::new(vec![]).with_font_cache(&dir);
1886        // The cache should have picked up the font by filename stem
1887        assert!(
1888            resolver.font_cache_fonts.contains_key("testcachefont"),
1889            "font cache should contain the test font by stem name"
1890        );
1891
1892        let _ = std::fs::remove_dir_all(&dir);
1893    }
1894}