Skip to main content

pdf_interpret/font/
standard_font.rs

1use crate::FontResolverFn;
2use crate::font::blob::{CffFontBlob, OpenTypeFontBlob};
3use crate::font::generated::{glyph_names, metrics, standard, symbol, zapf_dings};
4use crate::font::true_type::{Width, read_encoding, read_widths};
5use crate::font::{
6    Encoding, FontData, FontQuery, glyph_name_to_unicode, normalized_glyph_name, stretch_glyph,
7    strip_subset_prefix,
8};
9use kurbo::BezPath;
10use pdf_syntax::object::Dict;
11use pdf_syntax::object::Name;
12use pdf_syntax::object::dict::keys::{
13    BASE_FONT, FONT_DESC, FONT_FAMILY, FONT_WEIGHT, ITALIC_ANGLE, MISSING_WIDTH,
14};
15use skrifa::raw::TableProvider;
16use skrifa::{GlyphId, GlyphId16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19
/// The 14 standard fonts of PDF.
///
/// The variants cover four Helvetica styles, four Courier styles, four Times
/// styles and the two symbolic fonts (Zapf Dingbats and Symbol).
#[derive(Copy, Clone, Debug)]
pub enum StandardFont {
    /// Helvetica.
    Helvetica,
    /// Helvetica Bold.
    HelveticaBold,
    /// Helvetica Oblique.
    HelveticaOblique,
    /// Helvetica Bold Oblique.
    HelveticaBoldOblique,
    /// Courier.
    Courier,
    /// Courier Bold.
    CourierBold,
    /// Courier Oblique.
    CourierOblique,
    /// Courier Bold Oblique.
    CourierBoldOblique,
    /// Times Roman.
    TimesRoman,
    /// Times Bold.
    TimesBold,
    /// Times Italic.
    TimesItalic,
    /// Times Bold Italic.
    TimesBoldItalic,
    /// Zapf Dingbats - a decorative symbol font.
    ZapfDingBats,
    /// Symbol - a mathematical symbol font.
    Symbol,
}
52
53impl StandardFont {
54    pub(crate) fn code_to_name(&self, code: u8) -> Option<&'static str> {
55        match self {
56            Self::Symbol => symbol::get(code),
57            // Note that this font does not return postscript character names,
58            // but instead has a custom encoding.
59            Self::ZapfDingBats => zapf_dings::get(code),
60            _ => standard::get(code),
61        }
62    }
63
64    pub(crate) fn get_width(&self, mut name: &str) -> Option<f32> {
65        // <https://github.com/apache/pdfbox/blob/129aafe26548c1ff935af9c55cb40a996186c35f/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java#L340>
66        if name == ".notdef" {
67            return Some(250.0);
68        }
69
70        name = normalized_glyph_name(name);
71
72        match self {
73            Self::Helvetica => metrics::HELVETICA.get(name).copied(),
74            Self::HelveticaBold => metrics::HELVETICA_BOLD.get(name).copied(),
75            Self::HelveticaOblique => metrics::HELVETICA_OBLIQUE.get(name).copied(),
76            Self::HelveticaBoldOblique => metrics::HELVETICA_BOLD_OBLIQUE.get(name).copied(),
77            Self::Courier => metrics::COURIER.get(name).copied(),
78            Self::CourierBold => metrics::COURIER_BOLD.get(name).copied(),
79            Self::CourierOblique => metrics::COURIER_OBLIQUE.get(name).copied(),
80            Self::CourierBoldOblique => metrics::COURIER_BOLD_OBLIQUE.get(name).copied(),
81            Self::TimesRoman => metrics::TIMES_ROMAN.get(name).copied(),
82            Self::TimesBold => metrics::TIMES_BOLD.get(name).copied(),
83            Self::TimesItalic => metrics::TIMES_ITALIC.get(name).copied(),
84            Self::TimesBoldItalic => metrics::TIMES_BOLD_ITALIC.get(name).copied(),
85            Self::ZapfDingBats => metrics::ZAPF_DING_BATS.get(name).copied(),
86            Self::Symbol => metrics::SYMBOL.get(name).copied(),
87        }
88    }
89
90    pub(crate) fn as_str(&self) -> &'static str {
91        match self {
92            Self::Helvetica => "Helvetica",
93            Self::HelveticaBold => "Helvetica Bold",
94            Self::HelveticaOblique => "Helvetica Oblique",
95            Self::HelveticaBoldOblique => "Helvetica Bold Oblique",
96            Self::Courier => "Courier",
97            Self::CourierBold => "Courier Bold",
98            Self::CourierOblique => "Courier Oblique",
99            Self::CourierBoldOblique => "Courier Bold Oblique",
100            Self::TimesRoman => "Times Roman",
101            Self::TimesBold => "Times Bold",
102            Self::TimesItalic => "Times Italic",
103            Self::TimesBoldItalic => "Times Bold Italic",
104            Self::ZapfDingBats => "Zapf Dingbats",
105            Self::Symbol => "Symbol",
106        }
107    }
108
109    /// Return the postscrit name of the font.
110    pub fn postscript_name(&self) -> &'static str {
111        match self {
112            Self::Helvetica => "Helvetica",
113            Self::HelveticaBold => "Helvetica-Bold",
114            Self::HelveticaOblique => "Helvetica-Oblique",
115            Self::HelveticaBoldOblique => "Helvetica-BoldOblique",
116            Self::Courier => "Courier",
117            Self::CourierBold => "Courier-Bold",
118            Self::CourierOblique => "Courier-Oblique",
119            Self::CourierBoldOblique => "Courier-BoldOblique",
120            Self::TimesRoman => "Times-Roman",
121            Self::TimesBold => "Times-Bold",
122            Self::TimesItalic => "Times-Italic",
123            Self::TimesBoldItalic => "Times-BoldItalic",
124            Self::ZapfDingBats => "ZapfDingbats",
125            Self::Symbol => "Symbol",
126        }
127    }
128
129    pub(crate) fn is_bold(&self) -> bool {
130        matches!(
131            self,
132            Self::HelveticaBold
133                | Self::HelveticaBoldOblique
134                | Self::CourierBold
135                | Self::CourierBoldOblique
136                | Self::TimesBold
137                | Self::TimesBoldItalic
138        )
139    }
140
141    pub(crate) fn is_italic(&self) -> bool {
142        matches!(
143            self,
144            Self::HelveticaOblique
145                | Self::HelveticaBoldOblique
146                | Self::CourierOblique
147                | Self::CourierBoldOblique
148                | Self::TimesItalic
149                | Self::TimesBoldItalic
150        )
151    }
152
153    pub(crate) fn is_serif(&self) -> bool {
154        matches!(
155            self,
156            Self::TimesRoman | Self::TimesBold | Self::TimesItalic | Self::TimesBoldItalic
157        )
158    }
159
160    pub(crate) fn is_monospace(&self) -> bool {
161        matches!(
162            self,
163            Self::Courier | Self::CourierBold | Self::CourierOblique | Self::CourierBoldOblique
164        )
165    }
166
167    /// Canonical vertical font metrics for the PDF Standard-14 faces, in /1000
168    /// em. Returns `(ascent, descent, cap_height, x_height)` where `ascent` is
169    /// positive (above the baseline) and `descent` is negative (below it).
170    ///
171    /// Source: the Adobe Core-14 AFM files (`Ascender` / `Descender` /
172    /// `CapHeight` / `XHeight` global keys; obliques share their upright face's
173    /// vertical metrics). Symbol and ZapfDingbats declare no letterform metrics
174    /// in their AFM, so `cap_height` / `x_height` are `None` and ascent/descent
175    /// are taken from the AFM `FontBBox` vertical extents.
176    ///
177    /// Used only as a fallback when no embedded font binary provides skrifa
178    /// metrics — embedded font metrics always take precedence.
179    pub(crate) fn canonical_metrics(&self) -> Option<(f64, f64, Option<f64>, Option<f64>)> {
180        match self {
181            Self::Helvetica => Some((718.0, -207.0, Some(718.0), Some(523.0))),
182            Self::HelveticaBold => Some((718.0, -207.0, Some(718.0), Some(532.0))),
183            Self::HelveticaOblique => Some((718.0, -207.0, Some(718.0), Some(523.0))),
184            Self::HelveticaBoldOblique => Some((718.0, -207.0, Some(718.0), Some(532.0))),
185            Self::Courier => Some((629.0, -157.0, Some(562.0), Some(426.0))),
186            Self::CourierBold => Some((629.0, -157.0, Some(562.0), Some(439.0))),
187            Self::CourierOblique => Some((629.0, -157.0, Some(562.0), Some(426.0))),
188            Self::CourierBoldOblique => Some((629.0, -157.0, Some(562.0), Some(439.0))),
189            Self::TimesRoman => Some((683.0, -217.0, Some(662.0), Some(450.0))),
190            Self::TimesBold => Some((683.0, -217.0, Some(676.0), Some(461.0))),
191            Self::TimesItalic => Some((683.0, -217.0, Some(653.0), Some(441.0))),
192            Self::TimesBoldItalic => Some((683.0, -217.0, Some(669.0), Some(462.0))),
193            Self::ZapfDingBats => Some((820.0, -143.0, None, None)),
194            Self::Symbol => Some((1010.0, -293.0, None, None)),
195        }
196    }
197
198    /// Return suitable font data for the given standard font.
199    ///
200    /// Currently, this will return the corresponding Foxit font, which is a set of permissibly
201    /// licensed fonts that is also very light-weight.
202    ///
203    /// You can use the result of this method in your implementation of [`FontResolverFn`].
204    ///
205    /// [`FontResolverFn`]: crate::FontResolverFn
206    #[cfg(feature = "embed-fonts")]
207    pub fn get_font_data(&self) -> (FontData, u32) {
208        use std::sync::Arc;
209
210        let data = match self {
211            Self::Helvetica => &include_bytes!("../../assets/FoxitSans.pfb")[..],
212            Self::HelveticaBold => &include_bytes!("../../assets/FoxitSansBold.pfb")[..],
213            Self::HelveticaOblique => &include_bytes!("../../assets/FoxitSansItalic.pfb")[..],
214            Self::HelveticaBoldOblique => {
215                &include_bytes!("../../assets/FoxitSansBoldItalic.pfb")[..]
216            }
217            Self::Courier => &include_bytes!("../../assets/FoxitFixed.pfb")[..],
218            Self::CourierBold => &include_bytes!("../../assets/FoxitFixedBold.pfb")[..],
219            Self::CourierOblique => &include_bytes!("../../assets/FoxitFixedItalic.pfb")[..],
220            Self::CourierBoldOblique => {
221                &include_bytes!("../../assets/FoxitFixedBoldItalic.pfb")[..]
222            }
223            Self::TimesRoman => &include_bytes!("../../assets/FoxitSerif.pfb")[..],
224            Self::TimesBold => &include_bytes!("../../assets/FoxitSerifBold.pfb")[..],
225            Self::TimesItalic => &include_bytes!("../../assets/FoxitSerifItalic.pfb")[..],
226            Self::TimesBoldItalic => &include_bytes!("../../assets/FoxitSerifBoldItalic.pfb")[..],
227            Self::ZapfDingBats => &include_bytes!("../../assets/FoxitDingbats.pfb")[..],
228            Self::Symbol => {
229                include_bytes!("../../assets/FoxitSymbol.pfb")
230            }
231        };
232
233        (Arc::new(data), 0)
234    }
235}
236
/// The three text font families among the standard fonts.
///
/// Used by the keyword heuristic in `select_standard_font`, which first picks
/// a family and then combines it with the bold/italic flags to choose the
/// concrete [`StandardFont`].
enum StandardFontFamily {
    /// Maps to one of the Helvetica faces.
    Helvetica,
    /// Maps to one of the Courier faces.
    Courier,
    /// Maps to one of the Times faces.
    Times,
}
242
243/// PostScript-name aliases commonly produced by Office/iText/etc. that refer
244/// to fonts the reader is expected to substitute with the corresponding
245/// Standard-14 font. Matched after subset-prefix stripping and after the
246/// literal Standard-14 names, but before the keyword-based heuristic.
247///
248/// Aliases are intentionally exact (case-sensitive) matches — the keyword
249/// heuristic below already catches free-form variants like "ArialNarrow-Bold".
250fn standard_font_alias(name: &str) -> Option<StandardFont> {
251    match name {
252        // Arial family → Helvetica
253        "ArialMT" | "Arial" => Some(StandardFont::Helvetica),
254        "Arial-BoldMT" | "Arial,Bold" | "Arial-Bold" => Some(StandardFont::HelveticaBold),
255        "Arial-ItalicMT" | "Arial,Italic" | "Arial-Italic" => Some(StandardFont::HelveticaOblique),
256        "Arial-BoldItalicMT" | "Arial,BoldItalic" | "Arial-BoldItalic" => {
257            Some(StandardFont::HelveticaBoldOblique)
258        }
259        // Times New Roman family → Times
260        "TimesNewRomanPSMT" | "TimesNewRoman" | "TimesNewRomanPS" => Some(StandardFont::TimesRoman),
261        "TimesNewRomanPS-BoldMT"
262        | "TimesNewRoman-Bold"
263        | "TimesNewRomanPS-Bold"
264        | "TimesNewRoman,Bold" => Some(StandardFont::TimesBold),
265        "TimesNewRomanPS-ItalicMT"
266        | "TimesNewRoman-Italic"
267        | "TimesNewRomanPS-Italic"
268        | "TimesNewRoman,Italic" => Some(StandardFont::TimesItalic),
269        "TimesNewRomanPS-BoldItalicMT"
270        | "TimesNewRoman-BoldItalic"
271        | "TimesNewRomanPS-BoldItalic"
272        | "TimesNewRoman,BoldItalic" => Some(StandardFont::TimesBoldItalic),
273        // Courier New family → Courier
274        "CourierNewPSMT" | "CourierNew" => Some(StandardFont::Courier),
275        "CourierNewPS-BoldMT" | "CourierNew-Bold" | "CourierNewPS-Bold" => {
276            Some(StandardFont::CourierBold)
277        }
278        "CourierNewPS-ItalicMT" | "CourierNew-Italic" | "CourierNewPS-Italic" => {
279            Some(StandardFont::CourierOblique)
280        }
281        "CourierNewPS-BoldItalicMT" | "CourierNew-BoldItalic" | "CourierNewPS-BoldItalic" => {
282            Some(StandardFont::CourierBoldOblique)
283        }
284        _ => None,
285    }
286}
287
288pub(crate) fn select_standard_font(
289    dict: &Dict<'_>,
290    descriptor: &Dict<'_>,
291) -> Option<(StandardFont, bool)> {
292    let base_font = dict.get::<Name>(BASE_FONT)?;
293    let name = strip_subset_prefix(base_font.as_str());
294
295    // First try whether it matches literally.
296    match name {
297        "Helvetica" => return Some((StandardFont::Helvetica, true)),
298        "Helvetica-Bold" => return Some((StandardFont::HelveticaBold, true)),
299        "Helvetica-Oblique" => return Some((StandardFont::HelveticaOblique, true)),
300        "Helvetica-BoldOblique" => return Some((StandardFont::HelveticaBoldOblique, true)),
301        "Courier" => return Some((StandardFont::Courier, true)),
302        "Courier-Bold" => return Some((StandardFont::CourierBold, true)),
303        "Courier-Oblique" => return Some((StandardFont::CourierOblique, true)),
304        "Courier-BoldOblique" => return Some((StandardFont::CourierBoldOblique, true)),
305        "Times-Roman" => return Some((StandardFont::TimesRoman, true)),
306        "Times-Bold" => return Some((StandardFont::TimesBold, true)),
307        "Times-Italic" => return Some((StandardFont::TimesItalic, true)),
308        "Times-BoldItalic" => return Some((StandardFont::TimesBoldItalic, true)),
309        "Symbol" => return Some((StandardFont::Symbol, true)),
310        "ZapfDingbats" => return Some((StandardFont::ZapfDingBats, true)),
311        _ => {}
312    }
313
314    // PostScript-name aliases commonly emitted by Office/iText/etc. for
315    // unembedded Standard-14-equivalent fonts (e.g. ArialMT → Helvetica).
316    // Treated as non-exact so glyph-width fallback in StandardKind still
317    // consults the supplied Widths array when present.
318    if let Some(alias) = standard_font_alias(name) {
319        return Some((alias, false));
320    }
321
322    // Now, we bruteforce, trying to determine a suitable font based on the
323    // keywords that appear in the name and the descriptor.
324    let lower = name.to_ascii_lowercase();
325
326    // FontFamily (descriptor) captures the human-readable family, which is
327    // often present even when BaseFont is an opaque subset name. PDF 1.7 §9.8.1
328    // specifies it as a text string, but producers in the wild use name
329    // objects too; fetch as Name (covers both via implicit conversion).
330    let family_field = descriptor
331        .get::<Name>(FONT_FAMILY)
332        .map(|n| n.as_str().to_ascii_lowercase())
333        .unwrap_or_default();
334
335    // PDF spec §9.8.2 Table 120: FontWeight is a number in {100, 200, … 900};
336    // 400 is normal and 700 is bold. Adobe considers weights ≥ 600 (SemiBold,
337    // DemiBold) as "bold" for substitution purposes — matching that lowers
338    // the threshold from 700 to 600 so fonts like "HelveticaNeue-Medium"
339    // (weight 500) stay regular but "*-SemiBold" (600) map to the bold face.
340    let is_bold = descriptor.get::<u32>(FONT_WEIGHT).is_some_and(|w| w >= 600)
341        || lower.contains("bold")
342        || lower.contains("demi")
343        || family_field.contains("bold")
344        || family_field.contains("demi");
345    // PDF spec §9.8.2 Table 120: ItalicAngle is the angle, in counter-clockwise
346    // degrees, of the dominant vertical strokes. Italic/oblique faces are
347    // negative (typically -10° to -20°). Previously we accepted any non-zero
348    // value, which mis-classified upright fonts that shipped with tiny
349    // rounding noise (e.g. -0.1). The stricter −5° threshold follows what
350    // PDF.js and PDFBox use and avoids that false-positive.
351    let is_italic = descriptor
352        .get::<f32>(ITALIC_ANGLE)
353        .is_some_and(|a| !(-5.0..=5.0).contains(&a))
354        || lower.contains("italic")
355        || lower.contains("oblique")
356        || family_field.contains("italic")
357        || family_field.contains("oblique");
358
359    // Keyword/family heuristic. Prefer BaseFont; fall back to FontFamily.
360    let haystack = if family_field.is_empty() {
361        lower.clone()
362    } else {
363        format!("{lower} {family_field}")
364    };
365
366    // Keyword/family heuristic (last resort — only reached when the font name
367    // did not match any Standard-14 alias above).
368    //
369    // `exact` controls whether the caller should trust PDF /Widths entries or
370    // fall back to Standard-14 AFM metrics:
371    //   exact=true  → AFM metrics used; PDF /Widths ignored (safe for genuine
372    //                 Standard-14 faces whose names survived case folding here)
373    //   exact=false → PDF /Widths respected when present; AFM is only fallback
374    //                 (correct for non-Standard-14 lookalikes like "ArialMT")
375    //
376    // GL-QA38 regression note: setting exact=false for ALL heuristic matches
377    // caused 116 SSIM regressions in gate-5k-04 because many PDFs that contained
378    // "helvetica" or "times" in the font name ARE genuine Standard-14 and their
379    // /Widths arrays (when present) are less accurate than Standard-14 AFM.
380    // The corrected approach: treat Standard-14 keyword matches (helvetica,
381    // courier, times) as exact=true; treat clear non-Standard-14 keywords
382    // (arial, sans, mono, serif without "times") as exact=false so we respect
383    // their embedded /Widths.
384    let (family, exact) = if haystack.contains("helvetica") {
385        (Some(StandardFontFamily::Helvetica), true) // likely genuine Helvetica — use AFM
386    } else if haystack.contains("arial") || haystack.contains("sans") {
387        (Some(StandardFontFamily::Helvetica), false) // Arial/generic sans — respect /Widths
388    } else if haystack.contains("courier") {
389        (Some(StandardFontFamily::Courier), true) // likely genuine Courier — use AFM
390    } else if haystack.contains("mono") {
391        (Some(StandardFontFamily::Courier), false) // generic monospace — respect /Widths
392    } else if haystack.contains("times") {
393        (Some(StandardFontFamily::Times), true) // likely genuine Times — use AFM
394    } else if haystack.contains("serif") {
395        (Some(StandardFontFamily::Times), false) // generic serif — respect /Widths
396    } else if haystack.contains("zapfdingbats") || haystack.contains("dingbats") {
397        return Some((StandardFont::ZapfDingBats, false));
398    } else {
399        (None, false)
400    };
401
402    let font = match (family?, is_bold, is_italic) {
403        (StandardFontFamily::Helvetica, false, false) => StandardFont::Helvetica,
404        (StandardFontFamily::Helvetica, true, false) => StandardFont::HelveticaBold,
405        (StandardFontFamily::Helvetica, false, true) => StandardFont::HelveticaOblique,
406        (StandardFontFamily::Helvetica, true, true) => StandardFont::HelveticaBoldOblique,
407        (StandardFontFamily::Courier, false, false) => StandardFont::Courier,
408        (StandardFontFamily::Courier, true, false) => StandardFont::CourierBold,
409        (StandardFontFamily::Courier, false, true) => StandardFont::CourierOblique,
410        (StandardFontFamily::Courier, true, true) => StandardFont::CourierBoldOblique,
411        (StandardFontFamily::Times, false, false) => StandardFont::TimesRoman,
412        (StandardFontFamily::Times, true, false) => StandardFont::TimesBold,
413        (StandardFontFamily::Times, false, true) => StandardFont::TimesItalic,
414        (StandardFontFamily::Times, true, true) => StandardFont::TimesBoldItalic,
415    };
416
417    Some((font, exact))
418}
419
/// The parsed font program that backs a standard font, in one of the two
/// supported container types.
#[derive(Debug)]
pub(crate) enum StandardFontBlob {
    /// A font held as a [`CffFontBlob`]; glyphs are looked up by name through
    /// the blob's table.
    Cff(CffFontBlob),
    /// A font held as an [`OpenTypeFontBlob`], together with a glyph-name →
    /// glyph-id map built from the font's `post` table (empty when the table
    /// cannot be read).
    Otf(OpenTypeFontBlob, HashMap<String, GlyphId>),
}
425
426impl StandardFontBlob {
427    pub(crate) fn from_data(data: FontData, index: u32) -> Option<Self> {
428        if let Some(blob) = CffFontBlob::new(data.clone()) {
429            Some(Self::new_cff(blob))
430        } else {
431            OpenTypeFontBlob::new(data, index).map(Self::new_otf)
432        }
433    }
434
435    pub(crate) fn new_cff(blob: CffFontBlob) -> Self {
436        Self::Cff(blob)
437    }
438
439    pub(crate) fn new_otf(blob: OpenTypeFontBlob) -> Self {
440        let mut glyph_names = HashMap::new();
441
442        if let Ok(post) = blob.font_ref().post() {
443            for i in 0..blob.num_glyphs() {
444                if let Some(str) = post.glyph_name(GlyphId16::new(i)) {
445                    glyph_names.insert(str.to_string(), GlyphId::new(i as u32));
446                }
447            }
448        }
449
450        Self::Otf(blob, glyph_names)
451    }
452}
453
454impl StandardFontBlob {
455    pub(crate) fn name_to_glyph(&self, name: &str) -> Option<GlyphId> {
456        match self {
457            Self::Cff(blob) => blob
458                .table()
459                .glyph_index_by_name(name)
460                .map(|g| GlyphId::new(g.0 as u32)),
461            Self::Otf(_, glyph_names) => glyph_names.get(name).copied(),
462        }
463    }
464
465    pub(crate) fn unicode_to_glyph(&self, code: u32) -> Option<GlyphId> {
466        match self {
467            Self::Cff(_) => None,
468            Self::Otf(blob, _) => blob
469                .font_ref()
470                .cmap()
471                .ok()
472                .and_then(|c| c.map_codepoint(code)),
473        }
474    }
475
476    pub(crate) fn advance_width(&self, glyph: GlyphId) -> Option<f32> {
477        match self {
478            Self::Cff(_) => None,
479            Self::Otf(blob, _) => blob.glyph_metrics().advance_width(glyph),
480        }
481    }
482
483    pub(crate) fn outline_glyph(&self, glyph: GlyphId) -> BezPath {
484        // Standard fonts have empty outlines for these, but in Liberation Sans
485        // they are a .notdef rectangle.
486        if glyph == GlyphId::NOTDEF {
487            return BezPath::new();
488        }
489
490        match self {
491            Self::Cff(blob) => blob.outline_glyph(glyph),
492            Self::Otf(blob, _) => blob.outline_glyph(glyph),
493        }
494    }
495}
496
/// A simple font that is rendered with one of the standard fonts instead of
/// an embedded font program.
#[derive(Debug)]
pub(crate) struct StandardKind {
    /// The standard font this font was resolved to.
    base_font: StandardFont,
    /// Parsed font program obtained from the font resolver for `base_font`.
    base_font_blob: StandardFontBlob,
    /// Encoding returned by `read_encoding`; forced to `Encoding::BuiltIn`
    /// for Symbol and Zapf Dingbats.
    encoding: Encoding,
    /// Per-code widths returned by `read_widths`, indexed by character code.
    widths: Vec<Width>,
    /// The missing width returned by `read_widths`, kept only when the font
    /// descriptor actually contains a `MissingWidth` entry.
    missing_width: Option<f32>,
    /// `true` when the font name was not an exact standard-font match; in
    /// that case glyph stretching uses `widths` rather than the base metrics.
    fallback: bool,
    /// Remembers, for each glyph returned by `map_code`, the most recent
    /// character code that mapped to it.
    glyph_to_code: RefCell<HashMap<GlyphId, u8>>,
    /// Per-code glyph names returned by `read_encoding`; these take
    /// precedence over `encoding` when resolving a code to a glyph name.
    encodings: HashMap<u8, String>,
}
508
509impl StandardKind {
510    pub(crate) fn new(dict: &Dict<'_>, resolver: &FontResolverFn) -> Option<Self> {
511        let descriptor = dict.get::<Dict<'_>>(FONT_DESC).unwrap_or_default();
512        let (font, exact) = select_standard_font(dict, &descriptor)?;
513        Self::new_with_standard(dict, font, !exact, resolver)
514    }
515
516    pub(crate) fn new_with_standard(
517        dict: &Dict<'_>,
518        base_font: StandardFont,
519        fallback: bool,
520        resolver: &FontResolverFn,
521    ) -> Option<Self> {
522        let descriptor = dict.get::<Dict<'_>>(FONT_DESC).unwrap_or_default();
523        let (widths, missing_width) = read_widths(dict, &descriptor)?;
524        let missing_width = descriptor
525            .contains_key(MISSING_WIDTH)
526            .then_some(missing_width);
527
528        let (mut encoding, encoding_map) = read_encoding(dict);
529
530        // See PDFJS-16464: Ignore encodings for non-embedded Type1 symbol fonts.
531        if matches!(base_font, StandardFont::Symbol | StandardFont::ZapfDingBats) {
532            encoding = Encoding::BuiltIn;
533        }
534
535        let (blob, index) = resolver(&FontQuery::Standard(base_font))?;
536        let base_font_blob = StandardFontBlob::from_data(blob, index)?;
537
538        Some(Self {
539            base_font,
540            base_font_blob,
541            widths,
542            missing_width,
543            encodings: encoding_map,
544            glyph_to_code: RefCell::new(HashMap::new()),
545            fallback,
546            encoding,
547        })
548    }
549
550    fn code_to_ps_name(&self, code: u8) -> Option<&str> {
551        let bf = self.base_font;
552
553        self.encodings
554            .get(&code)
555            .map(String::as_str)
556            .or_else(|| match self.encoding {
557                Encoding::BuiltIn => bf.code_to_name(code),
558                _ => self.encoding.map_code(code),
559            })
560    }
561
562    pub(crate) fn map_code(&self, code: u8) -> GlyphId {
563        let result = self
564            .code_to_ps_name(code)
565            .and_then(|c| {
566                self.base_font_blob.name_to_glyph(c).or_else(|| {
567                    // If the font doesn't have a POST table, try to map via unicode instead.
568                    glyph_names::get(c).and_then(|c| {
569                        self.base_font_blob
570                            .unicode_to_glyph(c.chars().nth(0).unwrap() as u32)
571                    })
572                })
573            })
574            .unwrap_or(GlyphId::NOTDEF);
575        self.glyph_to_code.borrow_mut().insert(result, code);
576
577        result
578    }
579
580    pub(crate) fn outline_glyph(&self, glyph: GlyphId) -> BezPath {
581        let path = self.base_font_blob.outline_glyph(glyph);
582
583        // If the font is not embedded, we might need to stretch it so that
584        // it matches the metrics of the actual underlying font blob.
585
586        if let Some(code) = self.glyph_to_code.borrow().get(&glyph).copied()
587            && let Some(actual_width) = self.base_font_blob.advance_width(glyph).or_else(|| {
588                self.code_to_ps_name(code)
589                    .and_then(|name| self.base_font.get_width(name))
590            })
591        {
592            // From my experiments: Most PDF viewers, if they detect a font is a
593            // standard font, they completely ignore the widths array, even if
594            // different widths are indicated there. So only if it's an unknown
595            // font do we check the widths array. Otherwise, we always use the
596            // base font metrics.
597            let should_width = if self.fallback {
598                if let Some(Width::Value(w)) = self.widths.get(code as usize).copied() {
599                    w
600                } else {
601                    return path;
602                }
603            } else if let Some(w) = self
604                .code_to_ps_name(code)
605                .and_then(|name| self.base_font.get_width(name))
606            {
607                w
608            } else {
609                return path;
610            };
611
612            return stretch_glyph(path, should_width, actual_width);
613        }
614
615        path
616    }
617
618    pub(crate) fn glyph_width(&self, code: u8) -> Option<f32> {
619        match self.widths.get(code as usize).copied() {
620            Some(Width::Value(w)) => Some(w),
621            Some(Width::Missing) => self.missing_width.or_else(|| {
622                self.code_to_ps_name(code)
623                    .and_then(|c| self.base_font.get_width(c))
624            }),
625            _ => self
626                .code_to_ps_name(code)
627                .and_then(|c| self.base_font.get_width(c)),
628        }
629    }
630
631    pub(crate) fn char_code_to_unicode(&self, code: u8) -> Option<char> {
632        self.code_to_ps_name(code).and_then(glyph_name_to_unicode)
633    }
634
635    pub(crate) fn is_italic(&self) -> bool {
636        self.base_font.is_italic()
637    }
638
639    pub(crate) fn is_bold(&self) -> bool {
640        self.base_font.is_bold()
641    }
642
643    pub(crate) fn is_serif(&self) -> bool {
644        self.base_font.is_serif()
645    }
646
647    pub(crate) fn is_monospace(&self) -> bool {
648        self.base_font.is_monospace()
649    }
650
651    /// PostScript name of the underlying standard-14 font.
652    ///
653    /// Always available because every `StandardKind` was constructed
654    /// against a known [`StandardFont`].  Surfaced for the WASM
655    /// `getTextPositions()` `fontName` field on non-embedded Type1
656    /// runs (see `pdf-engine`).
657    pub(crate) fn postscript_name(&self) -> &'static str {
658        self.base_font.postscript_name()
659    }
660
661    /// Vertical font metrics from the Standard-14 AFM fallback table.
662    pub(crate) fn font_metrics(&self) -> Option<(f64, f64, Option<f64>, Option<f64>)> {
663        self.base_font.canonical_metrics()
664    }
665}
666
667#[cfg(test)]
668mod tests {
669    use super::*;
670
671    fn build_widths(entries: &[(u8, f32)]) -> Vec<Width> {
672        let mut widths = vec![Width::Missing; 256];
673        for (code, width) in entries {
674            widths[*code as usize] = Width::Value(*width);
675        }
676        widths
677    }
678
679    fn build_standard_kind(widths: Vec<Width>, missing_width: Option<f32>) -> StandardKind {
680        let (data, index) = StandardFont::Helvetica.get_font_data();
681        let base_font_blob =
682            StandardFontBlob::from_data(data, index).expect("standard font data should parse");
683
684        StandardKind {
685            base_font: StandardFont::Helvetica,
686            base_font_blob,
687            encoding: Encoding::WinAnsi,
688            widths,
689            missing_width,
690            fallback: true,
691            glyph_to_code: RefCell::new(HashMap::new()),
692            encodings: HashMap::new(),
693        }
694    }
695
696    #[test]
697    fn glyph_width_falls_back_to_base_metrics_when_missing_width_is_absent() {
698        let font = build_standard_kind(build_widths(&[(b'A', 600.0)]), None);
699
700        assert_eq!(font.glyph_width(b'A'), Some(600.0));
701        assert_eq!(
702            font.glyph_width(b'B'),
703            StandardFont::Helvetica.get_width("B")
704        );
705    }
706
707    #[test]
708    fn glyph_width_respects_explicit_zero_missing_width() {
709        let font = build_standard_kind(build_widths(&[(b'A', 600.0)]), Some(0.0));
710
711        assert_eq!(font.glyph_width(b'A'), Some(600.0));
712        assert_eq!(font.glyph_width(b'B'), Some(0.0));
713    }
714
715    #[test]
716    fn arial_aliases_resolve_to_helvetica_family() {
717        assert!(matches!(
718            standard_font_alias("ArialMT"),
719            Some(StandardFont::Helvetica)
720        ));
721        assert!(matches!(
722            standard_font_alias("Arial-BoldMT"),
723            Some(StandardFont::HelveticaBold)
724        ));
725        assert!(matches!(
726            standard_font_alias("Arial-ItalicMT"),
727            Some(StandardFont::HelveticaOblique)
728        ));
729        assert!(matches!(
730            standard_font_alias("Arial-BoldItalicMT"),
731            Some(StandardFont::HelveticaBoldOblique)
732        ));
733    }
734
735    #[test]
736    fn times_new_roman_aliases_resolve_to_times_family() {
737        assert!(matches!(
738            standard_font_alias("TimesNewRomanPSMT"),
739            Some(StandardFont::TimesRoman)
740        ));
741        assert!(matches!(
742            standard_font_alias("TimesNewRomanPS-BoldMT"),
743            Some(StandardFont::TimesBold)
744        ));
745        assert!(matches!(
746            standard_font_alias("TimesNewRomanPS-ItalicMT"),
747            Some(StandardFont::TimesItalic)
748        ));
749        assert!(matches!(
750            standard_font_alias("TimesNewRomanPS-BoldItalicMT"),
751            Some(StandardFont::TimesBoldItalic)
752        ));
753    }
754
755    #[test]
756    fn courier_new_aliases_resolve_to_courier_family() {
757        assert!(matches!(
758            standard_font_alias("CourierNewPSMT"),
759            Some(StandardFont::Courier)
760        ));
761        assert!(matches!(
762            standard_font_alias("CourierNewPS-BoldMT"),
763            Some(StandardFont::CourierBold)
764        ));
765        assert!(matches!(
766            standard_font_alias("CourierNewPS-ItalicMT"),
767            Some(StandardFont::CourierOblique)
768        ));
769        assert!(matches!(
770            standard_font_alias("CourierNewPS-BoldItalicMT"),
771            Some(StandardFont::CourierBoldOblique)
772        ));
773    }
774
775    #[test]
776    fn unknown_names_do_not_alias() {
777        assert!(standard_font_alias("LiberationSans").is_none());
778        assert!(standard_font_alias("CenturySchoolbook").is_none());
779        assert!(standard_font_alias("").is_none());
780    }
781
782    // ── canonical_metrics tests ──
783
784    #[test]
785    fn helvetica_metrics() {
786        let m = StandardFont::Helvetica.canonical_metrics().unwrap();
787        assert_eq!(m.0, 718.0);
788        assert_eq!(m.1, -207.0);
789        assert_eq!(m.2, Some(718.0));
790        assert_eq!(m.3, Some(523.0));
791    }
792
793    #[test]
794    fn helvetica_bold_metrics() {
795        let m = StandardFont::HelveticaBold.canonical_metrics().unwrap();
796        assert_eq!(m.0, 718.0);
797        assert_eq!(m.1, -207.0);
798        assert_eq!(m.2, Some(718.0));
799        assert_eq!(m.3, Some(532.0));
800    }
801
802    #[test]
803    fn helvetica_oblique_metrics() {
804        let m = StandardFont::HelveticaOblique.canonical_metrics().unwrap();
805        assert_eq!(m.0, 718.0);
806        assert_eq!(m.1, -207.0);
807        assert_eq!(m.2, Some(718.0));
808        assert_eq!(m.3, Some(523.0));
809    }
810
811    #[test]
812    fn helvetica_bold_oblique_metrics() {
813        let m = StandardFont::HelveticaBoldOblique
814            .canonical_metrics()
815            .unwrap();
816        assert_eq!(m.0, 718.0);
817        assert_eq!(m.1, -207.0);
818        assert_eq!(m.2, Some(718.0));
819        assert_eq!(m.3, Some(532.0));
820    }
821
822    #[test]
823    fn courier_metrics() {
824        let m = StandardFont::Courier.canonical_metrics().unwrap();
825        assert_eq!(m.0, 629.0);
826        assert_eq!(m.1, -157.0);
827        assert_eq!(m.2, Some(562.0));
828        assert_eq!(m.3, Some(426.0));
829    }
830
831    #[test]
832    fn courier_bold_metrics() {
833        let m = StandardFont::CourierBold.canonical_metrics().unwrap();
834        assert_eq!(m.0, 629.0);
835        assert_eq!(m.1, -157.0);
836        assert_eq!(m.2, Some(562.0));
837        assert_eq!(m.3, Some(439.0));
838    }
839
840    #[test]
841    fn courier_oblique_metrics() {
842        let m = StandardFont::CourierOblique.canonical_metrics().unwrap();
843        assert_eq!(m.0, 629.0);
844        assert_eq!(m.1, -157.0);
845        assert_eq!(m.2, Some(562.0));
846        assert_eq!(m.3, Some(426.0));
847    }
848
849    #[test]
850    fn courier_bold_oblique_metrics() {
851        let m = StandardFont::CourierBoldOblique
852            .canonical_metrics()
853            .unwrap();
854        assert_eq!(m.0, 629.0);
855        assert_eq!(m.1, -157.0);
856        assert_eq!(m.2, Some(562.0));
857        assert_eq!(m.3, Some(439.0));
858    }
859
860    #[test]
861    fn times_roman_metrics() {
862        let m = StandardFont::TimesRoman.canonical_metrics().unwrap();
863        assert_eq!(m.0, 683.0);
864        assert_eq!(m.1, -217.0);
865        assert_eq!(m.2, Some(662.0));
866        assert_eq!(m.3, Some(450.0));
867    }
868
869    #[test]
870    fn times_bold_metrics() {
871        let m = StandardFont::TimesBold.canonical_metrics().unwrap();
872        assert_eq!(m.0, 683.0);
873        assert_eq!(m.1, -217.0);
874        assert_eq!(m.2, Some(676.0));
875        assert_eq!(m.3, Some(461.0));
876    }
877
878    #[test]
879    fn times_italic_metrics() {
880        let m = StandardFont::TimesItalic.canonical_metrics().unwrap();
881        assert_eq!(m.0, 683.0);
882        assert_eq!(m.1, -217.0);
883        assert_eq!(m.2, Some(653.0));
884        assert_eq!(m.3, Some(441.0));
885    }
886
887    #[test]
888    fn times_bold_italic_metrics() {
889        let m = StandardFont::TimesBoldItalic.canonical_metrics().unwrap();
890        assert_eq!(m.0, 683.0);
891        assert_eq!(m.1, -217.0);
892        assert_eq!(m.2, Some(669.0));
893        assert_eq!(m.3, Some(462.0));
894    }
895
896    #[test]
897    fn symbol_metrics_no_letterform_values() {
898        let m = StandardFont::Symbol.canonical_metrics().unwrap();
899        assert_eq!(m.0, 1010.0);
900        assert_eq!(m.1, -293.0);
901        assert!(m.2.is_none(), "Symbol has no cap_height");
902        assert!(m.3.is_none(), "Symbol has no x_height");
903    }
904
905    #[test]
906    fn zapf_dingbats_metrics_no_letterform_values() {
907        let m = StandardFont::ZapfDingBats.canonical_metrics().unwrap();
908        assert_eq!(m.0, 820.0);
909        assert_eq!(m.1, -143.0);
910        assert!(m.2.is_none(), "ZapfDingbats has no cap_height");
911        assert!(m.3.is_none(), "ZapfDingbats has no x_height");
912    }
913
914    #[test]
915    fn all_14_fonts_return_some() {
916        let all = &[
917            StandardFont::Helvetica,
918            StandardFont::HelveticaBold,
919            StandardFont::HelveticaOblique,
920            StandardFont::HelveticaBoldOblique,
921            StandardFont::Courier,
922            StandardFont::CourierBold,
923            StandardFont::CourierOblique,
924            StandardFont::CourierBoldOblique,
925            StandardFont::TimesRoman,
926            StandardFont::TimesBold,
927            StandardFont::TimesItalic,
928            StandardFont::TimesBoldItalic,
929            StandardFont::Symbol,
930            StandardFont::ZapfDingBats,
931        ];
932        for font in all {
933            assert!(
934                font.canonical_metrics().is_some(),
935                "font {:?} returned None",
936                font
937            );
938        }
939    }
940
941    #[test]
942    fn ascent_always_positive_descent_always_negative() {
943        let all = &[
944            StandardFont::Helvetica,
945            StandardFont::HelveticaBold,
946            StandardFont::HelveticaOblique,
947            StandardFont::HelveticaBoldOblique,
948            StandardFont::Courier,
949            StandardFont::CourierBold,
950            StandardFont::CourierOblique,
951            StandardFont::CourierBoldOblique,
952            StandardFont::TimesRoman,
953            StandardFont::TimesBold,
954            StandardFont::TimesItalic,
955            StandardFont::TimesBoldItalic,
956            StandardFont::Symbol,
957            StandardFont::ZapfDingBats,
958        ];
959        for font in all {
960            let (ascent, descent, _, _) = font.canonical_metrics().unwrap();
961            assert!(
962                ascent > 0.0,
963                "font {:?} ascent {} not positive",
964                font,
965                ascent
966            );
967            assert!(
968                descent < 0.0,
969                "font {:?} descent {} not negative",
970                font,
971                descent
972            );
973        }
974    }
975}