hayro_interpret/font/
mod.rs

1//! Interacting with the different kinds of PDF fonts.
2
3use crate::cache::Cache;
4use crate::context::Context;
5use crate::device::Device;
6use crate::font::cid::Type0Font;
7use crate::font::generated::{
8    glyph_names, mac_expert, mac_os_roman, mac_roman, standard, win_ansi,
9};
10use crate::font::true_type::TrueTypeFont;
11use crate::font::type1::Type1Font;
12use crate::font::type3::Type3;
13use crate::interpret::state::State;
14use crate::{CacheKey, FontResolverFn, InterpreterSettings, Paint};
15use bitflags::bitflags;
16use hayro_syntax::object::Name;
17use hayro_syntax::object::dict::keys::SUBTYPE;
18use hayro_syntax::object::dict::keys::*;
19use hayro_syntax::object::{Dict, Stream};
20use hayro_syntax::page::Resources;
21use hayro_syntax::xref::XRef;
22use kurbo::{Affine, BezPath, Vec2};
23use log::warn;
24use outline::OutlineFont;
25use skrifa::GlyphId;
26use std::fmt::Debug;
27use std::ops::Deref;
28use std::rc::Rc;
29use std::sync::Arc;
30
31mod blob;
32mod cid;
33mod cmap;
34mod generated;
35mod glyph_simulator;
36pub(crate) mod outline;
37mod standard_font;
38mod true_type;
39mod type1;
40pub(crate) mod type3;
41
/// The units-per-em value glyph coordinates are expressed in.
///
/// Glyph transforms scale by `1.0 / UNITS_PER_EM` to bring glyph coordinates
/// into text space.
pub(crate) const UNITS_PER_EM: f32 = 1000.0;
43
/// A container for the bytes of a font file.
///
/// The data is reference-counted and `Send + Sync`, so it can be cloned cheaply
/// and shared across threads.
pub type FontData = Arc<dyn AsRef<[u8]> + Send + Sync>;
46
47use crate::font::cmap::{CMap, parse_cmap};
48use crate::util::hash128;
49pub use standard_font::StandardFont;
50
/// A glyph that can be drawn.
///
/// Use [`Glyph::as_unicode`] to query the character a glyph represents.
pub enum Glyph<'a> {
    /// A glyph defined by an outline.
    Outline(OutlineGlyph),
    /// A type3 glyph, defined by PDF drawing instructions.
    ///
    /// NOTE(review): presumably boxed to keep `Glyph` small, since a
    /// [`Type3Glyph`] carries interpreter state, resources and settings.
    Type3(Box<Type3Glyph<'a>>),
}
58
59impl Glyph<'_> {
60    /// Returns the Unicode code point for this glyph, if available.
61    ///
62    /// This method attempts to determine the Unicode character that this glyph
63    /// represents. The exact fallback chain depends on the font type:
64    ///
65    /// **For Outline Fonts (Type1, TrueType, CFF):**
66    /// 1. `ToUnicode` `CMap`
67    /// 2. Glyph name → Unicode (via Adobe Glyph List)
68    /// 3. Unicode naming conventions (e.g., "uni0041", "u0041")
69    ///
70    /// **For CID Fonts (Type0):**
71    /// 1. `ToUnicode` `CMap`
72    ///
73    ///
74    /// **For Type3 Fonts:**
75    /// 1. `ToUnicode` `CMap`
76    ///
77    /// Returns `None` if the Unicode value could not be determined.
78    ///
79    /// Please note that this method is still somewhat experimental and might
80    /// not work reliably in all cases.
81    pub fn as_unicode(&self) -> Option<char> {
82        match self {
83            Glyph::Outline(g) => g.as_unicode(),
84            Glyph::Type3(g) => g.as_unicode(),
85        }
86    }
87}
88
/// An identifier that uniquely identifies a glyph, for caching purposes.
///
/// Obtained via [`OutlineGlyph::identifier`]; its cache key combines the
/// glyph ID with the cache key of the font.
#[derive(Clone, Debug)]
pub struct GlyphIdentifier {
    // The ID of the glyph within `font`.
    id: GlyphId,
    // The outline font the glyph belongs to.
    font: OutlineFont,
}
95
96impl CacheKey for GlyphIdentifier {
97    fn cache_key(&self) -> u128 {
98        hash128(&(self.id, self.font.cache_key()))
99    }
100}
101
/// A glyph defined by an outline.
///
/// Produced for Type1, TrueType and Type0 fonts.
#[derive(Clone, Debug)]
pub struct OutlineGlyph {
    // The ID of the glyph within `font`.
    pub(crate) id: GlyphId,
    // The font that provides the outline.
    pub(crate) font: OutlineFont,
    // The character code the glyph was selected with; used for the Unicode lookup.
    pub(crate) char_code: u32,
}
109
110impl OutlineGlyph {
111    /// Return the outline of the glyph, assuming an upem value of 1000.
112    pub fn outline(&self) -> BezPath {
113        self.font.outline_glyph(self.id)
114    }
115
116    /// Return the identifier of the glyph. You can use this to calculate the cache key
117    /// for the glyph.
118    ///
119    /// Note that the `glyph_transform` attribute is not considered in the cache key of
120    /// the identifier, only the glyph ID and the font.
121    pub fn identifier(&self) -> GlyphIdentifier {
122        GlyphIdentifier {
123            id: self.id,
124            font: self.font.clone(),
125        }
126    }
127
128    /// Returns the Unicode code point for this glyph, if available.
129    ///
130    /// See [`Glyph::as_unicode`] for details on the fallback chain used.
131    pub fn as_unicode(&self) -> Option<char> {
132        self.font.char_code_to_unicode(self.char_code)
133    }
134}
135
/// A type3 glyph.
///
/// Besides the font and glyph ID, it carries a snapshot of the interpreter
/// context it was created in.
#[derive(Clone)]
pub struct Type3Glyph<'a> {
    // The Type3 font the glyph belongs to.
    pub(crate) font: Rc<Type3<'a>>,
    // The ID of the glyph within `font`.
    pub(crate) glyph_id: GlyphId,
    // A clone of the interpreter state at the time the glyph was created.
    pub(crate) state: State<'a>,
    // The resources that were active when the glyph was created.
    pub(crate) parent_resources: Resources<'a>,
    // The object cache of the originating context.
    pub(crate) cache: Cache,
    // The cross-reference table of the originating context.
    pub(crate) xref: &'a XRef,
    // The interpreter settings of the originating context.
    pub(crate) settings: InterpreterSettings,
    // The character code the glyph was selected with; used for the Unicode lookup.
    pub(crate) char_code: u32,
}
148
149/// A glyph defined by PDF drawing instructions.
150impl<'a> Type3Glyph<'a> {
151    /// Draw the type3 glyph to the given device.
152    pub fn interpret(
153        &self,
154        device: &mut impl Device<'a>,
155        transform: Affine,
156        glyph_transform: Affine,
157        paint: &Paint<'a>,
158    ) {
159        self.font
160            .render_glyph(self, transform, glyph_transform, paint, device);
161    }
162
163    /// Returns the Unicode code point for this glyph, if available.
164    ///
165    /// Note: Type3 fonts can only provide Unicode via `ToUnicode` `CMap`.
166    pub fn as_unicode(&self) -> Option<char> {
167        self.font.char_code_to_unicode(self.char_code)
168    }
169}
170
171impl CacheKey for Type3Glyph<'_> {
172    fn cache_key(&self) -> u128 {
173        hash128(&(self.font.cache_key(), self.glyph_id))
174    }
175}
176
/// A PDF font of any supported kind.
///
/// Field `0` is the cache key of the font dictionary the font was created
/// from, field `1` is the concrete font implementation.
#[derive(Clone, Debug)]
pub(crate) struct Font<'a>(u128, FontType<'a>);
179
180impl<'a> Font<'a> {
181    pub(crate) fn new(dict: &Dict<'a>, resolver: &FontResolverFn) -> Option<Self> {
182        let f_type = match dict.get::<Name<'_>>(SUBTYPE)?.deref() {
183            TYPE1 | MM_TYPE1 => FontType::Type1(Rc::new(Type1Font::new(dict, resolver)?)),
184            TRUE_TYPE => TrueTypeFont::new(dict)
185                .map(Rc::new)
186                .map(FontType::TrueType)
187                .or_else(|| {
188                    Type1Font::new(dict, resolver)
189                        .map(Rc::new)
190                        .map(FontType::Type1)
191                })?,
192            TYPE0 => FontType::Type0(Rc::new(Type0Font::new(dict)?)),
193            TYPE3 => FontType::Type3(Rc::new(Type3::new(dict)?)),
194            f => {
195                warn!(
196                    "unimplemented font type {:?}",
197                    std::str::from_utf8(f).unwrap_or("unknown type")
198                );
199
200                return None;
201            }
202        };
203
204        let cache_key = dict.cache_key();
205
206        Some(Self(cache_key, f_type))
207    }
208
209    pub(crate) fn map_code(&self, code: u32) -> GlyphId {
210        match &self.1 {
211            FontType::Type1(f) => {
212                debug_assert!(code <= u8::MAX as u32);
213
214                f.map_code(code as u8)
215            }
216            FontType::TrueType(t) => {
217                debug_assert!(code <= u8::MAX as u32);
218
219                t.map_code(code as u8)
220            }
221            FontType::Type0(t) => t.map_code(code),
222            FontType::Type3(t) => {
223                debug_assert!(code <= u8::MAX as u32);
224
225                t.map_code(code as u8)
226            }
227        }
228    }
229
230    pub(crate) fn get_glyph(
231        &self,
232        glyph: GlyphId,
233        char_code: u32,
234        ctx: &mut Context<'a>,
235        resources: &Resources<'a>,
236        origin_displacement: Vec2,
237    ) -> (Glyph<'a>, Affine) {
238        let glyph_transform = ctx.get().text_state.full_transform()
239            * Affine::scale(1.0 / UNITS_PER_EM as f64)
240            * Affine::translate(origin_displacement);
241
242        let glyph = match &self.1 {
243            FontType::Type1(t) => {
244                let font = OutlineFont::Type1(t.clone());
245                Glyph::Outline(OutlineGlyph {
246                    id: glyph,
247                    font,
248                    char_code,
249                })
250            }
251            FontType::TrueType(t) => {
252                let font = OutlineFont::TrueType(t.clone());
253                Glyph::Outline(OutlineGlyph {
254                    id: glyph,
255                    font,
256                    char_code,
257                })
258            }
259            FontType::Type0(t) => {
260                let font = OutlineFont::Type0(t.clone());
261                Glyph::Outline(OutlineGlyph {
262                    id: glyph,
263                    font,
264                    char_code,
265                })
266            }
267            FontType::Type3(t) => {
268                let shape_glyph = Type3Glyph {
269                    font: t.clone(),
270                    glyph_id: glyph,
271                    state: ctx.get().clone(),
272                    parent_resources: resources.clone(),
273                    cache: ctx.object_cache.clone(),
274                    xref: ctx.xref,
275                    settings: ctx.settings.clone(),
276                    char_code,
277                };
278
279                Glyph::Type3(Box::new(shape_glyph))
280            }
281        };
282
283        (glyph, glyph_transform)
284    }
285
286    pub(crate) fn code_advance(&self, code: u32) -> Vec2 {
287        match &self.1 {
288            FontType::Type1(t) => {
289                debug_assert!(code <= u8::MAX as u32);
290
291                Vec2::new(t.glyph_width(code as u8).unwrap_or(0.0) as f64, 0.0)
292            }
293            FontType::TrueType(t) => {
294                debug_assert!(code <= u8::MAX as u32);
295
296                Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
297            }
298            FontType::Type0(t) => t.code_advance(code),
299            FontType::Type3(t) => {
300                debug_assert!(code <= u8::MAX as u32);
301
302                Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
303            }
304        }
305    }
306
307    pub(crate) fn origin_displacement(&self, code: u32) -> Vec2 {
308        match &self.1 {
309            FontType::Type1(_) => Vec2::default(),
310            FontType::TrueType(_) => Vec2::default(),
311            FontType::Type0(t) => t.origin_displacement(code),
312            FontType::Type3(_) => Vec2::default(),
313        }
314    }
315
316    pub(crate) fn read_code(&self, bytes: &[u8], offset: usize) -> (u32, usize) {
317        match &self.1 {
318            FontType::Type1(_) => (bytes[offset] as u32, 1),
319            FontType::TrueType(_) => (bytes[offset] as u32, 1),
320            FontType::Type0(t) => t.read_code(bytes, offset),
321            FontType::Type3(_) => (bytes[offset] as u32, 1),
322        }
323    }
324
325    pub(crate) fn is_horizontal(&self) -> bool {
326        match &self.1 {
327            FontType::Type1(_) => true,
328            FontType::TrueType(_) => true,
329            FontType::Type0(t) => t.is_horizontal(),
330            FontType::Type3(_) => true,
331        }
332    }
333}
334
335impl CacheKey for Font<'_> {
336    fn cache_key(&self) -> u128 {
337        self.0
338    }
339}
340
/// The concrete implementation backing a [`Font`].
#[derive(Clone, Debug)]
enum FontType<'a> {
    /// A Type1 font. Also used for `MMType1` fonts, and as a fallback for
    /// `TrueType` dictionaries that could not be loaded as TrueType fonts.
    Type1(Rc<Type1Font>),
    /// A TrueType font.
    TrueType(Rc<TrueTypeFont>),
    /// A Type0 (composite) font.
    Type0(Rc<Type0Font>),
    /// A Type3 font, whose glyphs are defined by PDF drawing instructions.
    Type3(Rc<Type3<'a>>),
}
348
/// The encodings that can map a single-byte character code to a glyph name.
#[derive(Debug)]
enum Encoding {
    /// Looked up in the generated `standard` table.
    Standard,
    /// Looked up in the generated `mac_roman` table, then in `mac_os_roman`.
    MacRoman,
    /// Looked up in the generated `win_ansi` table.
    WinAnsi,
    /// Looked up in the generated `mac_expert` table.
    MacExpert,
    /// No table is consulted; only code 0 resolves (to `.notdef`).
    BuiltIn,
}
357
358impl Encoding {
359    fn map_code(&self, code: u8) -> Option<&'static str> {
360        if code == 0 {
361            return Some(".notdef");
362        }
363        match self {
364            Self::Standard => standard::get(code),
365            Self::MacRoman => mac_roman::get(code).or_else(|| mac_os_roman::get(code)),
366            Self::WinAnsi => win_ansi::get(code),
367            Self::MacExpert => mac_expert::get(code),
368            Self::BuiltIn => None,
369        }
370    }
371}
372
/// The font stretch.
#[derive(Debug, Copy, Clone)]
pub enum FontStretch {
    /// Normal.
    Normal,
    /// Ultra condensed.
    UltraCondensed,
    /// Extra condensed.
    ExtraCondensed,
    /// Condensed.
    Condensed,
    /// Semi condensed.
    SemiCondensed,
    /// Semi expanded.
    SemiExpanded,
    /// Expanded.
    Expanded,
    /// Extra expanded.
    ExtraExpanded,
    /// Ultra expanded.
    UltraExpanded,
}

impl FontStretch {
    /// Parse a stretch from its name.
    ///
    /// The comparison is case-sensitive; anything that is not one of the
    /// eight non-normal stretch names yields [`FontStretch::Normal`].
    fn from_string(s: &str) -> Self {
        const NAMED: [(&str, FontStretch); 8] = [
            ("UltraCondensed", FontStretch::UltraCondensed),
            ("ExtraCondensed", FontStretch::ExtraCondensed),
            ("Condensed", FontStretch::Condensed),
            ("SemiCondensed", FontStretch::SemiCondensed),
            ("SemiExpanded", FontStretch::SemiExpanded),
            ("Expanded", FontStretch::Expanded),
            ("ExtraExpanded", FontStretch::ExtraExpanded),
            ("UltraExpanded", FontStretch::UltraExpanded),
        ];

        NAMED
            .iter()
            .find(|(name, _)| *name == s)
            .map_or(Self::Normal, |&(_, stretch)| stretch)
    }
}
411
bitflags! {
    /// Bitflags describing various characteristics of fonts.
    ///
    /// The bit values mirror the `Flags` entry of a PDF font descriptor, whose
    /// bit positions are numbered starting at 1 in the PDF specification
    /// (so `1 << 0` is bit position 1).
    #[derive(Debug)]
    pub(crate) struct FontFlags: u32 {
        /// All glyphs have the same width (bit position 1).
        const FIXED_PITCH = 1 << 0;
        /// Glyphs have serifs (bit position 2).
        const SERIF = 1 << 1;
        /// The font uses a character set outside the standard Latin one (bit position 3).
        const SYMBOLIC = 1 << 2;
        /// Glyphs resemble cursive handwriting (bit position 4).
        const SCRIPT = 1 << 3;
        /// The font uses the standard Latin character set (bit position 6).
        const NON_SYMBOLIC = 1 << 5;
        /// Glyphs are slanted (bit position 7).
        const ITALIC = 1 << 6;
        /// The font contains no lowercase letters (bit position 17).
        const ALL_CAP = 1 << 16;
        /// Lowercase letters are drawn as small capitals (bit position 18).
        const SMALL_CAP = 1 << 17;
        /// Bold glyphs should be painted with extra thickness at small sizes (bit position 19).
        const FORCE_BOLD = 1 << 18;
    }
}
427
/// A query for a font.
pub enum FontQuery {
    /// A query for one of the 14 PDF standard fonts.
    Standard(StandardFont),
    /// A query for a font that is not embedded in the PDF file.
    ///
    /// Note that this type of query is currently not supported,
    /// but will be implemented in the future.
    ///
    /// [`FallbackFontQuery::pick_standard_font`] can be used to map such a
    /// query onto one of the standard fonts.
    Fallback(FallbackFontQuery),
}
438
/// A query for a font with specific properties.
///
/// The [`Default`] value describes a regular font: no names, normal stretch,
/// weight 400 and all style flags unset.
#[derive(Debug, Clone)]
pub struct FallbackFontQuery {
    /// The postscript name of the font.
    pub post_script_name: Option<String>,
    /// The name of the font.
    pub font_name: Option<String>,
    /// The family of the font.
    pub font_family: Option<String>,
    /// The stretch of the font.
    pub font_stretch: FontStretch,
    /// The weight of the font (400 if not specified).
    pub font_weight: u32,
    /// Whether the font is monospaced.
    pub is_fixed_pitch: bool,
    /// Whether the font is serif.
    pub is_serif: bool,
    /// Whether the font is italic.
    pub is_italic: bool,
    /// Whether the font is bold.
    pub is_bold: bool,
    /// Whether the font is small cap.
    pub is_small_cap: bool,
}
463
464impl FallbackFontQuery {
465    pub(crate) fn new(dict: &Dict<'_>) -> Self {
466        let mut data = Self::default();
467
468        let remove_subset_prefix = |s: String| {
469            if s.contains("+") {
470                s.chars().skip(7).collect()
471            } else {
472                s
473            }
474        };
475
476        data.post_script_name = dict
477            .get::<Name<'_>>(BASE_FONT)
478            .map(|n| remove_subset_prefix(n.as_str().to_string()));
479
480        if let Some(descriptor) = dict.get::<Dict<'_>>(FONT_DESC) {
481            data.font_name = dict
482                .get::<Name<'_>>(FONT_NAME)
483                .map(|n| remove_subset_prefix(n.as_str().to_string()));
484            data.font_family = descriptor
485                .get::<Name<'_>>(FONT_FAMILY)
486                .map(|n| n.as_str().to_string());
487            data.font_stretch = descriptor
488                .get::<Name<'_>>(FONT_STRETCH)
489                .map(|n| FontStretch::from_string(n.as_str()))
490                .unwrap_or(FontStretch::Normal);
491            data.font_weight = descriptor.get::<u32>(FONT_WEIGHT).unwrap_or(400);
492
493            if let Some(flags) = descriptor
494                .get::<u32>(FLAGS)
495                .map(FontFlags::from_bits_truncate)
496            {
497                data.is_serif = flags.contains(FontFlags::SERIF);
498                data.is_italic = flags.contains(FontFlags::ITALIC)
499                    || data
500                        .post_script_name
501                        .as_ref()
502                        .is_some_and(|s| s.contains("Italic"));
503                data.is_small_cap = flags.contains(FontFlags::SMALL_CAP);
504                data.is_bold = data
505                    .post_script_name
506                    .as_ref()
507                    .is_some_and(|s| s.contains("Bold"));
508            }
509        }
510
511        data
512    }
513
514    /// Do a best-effort fallback to the 14 standard fonts based on the query.
515    pub fn pick_standard_font(&self) -> StandardFont {
516        if self.is_fixed_pitch {
517            match (self.is_bold, self.is_italic) {
518                (true, true) => StandardFont::CourierBoldOblique,
519                (true, false) => StandardFont::CourierBold,
520                (false, true) => StandardFont::CourierOblique,
521                (false, false) => StandardFont::Courier,
522            }
523        } else if !self.is_serif {
524            match (self.is_bold, self.is_italic) {
525                (true, true) => StandardFont::HelveticaBoldOblique,
526                (true, false) => StandardFont::HelveticaBold,
527                (false, true) => StandardFont::HelveticaOblique,
528                (false, false) => StandardFont::Helvetica,
529            }
530        } else {
531            match (self.is_bold, self.is_italic) {
532                (true, true) => StandardFont::TimesBoldItalic,
533                (true, false) => StandardFont::TimesBold,
534                (false, true) => StandardFont::TimesItalic,
535                (false, false) => StandardFont::TimesRoman,
536            }
537        }
538    }
539}
540
541impl Default for FallbackFontQuery {
542    fn default() -> Self {
543        Self {
544            post_script_name: None,
545            font_name: None,
546            font_family: None,
547            font_stretch: FontStretch::Normal,
548            font_weight: 400,
549            is_fixed_pitch: false,
550            is_serif: false,
551            is_italic: false,
552            is_bold: false,
553            is_small_cap: false,
554        }
555    }
556}
557
558/// Convert a glyph name to a Unicode character, if possible.
559/// An incomplete implementation of the Adobe Glyph List Specification
560/// <https://github.com/adobe-type-tools/agl-specification>
561pub(crate) fn glyph_name_to_unicode(name: &str) -> Option<char> {
562    if let Some(unicode_str) = glyph_names::get(name) {
563        return unicode_str.chars().next();
564    }
565
566    unicode_from_name(name).or_else(|| {
567        warn!("failed to map glyph name {} to unicode", name);
568
569        None
570    })
571}
572
/// Derive a Unicode character from a glyph name of the form `uniXXXX` or
/// `uXXXX`, where everything after the prefix is one hexadecimal number.
///
/// The `uni` prefix takes precedence over `u`. Returns `None` if the name has
/// neither prefix, the remainder is not a valid hexadecimal number, or the
/// number is not a valid Unicode scalar value.
pub(crate) fn unicode_from_name(name: &str) -> Option<char> {
    let hex = match name.strip_prefix("uni") {
        Some(rest) => rest,
        None => name.strip_prefix('u')?,
    };

    u32::from_str_radix(hex, 16).ok().and_then(char::from_u32)
}
584
585pub(crate) fn read_to_unicode(dict: &Dict<'_>) -> Option<CMap> {
586    dict.get::<Stream<'_>>(TO_UNICODE)
587        .and_then(|s| s.decoded().ok())
588        .and_then(|data| {
589            let cmap_str = std::str::from_utf8(&data).ok()?;
590            parse_cmap(cmap_str)
591        })
592}