hayro_interpret/font/
mod.rs

1//! Interacting with the different kinds of PDF fonts.
2
3use crate::cache::Cache;
4use crate::context::Context;
5use crate::device::Device;
6use crate::font::cid::Type0Font;
7use crate::font::generated::{mac_expert, mac_os_roman, mac_roman, standard, win_ansi};
8use crate::font::true_type::TrueTypeFont;
9use crate::font::type1::Type1Font;
10use crate::font::type3::Type3;
11use crate::interpret::state::State;
12use crate::{CacheKey, FontResolverFn, InterpreterSettings, Paint};
13use bitflags::bitflags;
14use hayro_syntax::object::Dict;
15use hayro_syntax::object::Name;
16use hayro_syntax::object::dict::keys::SUBTYPE;
17use hayro_syntax::object::dict::keys::*;
18use hayro_syntax::page::Resources;
19use hayro_syntax::xref::XRef;
20use kurbo::{Affine, BezPath, Vec2};
21use log::warn;
22use outline::OutlineFont;
23use skrifa::GlyphId;
24use std::fmt::Debug;
25use std::ops::Deref;
26use std::rc::Rc;
27use std::sync::Arc;
28
29mod blob;
30mod cid;
31mod cmap;
32mod generated;
33mod glyph_simulator;
34pub(crate) mod outline;
35mod standard_font;
36mod true_type;
37mod type1;
38pub(crate) mod type3;
39
/// The units-per-em value that glyph space is normalized to in this module.
///
/// `Font::get_glyph` scales glyph transforms by the reciprocal of this value.
pub(crate) const UNITS_PER_EM: f32 = 1000.0;
41
/// A reference-counted, thread-safe container for the raw bytes of a font.
///
/// Any type that can be viewed as a byte slice and is `Send + Sync` (for example
/// `Vec<u8>`) can be stored; cloning the `Arc` shares the bytes without copying them.
pub type FontData = Arc<dyn AsRef<[u8]> + Send + Sync>;
44
45use crate::util::hash128;
46pub use standard_font::StandardFont;
47
/// A glyph that can be drawn.
///
/// Glyphs come in two flavours: outline glyphs, whose shape can be queried as a
/// path, and Type3 glyphs, which have to be interpreted against a device.
pub enum Glyph<'a> {
    /// A glyph defined by an outline.
    Outline(OutlineGlyph),
    /// A type3 glyph, defined by PDF drawing instructions.
    ///
    /// Boxed, so that the enum does not store the Type3 glyph's fields inline.
    Type3(Box<Type3Glyph<'a>>),
}
55
/// An identifier that uniquely identifies a glyph, for caching purposes.
///
/// It pairs a glyph ID with the outline font the glyph belongs to; both are
/// hashed into the identifier's cache key.
#[derive(Clone, Debug)]
pub struct GlyphIdentifier {
    /// The ID of the glyph within `font`.
    id: GlyphId,
    /// The outline font the glyph is taken from.
    font: OutlineFont,
}
62
63impl CacheKey for GlyphIdentifier {
64    fn cache_key(&self) -> u128 {
65        hash128(&(self.id, self.font.cache_key()))
66    }
67}
68
/// A glyph defined by an outline.
///
/// The outline itself is not stored; it is looked up in `font` on demand.
#[derive(Clone, Debug)]
pub struct OutlineGlyph {
    /// The ID of the glyph within `font`.
    pub(crate) id: GlyphId,
    /// The outline font that provides the glyph's outline.
    pub(crate) font: OutlineFont,
}
75
76impl OutlineGlyph {
77    /// Return the outline of the glyph, assuming an upem value of 1000.
78    pub fn outline(&self) -> BezPath {
79        self.font.outline_glyph(self.id)
80    }
81
82    /// Return the identifier of the glyph. You can use this to calculate the cache key
83    /// for the glyph.
84    ///
85    /// Note that the `glyph_transform` attribute is not considered in the cache key of
86    /// the identifier, only the glyph ID and the font.
87    pub fn identifier(&self) -> GlyphIdentifier {
88        GlyphIdentifier {
89            id: self.id,
90            font: self.font.clone(),
91        }
92    }
93}
94
/// A type3 glyph.
///
/// Besides the font and glyph ID, it carries the interpreter data that
/// `Font::get_glyph` captures from the context when the glyph is created.
#[derive(Clone)]
pub struct Type3Glyph<'a> {
    /// The Type3 font that renders this glyph.
    pub(crate) font: Rc<Type3<'a>>,
    /// The ID of the glyph within `font`.
    pub(crate) glyph_id: GlyphId,
    /// A clone of the interpreter state at the time the glyph was created.
    pub(crate) state: State<'a>,
    /// A clone of the resources that were passed in when the glyph was created.
    pub(crate) parent_resources: Resources<'a>,
    /// A clone of the context's object cache.
    pub(crate) cache: Cache,
    /// The cross-reference table of the context the glyph was created in.
    pub(crate) xref: &'a XRef,
    /// A clone of the interpreter settings of that context.
    pub(crate) settings: InterpreterSettings,
}
106
107/// A glyph defined by PDF drawing instructions.
108impl<'a> Type3Glyph<'a> {
109    /// Draw the type3 glyph to the given device.
110    pub fn interpret(
111        &self,
112        device: &mut impl Device<'a>,
113        transform: Affine,
114        glyph_transform: Affine,
115        paint: &Paint<'a>,
116    ) {
117        self.font
118            .render_glyph(self, transform, glyph_transform, paint, device);
119    }
120}
121
122impl CacheKey for Type3Glyph<'_> {
123    fn cache_key(&self) -> u128 {
124        hash128(&(self.font.cache_key(), self.glyph_id))
125    }
126}
127
/// A PDF font of any of the supported kinds.
///
/// Field `0` is the cache key of the font dictionary the font was created from,
/// field `1` is the concrete font implementation.
#[derive(Clone, Debug)]
pub(crate) struct Font<'a>(u128, FontType<'a>);
130
131impl<'a> Font<'a> {
132    pub(crate) fn new(dict: &Dict<'a>, resolver: &FontResolverFn) -> Option<Self> {
133        let f_type = match dict.get::<Name>(SUBTYPE)?.deref() {
134            TYPE1 | MM_TYPE1 => FontType::Type1(Rc::new(Type1Font::new(dict, resolver)?)),
135            TRUE_TYPE => TrueTypeFont::new(dict)
136                .map(Rc::new)
137                .map(FontType::TrueType)
138                .or_else(|| {
139                    Type1Font::new(dict, resolver)
140                        .map(Rc::new)
141                        .map(FontType::Type1)
142                })?,
143            TYPE0 => FontType::Type0(Rc::new(Type0Font::new(dict)?)),
144            TYPE3 => FontType::Type3(Rc::new(Type3::new(dict))),
145            f => {
146                warn!(
147                    "unimplemented font type {:?}",
148                    std::str::from_utf8(f).unwrap_or("unknown type")
149                );
150
151                return None;
152            }
153        };
154
155        let cache_key = dict.cache_key();
156
157        Some(Self(cache_key, f_type))
158    }
159
160    pub(crate) fn map_code(&self, code: u32) -> GlyphId {
161        match &self.1 {
162            FontType::Type1(f) => {
163                debug_assert!(code <= u8::MAX as u32);
164
165                f.map_code(code as u8)
166            }
167            FontType::TrueType(t) => {
168                debug_assert!(code <= u8::MAX as u32);
169
170                t.map_code(code as u8)
171            }
172            FontType::Type0(t) => t.map_code(code),
173            FontType::Type3(t) => {
174                debug_assert!(code <= u8::MAX as u32);
175
176                t.map_code(code as u8)
177            }
178        }
179    }
180
181    pub(crate) fn get_glyph(
182        &self,
183        glyph: GlyphId,
184        ctx: &mut Context<'a>,
185        resources: &Resources<'a>,
186        origin_displacement: Vec2,
187    ) -> (Glyph<'a>, Affine) {
188        let glyph_transform = ctx.get().text_state.full_transform()
189            * Affine::scale(1.0 / UNITS_PER_EM as f64)
190            * Affine::translate(origin_displacement);
191
192        let glyph = match &self.1 {
193            FontType::Type1(t) => {
194                let font = OutlineFont::Type1(t.clone());
195                Glyph::Outline(OutlineGlyph { id: glyph, font })
196            }
197            FontType::TrueType(t) => {
198                let font = OutlineFont::TrueType(t.clone());
199                Glyph::Outline(OutlineGlyph { id: glyph, font })
200            }
201            FontType::Type0(t) => {
202                let font = OutlineFont::Type0(t.clone());
203                Glyph::Outline(OutlineGlyph { id: glyph, font })
204            }
205            FontType::Type3(t) => {
206                let shape_glyph = Type3Glyph {
207                    font: t.clone(),
208                    glyph_id: glyph,
209                    state: ctx.get().clone(),
210                    parent_resources: resources.clone(),
211                    cache: ctx.object_cache.clone(),
212                    xref: ctx.xref,
213                    settings: ctx.settings.clone(),
214                };
215
216                Glyph::Type3(Box::new(shape_glyph))
217            }
218        };
219
220        (glyph, glyph_transform)
221    }
222
223    pub(crate) fn code_advance(&self, code: u32) -> Vec2 {
224        match &self.1 {
225            FontType::Type1(t) => {
226                debug_assert!(code <= u8::MAX as u32);
227
228                Vec2::new(t.glyph_width(code as u8).unwrap_or(0.0) as f64, 0.0)
229            }
230            FontType::TrueType(t) => {
231                debug_assert!(code <= u8::MAX as u32);
232
233                Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
234            }
235            FontType::Type0(t) => t.code_advance(code),
236            FontType::Type3(t) => {
237                debug_assert!(code <= u8::MAX as u32);
238
239                Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
240            }
241        }
242    }
243
244    pub(crate) fn origin_displacement(&self, code: u32) -> Vec2 {
245        match &self.1 {
246            FontType::Type1(_) => Vec2::default(),
247            FontType::TrueType(_) => Vec2::default(),
248            FontType::Type0(t) => t.origin_displacement(code),
249            FontType::Type3(_) => Vec2::default(),
250        }
251    }
252
253    pub(crate) fn read_code(&self, bytes: &[u8], offset: usize) -> (u32, usize) {
254        match &self.1 {
255            FontType::Type1(_) => (bytes[offset] as u32, 1),
256            FontType::TrueType(_) => (bytes[offset] as u32, 1),
257            FontType::Type0(t) => t.read_code(bytes, offset),
258            FontType::Type3(_) => (bytes[offset] as u32, 1),
259        }
260    }
261
262    pub(crate) fn is_horizontal(&self) -> bool {
263        match &self.1 {
264            FontType::Type1(_) => true,
265            FontType::TrueType(_) => true,
266            FontType::Type0(t) => t.is_horizontal(),
267            FontType::Type3(_) => true,
268        }
269    }
270}
271
272impl CacheKey for Font<'_> {
273    fn cache_key(&self) -> u128 {
274        self.0
275    }
276}
277
/// The concrete implementation behind a `Font`, one variant per supported kind.
#[derive(Clone, Debug)]
enum FontType<'a> {
    /// A Type1 font. Also used for `MMType1` fonts and for TrueType fonts that
    /// could not be loaded as TrueType.
    Type1(Rc<Type1Font>),
    /// A TrueType font.
    TrueType(Rc<TrueTypeFont>),
    /// A Type0 font; the only kind here whose character codes are not limited to
    /// a single byte.
    Type0(Rc<Type0Font>),
    /// A Type3 font.
    Type3(Rc<Type3<'a>>),
}
285
/// The encodings that character codes can be mapped to glyph names with.
#[derive(Debug)]
enum Encoding {
    /// Looked up in the generated `standard` table.
    Standard,
    /// Looked up in the generated `mac_roman` table, then in `mac_os_roman`.
    MacRoman,
    /// Looked up in the generated `win_ansi` table.
    WinAnsi,
    /// Looked up in the generated `mac_expert` table.
    MacExpert,
    /// No lookup table is available here; only code 0 resolves to a name.
    BuiltIn,
}
294
295impl Encoding {
296    fn map_code(&self, code: u8) -> Option<&'static str> {
297        if code == 0 {
298            return Some(".notdef");
299        }
300        match self {
301            Encoding::Standard => standard::get(code),
302            Encoding::MacRoman => mac_roman::get(code).or_else(|| mac_os_roman::get(code)),
303            Encoding::WinAnsi => win_ansi::get(code),
304            Encoding::MacExpert => mac_expert::get(code),
305            Encoding::BuiltIn => None,
306        }
307    }
308}
309
/// The stretch (width class) of a font.
#[derive(Clone, Copy, Debug)]
pub enum FontStretch {
    /// Normal width.
    Normal,
    /// Ultra condensed width.
    UltraCondensed,
    /// Extra condensed width.
    ExtraCondensed,
    /// Condensed width.
    Condensed,
    /// Semi condensed width.
    SemiCondensed,
    /// Semi expanded width.
    SemiExpanded,
    /// Expanded width.
    Expanded,
    /// Extra expanded width.
    ExtraExpanded,
    /// Ultra expanded width.
    UltraExpanded,
}

impl FontStretch {
    /// Parse a font stretch from its name.
    ///
    /// The comparison is exact and case-sensitive. Every name that is not
    /// recognized (which includes `"Normal"` itself) yields `FontStretch::Normal`.
    fn from_string(s: &str) -> Self {
        const NAMED: [(&str, FontStretch); 8] = [
            ("UltraCondensed", FontStretch::UltraCondensed),
            ("ExtraCondensed", FontStretch::ExtraCondensed),
            ("Condensed", FontStretch::Condensed),
            ("SemiCondensed", FontStretch::SemiCondensed),
            ("SemiExpanded", FontStretch::SemiExpanded),
            ("Expanded", FontStretch::Expanded),
            ("ExtraExpanded", FontStretch::ExtraExpanded),
            ("UltraExpanded", FontStretch::UltraExpanded),
        ];

        NAMED
            .iter()
            .find(|(name, _)| *name == s)
            .map_or(FontStretch::Normal, |&(_, stretch)| stretch)
    }
}
348
bitflags! {
    /// Bitflags describing various characteristics of fonts.
    ///
    /// The bit positions follow the `Flags` entry of a PDF font descriptor.
    #[derive(Debug)]
    pub(crate) struct FontFlags: u32 {
        /// All glyphs have the same width.
        const FIXED_PITCH = 1 << 0;
        /// Glyphs have serifs.
        const SERIF = 1 << 1;
        /// The font contains glyphs outside the standard Latin character set.
        const SYMBOLIC = 1 << 2;
        /// Glyphs resemble cursive handwriting.
        const SCRIPT = 1 << 3;
        /// The font uses the standard Latin character set or a subset of it.
        const NON_SYMBOLIC = 1 << 5;
        /// Glyphs have dominant vertical strokes that are slanted.
        const ITALIC = 1 << 6;
        /// The font contains no lowercase letters.
        const ALL_CAP = 1 << 16;
        /// Lowercase letters are drawn as smaller versions of the uppercase ones.
        const SMALL_CAP = 1 << 17;
        /// Bold glyphs should be painted with extra thickness even at small sizes.
        const FORCE_BOLD = 1 << 18;
    }
}
364
/// A query for a font.
///
/// Describes which font is being asked for: either one of the standard fonts, or
/// a font described by a set of properties.
pub enum FontQuery {
    /// A query for one of the 14 PDF standard fonts.
    Standard(StandardFont),
    /// A query for a font that is not embedded in the PDF file.
    ///
    /// Note that this type of query is currently not supported,
    /// but will be implemented in the future.
    Fallback(FallbackFontQuery),
}
375
/// A query for a font with specific properties.
///
/// All properties are best-effort hints extracted from a font dictionary and
/// its font descriptor; anything that could not be determined keeps its default.
#[derive(Debug, Clone)]
pub struct FallbackFontQuery {
    /// The postscript name of the font.
    pub post_script_name: Option<String>,
    /// The name of the font.
    pub font_name: Option<String>,
    /// The family of the font.
    pub font_family: Option<String>,
    /// The stretch of the font. Defaults to `FontStretch::Normal`.
    pub font_stretch: FontStretch,
    /// The weight of the font. Defaults to 400.
    pub font_weight: u32,
    /// Whether the font is monospaced.
    pub is_fixed_pitch: bool,
    /// Whether the font is serif.
    pub is_serif: bool,
    /// Whether the font is italic.
    pub is_italic: bool,
    /// Whether the font is bold.
    pub is_bold: bool,
    /// Whether the font is small cap.
    pub is_small_cap: bool,
}
400
401impl FallbackFontQuery {
402    pub(crate) fn new(dict: &Dict) -> Self {
403        let mut data = Self::default();
404
405        let remove_subset_prefix = |s: String| {
406            if s.contains("+") {
407                s.chars().skip(7).collect()
408            } else {
409                s
410            }
411        };
412
413        data.post_script_name = dict
414            .get::<Name>(BASE_FONT)
415            .map(|n| remove_subset_prefix(n.as_str().to_string()));
416
417        if let Some(descriptor) = dict.get::<Dict>(FONT_DESC) {
418            data.font_name = dict
419                .get::<Name>(FONT_NAME)
420                .map(|n| remove_subset_prefix(n.as_str().to_string()));
421            data.font_family = descriptor
422                .get::<Name>(FONT_FAMILY)
423                .map(|n| n.as_str().to_string());
424            data.font_stretch = descriptor
425                .get::<Name>(FONT_STRETCH)
426                .map(|n| FontStretch::from_string(n.as_str()))
427                .unwrap_or(FontStretch::Normal);
428            data.font_weight = descriptor.get::<u32>(FONT_WEIGHT).unwrap_or(400);
429
430            if let Some(flags) = descriptor
431                .get::<u32>(FLAGS)
432                .map(FontFlags::from_bits_truncate)
433            {
434                data.is_serif = flags.contains(FontFlags::SERIF);
435                data.is_italic = flags.contains(FontFlags::ITALIC)
436                    || data
437                        .post_script_name
438                        .as_ref()
439                        .is_some_and(|s| s.contains("Italic"));
440                data.is_small_cap = flags.contains(FontFlags::SMALL_CAP);
441                data.is_bold = data
442                    .post_script_name
443                    .as_ref()
444                    .is_some_and(|s| s.contains("Bold"));
445            }
446        }
447
448        data
449    }
450
451    /// Do a best-effort fallback to the 14 standard fonts based on the query.
452    pub fn pick_standard_font(&self) -> StandardFont {
453        if self.is_fixed_pitch {
454            match (self.is_bold, self.is_italic) {
455                (true, true) => StandardFont::CourierBoldOblique,
456                (true, false) => StandardFont::CourierBold,
457                (false, true) => StandardFont::CourierOblique,
458                (false, false) => StandardFont::Courier,
459            }
460        } else if !self.is_serif {
461            match (self.is_bold, self.is_italic) {
462                (true, true) => StandardFont::HelveticaBoldOblique,
463                (true, false) => StandardFont::HelveticaBold,
464                (false, true) => StandardFont::HelveticaOblique,
465                (false, false) => StandardFont::Helvetica,
466            }
467        } else {
468            match (self.is_bold, self.is_italic) {
469                (true, true) => StandardFont::TimesBoldItalic,
470                (true, false) => StandardFont::TimesBold,
471                (false, true) => StandardFont::TimesItalic,
472                (false, false) => StandardFont::TimesRoman,
473            }
474        }
475    }
476}
477
impl Default for FallbackFontQuery {
    /// Create a query without any names, with normal stretch, a weight of 400 and
    /// all style properties set to `false`.
    fn default() -> Self {
        Self {
            post_script_name: None,
            font_name: None,
            font_family: None,
            font_stretch: FontStretch::Normal,
            font_weight: 400,
            is_fixed_pitch: false,
            is_serif: false,
            is_italic: false,
            is_bold: false,
            is_small_cap: false,
        }
    }
}
493}