hayro_interpret/interpret/
mod.rs

1use crate::ClipPath;
2use crate::FillRule;
3use crate::color::ColorSpace;
4use crate::context::Context;
5use crate::convert::{convert_line_cap, convert_line_join};
6use crate::device::Device;
7use crate::font::{FontData, FontQuery};
8use crate::interpret::path::{
9    close_path, fill_path, fill_path_impl, fill_stroke_path, stroke_path,
10};
11use crate::interpret::state::{handle_gs, restore_state, save_sate};
12use crate::interpret::text::TextRenderingMode;
13use crate::pattern::{Pattern, ShadingPattern};
14use crate::shading::Shading;
15use crate::util::OptionLog;
16use crate::x_object::{ImageXObject, XObject, draw_image_xobject, draw_xobject};
17use hayro_syntax::content::ops::TypedInstruction;
18use hayro_syntax::object::{Dict, Object, dict_or_stream};
19use hayro_syntax::page::{Page, Resources};
20use kurbo::{Affine, Point, Shape};
21use log::warn;
22use smallvec::smallvec;
23use std::sync::Arc;
24
25pub(crate) mod path;
26pub(crate) mod state;
27pub(crate) mod text;
28
/// A callback function for resolving font queries.
///
/// The callback receives the [`FontQuery`] that should be resolved and returns `None` if no
/// font is available for it. Otherwise, the first element of the returned tuple is the raw
/// data, the second element is the index in case the font is a TTC, otherwise it should be 0.
pub type FontResolverFn = Arc<dyn Fn(&FontQuery) -> Option<(FontData, u32)> + Send + Sync>;
/// A callback function that receives the warnings emitted during interpretation.
pub type WarningSinkFn = Arc<dyn Fn(InterpreterWarning) + Send + Sync>;
36
#[derive(Clone)]
/// Settings that should be applied during the interpretation process.
///
/// Both fields are reference-counted callbacks, see [`FontResolverFn`] and [`WarningSinkFn`].
pub struct InterpreterSettings {
    /// The callback that is used to resolve fonts that cannot be read from the PDF file itself.
    ///
    /// Nearly every PDF contains text. In most cases, PDF files embed the fonts they use, and
    /// hayro can therefore read the font files and do all the processing needed. However, there
    /// are two problems:
    /// - Fonts don't _have_ to be embedded, it's possible that the PDF file only defines the basic
    ///   metadata of the font, like its name, but relies on the PDF processor to find that font
    ///   in its environment.
    /// - The PDF specification requires a list of 14 fonts that should always be available to a
    ///   PDF processor. These include:
    ///   - Times New Roman (Normal, Bold, Italic, BoldItalic)
    ///   - Courier (Normal, Bold, Italic, BoldItalic)
    ///   - Helvetica (Normal, Bold, Italic, BoldItalic)
    ///   - ZapfDingbats
    ///   - Symbol
    ///
    /// Because of this, if any of the above situations occurs, this callback will be called, which
    /// expects the data of an appropriate font to be returned, if available. If no such font is
    /// provided, the text will most likely fail to render.
    ///
    /// For the font data, there are two different formats that are accepted:
    /// - Any valid TTF/OTF font.
    /// - A valid CFF font program.
    ///
    /// The following recommendations are given for the implementation of this callback function.
    ///
    /// For the standard fonts, in case the original fonts are available on the system, you should
    /// just return those. Otherwise, for Helvetica, Courier and Times New Roman, the best
    /// alternatives are the corresponding fonts of the
    /// [Liberation font family](https://github.com/liberationfonts/liberation-fonts).
    /// If you prefer smaller fonts, you can use the [Foxit CFF fonts](https://github.com/LaurenzV/hayro/tree/master/assets/standard_fonts),
    /// which are much smaller but are missing glyphs for certain scripts.
    ///
    /// For the `Symbol` and `ZapfDingbats` fonts, you should also prefer the system fonts, and if
    /// they are not available to you, you can, similarly to above, use the corresponding fonts
    /// from Foxit.
    ///
    /// If you don't want to deal with this, you can just enable the `embed-fonts` feature
    /// and use the default implementation of the callback.
    pub font_resolver: FontResolverFn,

    /// The callback that receives the warnings emitted by `hayro`.
    ///
    /// In certain cases, `hayro` will emit a warning in case an issue was encountered while
    /// interpreting the PDF file. Providing a callback allows you to catch those warnings and
    /// handle them, if desired.
    pub warning_sink: WarningSinkFn,
}
81
82impl Default for InterpreterSettings {
83    fn default() -> Self {
84        Self {
85            #[cfg(not(feature = "embed-fonts"))]
86            font_resolver: Arc::new(|_| None),
87            #[cfg(feature = "embed-fonts")]
88            font_resolver: Arc::new(|query| match query {
89                FontQuery::Standard(s) => Some(s.get_font_data()),
90                FontQuery::Fallback(f) => Some(f.pick_standard_font().get_font_data()),
91            }),
92            warning_sink: Arc::new(|_| {}),
93        }
94    }
95}
96
#[derive(Copy, Clone, Debug)]
/// Warnings that can occur while interpreting a PDF file.
///
/// Values of this type are what a [`WarningSinkFn`] callback receives.
pub enum InterpreterWarning {
    /// A JPX image was encountered, even though the `jpeg2000` feature is not enabled.
    JpxImage,
    /// An unsupported font kind was encountered.
    ///
    /// Currently, only CID fonts with non-identity encoding are unsupported.
    UnsupportedFont,
    /// An image failed to decode.
    ImageDecodeFailure,
}
109
110/// interpret the contents of the page and render them into the device.
111pub fn interpret_page<'a>(
112    page: &Page<'a>,
113    context: &mut Context<'a>,
114    device: &mut impl Device<'a>,
115) {
116    let resources = page.resources();
117    interpret(page.typed_operations(), resources, context, device)
118}
119
120/// Interpret the instructions from `ops` and render them into the device.
121pub fn interpret<'a, 'b>(
122    ops: impl Iterator<Item = TypedInstruction<'b>>,
123    resources: &Resources<'a>,
124    context: &mut Context<'a>,
125    device: &mut impl Device<'a>,
126) {
127    let num_states = context.num_states();
128    let n_clips = context.get().n_clips;
129
130    save_sate(context);
131
132    for op in ops {
133        match op {
134            TypedInstruction::SaveState(_) => save_sate(context),
135            TypedInstruction::StrokeColorDeviceRgb(s) => {
136                context.get_mut().stroke_cs = ColorSpace::device_rgb();
137                context.get_mut().stroke_color =
138                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
139            }
140            TypedInstruction::StrokeColorDeviceGray(s) => {
141                context.get_mut().stroke_cs = ColorSpace::device_gray();
142                context.get_mut().stroke_color = smallvec![s.0.as_f32()];
143            }
144            TypedInstruction::StrokeColorCmyk(s) => {
145                context.get_mut().stroke_cs = ColorSpace::device_cmyk();
146                context.get_mut().stroke_color =
147                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
148            }
149            TypedInstruction::LineWidth(w) => {
150                context.get_mut().stroke_props.line_width = w.0.as_f32();
151            }
152            TypedInstruction::LineCap(c) => {
153                context.get_mut().stroke_props.line_cap = convert_line_cap(c);
154            }
155            TypedInstruction::LineJoin(j) => {
156                context.get_mut().stroke_props.line_join = convert_line_join(j);
157            }
158            TypedInstruction::MiterLimit(l) => {
159                context.get_mut().stroke_props.miter_limit = l.0.as_f32();
160            }
161            TypedInstruction::Transform(t) => {
162                context.pre_concat_transform(t);
163            }
164            TypedInstruction::RectPath(r) => {
165                let rect = kurbo::Rect::new(
166                    r.0.as_f64(),
167                    r.1.as_f64(),
168                    r.0.as_f64() + r.2.as_f64(),
169                    r.1.as_f64() + r.3.as_f64(),
170                )
171                .to_path(0.1);
172                context.path_mut().extend(rect);
173            }
174            TypedInstruction::MoveTo(m) => {
175                let p = Point::new(m.0.as_f64(), m.1.as_f64());
176                *(context.last_point_mut()) = p;
177                *(context.sub_path_start_mut()) = p;
178                context.path_mut().move_to(p);
179            }
180            TypedInstruction::FillPathEvenOdd(_) => {
181                context.get_mut().fill_rule = FillRule::EvenOdd;
182                fill_path(context, device);
183            }
184            TypedInstruction::FillPathNonZero(_) => {
185                context.get_mut().fill_rule = FillRule::NonZero;
186                fill_path(context, device);
187            }
188            TypedInstruction::FillPathNonZeroCompatibility(_) => {
189                context.get_mut().fill_rule = FillRule::NonZero;
190                fill_path(context, device);
191            }
192            TypedInstruction::FillAndStrokeEvenOdd(_) => {
193                context.get_mut().fill_rule = FillRule::EvenOdd;
194                fill_stroke_path(context, device);
195            }
196            TypedInstruction::FillAndStrokeNonZero(_) => {
197                context.get_mut().fill_rule = FillRule::NonZero;
198                fill_stroke_path(context, device);
199            }
200            TypedInstruction::CloseAndStrokePath(_) => {
201                close_path(context);
202                stroke_path(context, device);
203            }
204            TypedInstruction::CloseFillAndStrokeEvenOdd(_) => {
205                close_path(context);
206                context.get_mut().fill_rule = FillRule::EvenOdd;
207                fill_stroke_path(context, device);
208            }
209            TypedInstruction::CloseFillAndStrokeNonZero(_) => {
210                close_path(context);
211                context.get_mut().fill_rule = FillRule::NonZero;
212                fill_stroke_path(context, device);
213            }
214            TypedInstruction::NonStrokeColorDeviceGray(s) => {
215                context.get_mut().none_stroke_cs = ColorSpace::device_gray();
216                context.get_mut().non_stroke_color = smallvec![s.0.as_f32()];
217            }
218            TypedInstruction::NonStrokeColorDeviceRgb(s) => {
219                context.get_mut().none_stroke_cs = ColorSpace::device_rgb();
220                context.get_mut().non_stroke_color =
221                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
222            }
223            TypedInstruction::NonStrokeColorCmyk(s) => {
224                context.get_mut().none_stroke_cs = ColorSpace::device_cmyk();
225                context.get_mut().non_stroke_color =
226                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
227            }
228            TypedInstruction::LineTo(m) => {
229                if !context.path().elements().is_empty() {
230                    let last_point = *context.last_point();
231                    let mut p = Point::new(m.0.as_f64(), m.1.as_f64());
232                    *(context.last_point_mut()) = p;
233                    if last_point == p {
234                        // Add a small delta so that zero width lines can still have a round stroke.
235                        p.x += 0.0001;
236                    }
237
238                    context.path_mut().line_to(p);
239                }
240            }
241            TypedInstruction::CubicTo(c) => {
242                if !context.path().elements().is_empty() {
243                    let p1 = Point::new(c.0.as_f64(), c.1.as_f64());
244                    let p2 = Point::new(c.2.as_f64(), c.3.as_f64());
245                    let p3 = Point::new(c.4.as_f64(), c.5.as_f64());
246
247                    *(context.last_point_mut()) = p3;
248
249                    context.path_mut().curve_to(p1, p2, p3)
250                }
251            }
252            TypedInstruction::CubicStartTo(c) => {
253                if !context.path().elements().is_empty() {
254                    let p1 = *context.last_point();
255                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
256                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
257
258                    *(context.last_point_mut()) = p3;
259
260                    context.path_mut().curve_to(p1, p2, p3)
261                }
262            }
263            TypedInstruction::CubicEndTo(c) => {
264                if !context.path().elements().is_empty() {
265                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
266                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
267
268                    *(context.last_point_mut()) = p3;
269
270                    context.path_mut().curve_to(p2, p3, p3)
271                }
272            }
273            TypedInstruction::ClosePath(_) => {
274                close_path(context);
275            }
276            TypedInstruction::SetGraphicsState(gs) => {
277                if let Some(gs) = resources
278                    .get_ext_g_state::<Dict>(gs.0.clone(), Box::new(|_| None), Box::new(Some))
279                    .warn_none(&format!("failed to get extgstate {}", gs.0.as_str()))
280                {
281                    handle_gs(&gs, context, resources);
282                }
283            }
284            TypedInstruction::StrokePath(_) => {
285                stroke_path(context, device);
286            }
287            TypedInstruction::EndPath(_) => {
288                if let Some(clip) = *context.clip()
289                    && !context.path().elements().is_empty()
290                {
291                    let clip_path = context.get().ctm * context.path().clone();
292                    context.push_bbox(clip_path.bounding_box());
293
294                    device.push_clip_path(&ClipPath {
295                        path: clip_path,
296                        fill: clip,
297                    });
298
299                    context.get_mut().n_clips += 1;
300
301                    *(context.clip_mut()) = None;
302                }
303
304                context.path_mut().truncate(0);
305            }
306            TypedInstruction::NonStrokeColor(c) => {
307                let fill_c = &mut context.get_mut().non_stroke_color;
308                fill_c.truncate(0);
309
310                for e in c.0 {
311                    fill_c.push(e.as_f32());
312                }
313            }
314            TypedInstruction::StrokeColor(c) => {
315                let stroke_c = &mut context.get_mut().stroke_color;
316                stroke_c.truncate(0);
317
318                for e in c.0 {
319                    stroke_c.push(e.as_f32());
320                }
321            }
322            TypedInstruction::ClipNonZero(_) => {
323                *(context.clip_mut()) = Some(FillRule::NonZero);
324            }
325            TypedInstruction::ClipEvenOdd(_) => {
326                *(context.clip_mut()) = Some(FillRule::EvenOdd);
327            }
328            TypedInstruction::RestoreState(_) => restore_state(context, device),
329            TypedInstruction::FlatnessTolerance(_) => {
330                // Ignore for now.
331            }
332            TypedInstruction::ColorSpaceStroke(c) => {
333                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
334                    named
335                } else {
336                    context
337                        .get_color_space(resources, c.0)
338                        .unwrap_or(ColorSpace::device_gray())
339                };
340
341                context.get_mut().stroke_color = cs.initial_color();
342                context.get_mut().stroke_cs = cs;
343            }
344            TypedInstruction::ColorSpaceNonStroke(c) => {
345                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
346                    named
347                } else {
348                    context
349                        .get_color_space(resources, c.0)
350                        .unwrap_or(ColorSpace::device_gray())
351                };
352
353                context.get_mut().non_stroke_color = cs.initial_color();
354                context.get_mut().none_stroke_cs = cs;
355            }
356            TypedInstruction::DashPattern(p) => {
357                context.get_mut().stroke_props.dash_offset = p.1.as_f32();
358                // kurbo apparently cannot properly deal with offsets that are exactly 0.
359                context.get_mut().stroke_props.dash_array =
360                    p.0.iter::<f32>()
361                        .map(|n| if n == 0.0 { 0.01 } else { n })
362                        .collect();
363            }
364            TypedInstruction::RenderingIntent(_) => {
365                // Ignore for now.
366            }
367            TypedInstruction::NonStrokeColorNamed(n) => {
368                context.get_mut().non_stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
369                context.get_mut().non_stroke_pattern = n.1.and_then(|name| {
370                    resources.get_pattern(
371                        name,
372                        Box::new(|_| None),
373                        Box::new(|d| Pattern::new(d, context, resources)),
374                    )
375                });
376            }
377            TypedInstruction::StrokeColorNamed(n) => {
378                context.get_mut().stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
379                context.get_mut().stroke_pattern = n.1.and_then(|name| {
380                    resources.get_pattern(
381                        name,
382                        Box::new(|_| None),
383                        Box::new(|d| Pattern::new(d, context, resources)),
384                    )
385                });
386            }
387            TypedInstruction::BeginMarkedContentWithProperties(_) => {}
388            TypedInstruction::MarkedContentPointWithProperties(_) => {}
389            TypedInstruction::EndMarkedContent(_) => {}
390            TypedInstruction::MarkedContentPoint(_) => {}
391            TypedInstruction::BeginMarkedContent(_) => {}
392            TypedInstruction::BeginText(_) => {
393                context.get_mut().text_state.text_matrix = Affine::IDENTITY;
394                context.get_mut().text_state.text_line_matrix = Affine::IDENTITY;
395            }
396            TypedInstruction::SetTextMatrix(m) => {
397                let m = Affine::new([
398                    m.0.as_f64(),
399                    m.1.as_f64(),
400                    m.2.as_f64(),
401                    m.3.as_f64(),
402                    m.4.as_f64(),
403                    m.5.as_f64(),
404                ]);
405                context.get_mut().text_state.text_line_matrix = m;
406                context.get_mut().text_state.text_matrix = m;
407            }
408            TypedInstruction::EndText(_) => {
409                let has_outline = context
410                    .get()
411                    .text_state
412                    .clip_paths
413                    .segments()
414                    .next()
415                    .is_some();
416
417                if has_outline {
418                    let clip_path = context.get().ctm * context.get().text_state.clip_paths.clone();
419
420                    context.push_bbox(clip_path.bounding_box());
421
422                    device.push_clip_path(&ClipPath {
423                        path: clip_path,
424                        fill: FillRule::NonZero,
425                    });
426                    context.get_mut().n_clips += 1;
427                }
428
429                context.get_mut().text_state.clip_paths.truncate(0);
430            }
431            TypedInstruction::TextFont(t) => {
432                let font = context.get_font(resources, t.0);
433                context.get_mut().text_state.font_size = t.1.as_f32();
434                context.get_mut().text_state.font = font;
435            }
436            TypedInstruction::ShowText(s) => {
437                text::show_text_string(context, device, resources, s.0);
438            }
439            TypedInstruction::ShowTexts(s) => {
440                for obj in s.0.iter::<Object>() {
441                    if let Some(adjustment) = obj.clone().into_f32() {
442                        context.get_mut().text_state.apply_adjustment(adjustment);
443                    } else if let Some(text) = obj.into_string() {
444                        text::show_text_string(context, device, resources, text);
445                    }
446                }
447            }
448            TypedInstruction::HorizontalScaling(h) => {
449                context.get_mut().text_state.horizontal_scaling = h.0.as_f32();
450            }
451            TypedInstruction::TextLeading(tl) => {
452                context.get_mut().text_state.leading = tl.0.as_f32();
453            }
454            TypedInstruction::CharacterSpacing(c) => {
455                context.get_mut().text_state.char_space = c.0.as_f32()
456            }
457            TypedInstruction::WordSpacing(w) => {
458                context.get_mut().text_state.word_space = w.0.as_f32();
459            }
460            TypedInstruction::NextLine(n) => {
461                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
462                text::next_line(context, tx, ty)
463            }
464            TypedInstruction::NextLineUsingLeading(_) => {
465                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
466            }
467            TypedInstruction::NextLineAndShowText(n) => {
468                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
469                text::show_text_string(context, device, resources, n.0)
470            }
471            TypedInstruction::TextRenderingMode(r) => {
472                let mode = match r.0.as_i32() {
473                    0 => TextRenderingMode::Fill,
474                    1 => TextRenderingMode::Stroke,
475                    2 => TextRenderingMode::FillStroke,
476                    3 => TextRenderingMode::Invisible,
477                    4 => TextRenderingMode::FillAndClip,
478                    5 => TextRenderingMode::StrokeAndClip,
479                    6 => TextRenderingMode::FillAndStrokeAndClip,
480                    7 => TextRenderingMode::Clip,
481                    _ => {
482                        warn!("unknown text rendering mode {}", r.0.as_i32());
483
484                        TextRenderingMode::Fill
485                    }
486                };
487
488                context.get_mut().text_state.render_mode = mode;
489            }
490            TypedInstruction::NextLineAndSetLeading(n) => {
491                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
492                context.get_mut().text_state.leading = -ty as f32;
493                text::next_line(context, tx, ty)
494            }
495            TypedInstruction::ShapeGlyph(_) => {}
496            TypedInstruction::XObject(x) => {
497                let cache = context.object_cache.clone();
498                if let Some(x_object) = resources.get_x_object(
499                    x.0,
500                    Box::new(|_| None),
501                    Box::new(|s| XObject::new(&s, &context.settings.warning_sink, &cache)),
502                ) {
503                    draw_xobject(&x_object, resources, context, device);
504                }
505            }
506            TypedInstruction::InlineImage(i) => {
507                let warning_sink = context.settings.warning_sink.clone();
508                let cache = context.object_cache.clone();
509                if let Some(x_object) = ImageXObject::new(
510                    &i.0,
511                    |name| context.get_color_space(resources, name.clone()),
512                    &warning_sink,
513                    &cache,
514                    false,
515                ) {
516                    draw_image_xobject(&x_object, context, device);
517                }
518            }
519            TypedInstruction::TextRise(t) => {
520                context.get_mut().text_state.rise = t.0.as_f32();
521            }
522            TypedInstruction::Shading(s) => {
523                if let Some(sp) = resources
524                    .get_shading(s.0, Box::new(|_| None), Box::new(Some))
525                    .and_then(|o| dict_or_stream(&o))
526                    .and_then(|s| Shading::new(&s.0, s.1.as_ref(), &context.object_cache))
527                    .map(|s| {
528                        Pattern::Shading(ShadingPattern {
529                            shading: Arc::new(s),
530                            matrix: Affine::IDENTITY,
531                        })
532                    })
533                {
534                    context.save_state();
535                    context.push_root_transform();
536                    let st = context.get_mut();
537                    st.non_stroke_pattern = Some(sp);
538                    st.none_stroke_cs = ColorSpace::pattern();
539
540                    device.set_soft_mask(st.soft_mask.clone());
541                    device.push_transparency_group(st.non_stroke_alpha, None);
542
543                    let bbox = context.bbox().to_path(0.1);
544                    let inverted_bbox = context.get().ctm.inverse() * bbox;
545                    fill_path_impl(context, device, Some(&inverted_bbox));
546
547                    device.pop_transparency_group();
548
549                    context.restore_state();
550                } else {
551                    warn!("failed to process shading");
552                }
553            }
554            TypedInstruction::BeginCompatibility(_) => {}
555            TypedInstruction::EndCompatibility(_) => {}
556            TypedInstruction::ColorGlyph(_) => {}
557            TypedInstruction::ShowTextWithParameters(t) => {
558                context.get_mut().text_state.word_space = t.0.as_f32();
559                context.get_mut().text_state.char_space = t.1.as_f32();
560                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
561                text::show_text_string(context, device, resources, t.2)
562            }
563            _ => {
564                warn!("failed to read an operator");
565            }
566        }
567    }
568
569    while context.num_states() > num_states {
570        restore_state(context, device);
571    }
572
573    // Invalid files may still have pending clip paths.
574    while context.get().n_clips > n_clips {
575        device.pop_clip_path();
576        context.pop_bbox();
577        context.get_mut().n_clips -= 1;
578    }
579}