hayro_interpret/interpret/
mod.rs

1use crate::ClipPath;
2use crate::FillRule;
3use crate::color::ColorSpace;
4use crate::context::Context;
5use crate::convert::{convert_line_cap, convert_line_join};
6use crate::device::Device;
7use crate::font::{FontData, FontQuery};
8use crate::interpret::path::{fill_path, fill_path_impl, fill_stroke_path, stroke_path};
9use crate::interpret::state::{handle_gs, restore_state, save_sate};
10use crate::interpret::text::TextRenderingMode;
11use crate::pattern::{Pattern, ShadingPattern};
12use crate::shading::Shading;
13use crate::util::OptionLog;
14use crate::x_object::{ImageXObject, XObject, draw_image_xobject, draw_xobject};
15use hayro_syntax::content::ops::TypedInstruction;
16use hayro_syntax::object::{Dict, Object, dict_or_stream};
17use hayro_syntax::page::Resources;
18use kurbo::{Affine, Point, Shape};
19use log::warn;
20use smallvec::smallvec;
21use std::sync::Arc;
22
23pub(crate) mod path;
24pub(crate) mod state;
25pub(crate) mod text;
26
/// A callback function for resolving font queries.
///
/// The callback receives the [`FontQuery`] describing the requested font and returns `None` if
/// no suitable font is available. Otherwise, it returns a tuple whose first element is the raw
/// font data and whose second element is the index of the font in case the data is a TTC;
/// otherwise the index should be 0.
pub type FontResolverFn = Arc<dyn Fn(&FontQuery) -> Option<(FontData, u32)> + Send + Sync>;
/// A callback function that receives the warnings emitted during interpretation.
pub type WarningSinkFn = Arc<dyn Fn(InterpreterWarning) + Send + Sync>;
34
35#[derive(Clone)]
36/// Settings that should be applied during the interpretation process.
37pub struct InterpreterSettings {
38    /// Nearly every PDF contains text. In most cases, PDF files embed the fonts they use, and
39    /// hayro can therefore read the font files and do all the processing needed. However, there
40    /// are two problems:
41    /// - Fonts don't _have_ to be embedded, it's possible that the PDF file only defines the basic
42    ///   metadata of the font, like its name, but relies on the PDF processor to find that font
43    ///   in its environment.
44    /// - The PDF specification requires a list of 14 fonts that should always be available to a
45    ///   PDF processor. These include:
46    ///   - Times New Roman (Normal, Bold, Italic, BoldItalic)
47    ///   - Courier (Normal, Bold, Italic, BoldItalic)
48    ///   - Helvetica (Normal, Bold, Italic, BoldItalic)
49    ///   - ZapfDingBats
50    ///   - Symbol
51    ///
52    /// Because of this, if any of the above situations occurs, this callback will be called, which
53    /// expects the data of an appropriate font to be returned, if available. If no such font is
54    /// provided, the text will most likely fail to render.
55    ///
56    /// For the font data, there are two different formats that are accepted:
57    /// - Any valid TTF/OTF font.
58    /// - A valid CFF font program.
59    ///
60    /// The following recommendations are given for the implementation of this callback function.
61    ///
62    /// For the standard fonts, in case the original fonts are available on the system, you should
63    /// just return those. Otherwise, for Helvetica, Courier and Times New Roman, the best alternative
64    /// are the corresponding fonts of the [Liberation font family](https://github.com/liberationfonts/liberation-fonts).
65    /// If you prefer smaller fonts, you can use the [Foxit CFF fonts](https://github.com/LaurenzV/hayro/tree/master/assets/standard_fonts),
66    /// which are much smaller but are missing glyphs for certain scripts.
67    ///
68    /// For the `Symbol` and `ZapfDingBats` fonts, you should also prefer the system fonts, and if
69    /// not available to you, you can, similarly to above, use the corresponding fonts from Foxit.
70    pub font_resolver: FontResolverFn,
71
72    /// In certain cases, `hayro` will emit a warning in case an issue was encountered while interpreting
73    /// the PDF file. Providing a callback allows you to catch those warnings and handle them, if desired.
74    pub warning_sink: WarningSinkFn,
75}
76
77impl Default for InterpreterSettings {
78    fn default() -> Self {
79        Self {
80            font_resolver: Arc::new(|_| None),
81            warning_sink: Arc::new(|_| {}),
82        }
83    }
84}
85
/// Warnings that can occur while interpreting a PDF file.
///
/// The variants carry no data, so warnings can be cheaply copied, compared and hashed, which
/// makes it possible to, for example, deduplicate them in a warning sink.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum InterpreterWarning {
    /// A JPX image was encountered, even though the `jpeg2000` feature is not enabled.
    JpxImage,
    /// An unsupported font kind was encountered.
    ///
    /// Currently, only CID fonts with non-identity encoding are unsupported.
    UnsupportedFont,
    /// An image failed to decode.
    ImageDecodeFailure,
}
98
99/// Interpret the instructions from `ops` and render them into the device.
100pub fn interpret<'a, 'b>(
101    ops: impl Iterator<Item = TypedInstruction<'b>>,
102    resources: &Resources<'a>,
103    context: &mut Context<'a>,
104    device: &mut impl Device,
105) {
106    let num_states = context.num_states();
107
108    save_sate(context);
109
110    for op in ops {
111        match op {
112            TypedInstruction::SaveState(_) => save_sate(context),
113            TypedInstruction::StrokeColorDeviceRgb(s) => {
114                context.get_mut().stroke_cs = ColorSpace::device_rgb();
115                context.get_mut().stroke_color =
116                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
117            }
118            TypedInstruction::StrokeColorDeviceGray(s) => {
119                context.get_mut().stroke_cs = ColorSpace::device_gray();
120                context.get_mut().stroke_color = smallvec![s.0.as_f32()];
121            }
122            TypedInstruction::StrokeColorCmyk(s) => {
123                context.get_mut().stroke_cs = ColorSpace::device_cmyk();
124                context.get_mut().stroke_color =
125                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
126            }
127            TypedInstruction::LineWidth(w) => {
128                context.get_mut().line_width = w.0.as_f32();
129            }
130            TypedInstruction::LineCap(c) => {
131                context.get_mut().line_cap = convert_line_cap(c);
132            }
133            TypedInstruction::LineJoin(j) => {
134                context.get_mut().line_join = convert_line_join(j);
135            }
136            TypedInstruction::MiterLimit(l) => {
137                context.get_mut().miter_limit = l.0.as_f32();
138            }
139            TypedInstruction::Transform(t) => {
140                context.pre_concat_transform(t);
141            }
142            TypedInstruction::RectPath(r) => {
143                let rect = kurbo::Rect::new(
144                    r.0.as_f64(),
145                    r.1.as_f64(),
146                    r.0.as_f64() + r.2.as_f64(),
147                    r.1.as_f64() + r.3.as_f64(),
148                )
149                .to_path(0.1);
150                context.path_mut().extend(rect);
151            }
152            TypedInstruction::MoveTo(m) => {
153                let p = Point::new(m.0.as_f64(), m.1.as_f64());
154                *(context.last_point_mut()) = p;
155                *(context.sub_path_start_mut()) = p;
156                context.path_mut().move_to(p);
157            }
158            TypedInstruction::FillPathEvenOdd(_) => {
159                context.get_mut().fill_rule = FillRule::EvenOdd;
160                fill_path(context, device);
161            }
162            TypedInstruction::FillPathNonZero(_) => {
163                context.get_mut().fill_rule = FillRule::NonZero;
164                fill_path(context, device);
165            }
166            TypedInstruction::FillPathNonZeroCompatibility(_) => {
167                context.get_mut().fill_rule = FillRule::NonZero;
168                fill_path(context, device);
169            }
170            TypedInstruction::FillAndStrokeEvenOdd(_) => {
171                context.get_mut().fill_rule = FillRule::EvenOdd;
172                fill_stroke_path(context, device);
173            }
174            TypedInstruction::FillAndStrokeNonZero(_) => {
175                context.get_mut().fill_rule = FillRule::NonZero;
176                fill_stroke_path(context, device);
177            }
178            TypedInstruction::CloseAndStrokePath(_) => {
179                context.path_mut().close_path();
180                stroke_path(context, device);
181            }
182            TypedInstruction::CloseFillAndStrokeEvenOdd(_) => {
183                context.path_mut().close_path();
184                context.get_mut().fill_rule = FillRule::EvenOdd;
185                fill_stroke_path(context, device);
186            }
187            TypedInstruction::CloseFillAndStrokeNonZero(_) => {
188                context.path_mut().close_path();
189                context.get_mut().fill_rule = FillRule::NonZero;
190                fill_stroke_path(context, device);
191            }
192            TypedInstruction::NonStrokeColorDeviceGray(s) => {
193                context.get_mut().none_stroke_cs = ColorSpace::device_gray();
194                context.get_mut().non_stroke_color = smallvec![s.0.as_f32()];
195            }
196            TypedInstruction::NonStrokeColorDeviceRgb(s) => {
197                context.get_mut().none_stroke_cs = ColorSpace::device_rgb();
198                context.get_mut().non_stroke_color =
199                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
200            }
201            TypedInstruction::NonStrokeColorCmyk(s) => {
202                context.get_mut().none_stroke_cs = ColorSpace::device_cmyk();
203                context.get_mut().non_stroke_color =
204                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
205            }
206            TypedInstruction::LineTo(m) => {
207                let last_point = *context.last_point();
208                let mut p = Point::new(m.0.as_f64(), m.1.as_f64());
209                *(context.last_point_mut()) = p;
210                if last_point == p {
211                    // Add a small delta so that zero width lines can still have a round stroke.
212                    p.x += 0.0001;
213                }
214
215                context.path_mut().line_to(p);
216            }
217            TypedInstruction::CubicTo(c) => {
218                let p1 = Point::new(c.0.as_f64(), c.1.as_f64());
219                let p2 = Point::new(c.2.as_f64(), c.3.as_f64());
220                let p3 = Point::new(c.4.as_f64(), c.5.as_f64());
221
222                *(context.last_point_mut()) = p3;
223
224                context.path_mut().curve_to(p1, p2, p3)
225            }
226            TypedInstruction::CubicStartTo(c) => {
227                let p1 = *context.last_point();
228                let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
229                let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
230
231                *(context.last_point_mut()) = p3;
232
233                context.path_mut().curve_to(p1, p2, p3)
234            }
235            TypedInstruction::CubicEndTo(c) => {
236                let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
237                let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
238
239                *(context.last_point_mut()) = p3;
240
241                context.path_mut().curve_to(p2, p3, p3)
242            }
243            TypedInstruction::ClosePath(_) => {
244                context.path_mut().close_path();
245
246                *(context.last_point_mut()) = *context.sub_path_start();
247            }
248            TypedInstruction::SetGraphicsState(gs) => {
249                if let Some(gs) = resources
250                    .get_ext_g_state::<Dict>(gs.0.clone(), Box::new(|_| None), Box::new(Some))
251                    .warn_none(&format!("failed to get extgstate {}", gs.0.as_str()))
252                {
253                    handle_gs(&gs, context, resources);
254                }
255            }
256            TypedInstruction::StrokePath(_) => {
257                stroke_path(context, device);
258            }
259            TypedInstruction::EndPath(_) => {
260                if let Some(clip) = *context.clip()
261                    && !context.path().elements().is_empty()
262                {
263                    device.set_transform(context.get().ctm);
264                    device.push_clip_path(&ClipPath {
265                        path: context.path().clone(),
266                        fill: clip,
267                    });
268
269                    context.get_mut().n_clips += 1;
270
271                    *(context.clip_mut()) = None;
272                }
273
274                context.path_mut().truncate(0);
275            }
276            TypedInstruction::NonStrokeColor(c) => {
277                let fill_c = &mut context.get_mut().non_stroke_color;
278                fill_c.truncate(0);
279
280                for e in c.0 {
281                    fill_c.push(e.as_f32());
282                }
283            }
284            TypedInstruction::StrokeColor(c) => {
285                let stroke_c = &mut context.get_mut().stroke_color;
286                stroke_c.truncate(0);
287
288                for e in c.0 {
289                    stroke_c.push(e.as_f32());
290                }
291            }
292            TypedInstruction::ClipNonZero(_) => {
293                *(context.clip_mut()) = Some(FillRule::NonZero);
294            }
295            TypedInstruction::ClipEvenOdd(_) => {
296                *(context.clip_mut()) = Some(FillRule::EvenOdd);
297            }
298            TypedInstruction::RestoreState(_) => restore_state(context, device),
299            TypedInstruction::FlatnessTolerance(_) => {
300                // Ignore for now.
301            }
302            TypedInstruction::ColorSpaceStroke(c) => {
303                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
304                    named
305                } else {
306                    context
307                        .get_color_space(resources, c.0)
308                        .unwrap_or(ColorSpace::device_gray())
309                };
310
311                context.get_mut().stroke_color = cs.initial_color();
312                context.get_mut().stroke_cs = cs;
313            }
314            TypedInstruction::ColorSpaceNonStroke(c) => {
315                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
316                    named
317                } else {
318                    context
319                        .get_color_space(resources, c.0)
320                        .unwrap_or(ColorSpace::device_gray())
321                };
322
323                context.get_mut().non_stroke_color = cs.initial_color();
324                context.get_mut().none_stroke_cs = cs;
325            }
326            TypedInstruction::DashPattern(p) => {
327                context.get_mut().dash_offset = p.1.as_f32();
328                // kurbo apparently cannot properly deal with offsets that are exactly 0.
329                context.get_mut().dash_array =
330                    p.0.iter::<f32>()
331                        .map(|n| if n == 0.0 { 0.01 } else { n })
332                        .collect();
333            }
334            TypedInstruction::RenderingIntent(_) => {
335                // Ignore for now.
336            }
337            TypedInstruction::NonStrokeColorNamed(n) => {
338                context.get_mut().non_stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
339                context.get_mut().non_stroke_pattern = n.1.and_then(|name| {
340                    resources.get_pattern(
341                        name,
342                        Box::new(|_| None),
343                        Box::new(|d| Pattern::new(d, context, resources)),
344                    )
345                });
346            }
347            TypedInstruction::StrokeColorNamed(n) => {
348                context.get_mut().stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
349                context.get_mut().stroke_pattern = n.1.and_then(|name| {
350                    resources.get_pattern(
351                        name,
352                        Box::new(|_| None),
353                        Box::new(|d| Pattern::new(d, context, resources)),
354                    )
355                });
356            }
357            TypedInstruction::BeginMarkedContentWithProperties(_) => {}
358            TypedInstruction::MarkedContentPointWithProperties(_) => {}
359            TypedInstruction::EndMarkedContent(_) => {}
360            TypedInstruction::MarkedContentPoint(_) => {}
361            TypedInstruction::BeginMarkedContent(_) => {}
362            TypedInstruction::BeginText(_) => {
363                context.get_mut().text_state.text_matrix = Affine::IDENTITY;
364                context.get_mut().text_state.text_line_matrix = Affine::IDENTITY;
365            }
366            TypedInstruction::SetTextMatrix(m) => {
367                let m = Affine::new([
368                    m.0.as_f64(),
369                    m.1.as_f64(),
370                    m.2.as_f64(),
371                    m.3.as_f64(),
372                    m.4.as_f64(),
373                    m.5.as_f64(),
374                ]);
375                context.get_mut().text_state.text_line_matrix = m;
376                context.get_mut().text_state.text_matrix = m;
377            }
378            TypedInstruction::EndText(_) => {
379                let has_outline = context
380                    .get()
381                    .text_state
382                    .clip_paths
383                    .segments()
384                    .next()
385                    .is_some();
386
387                if has_outline {
388                    device.set_transform(context.get().ctm);
389                    device.push_clip_path(&ClipPath {
390                        path: context.get().text_state.clip_paths.clone(),
391                        fill: FillRule::NonZero,
392                    });
393                    context.get_mut().n_clips += 1;
394                }
395
396                context.get_mut().text_state.clip_paths.truncate(0);
397            }
398            TypedInstruction::TextFont(t) => {
399                let font = context.get_font(resources, t.0);
400                context.get_mut().text_state.font_size = t.1.as_f32();
401                context.get_mut().text_state.font = font;
402            }
403            TypedInstruction::ShowText(s) => {
404                text::show_text_string(context, device, resources, s.0);
405            }
406            TypedInstruction::ShowTexts(s) => {
407                for obj in s.0.iter::<Object>() {
408                    if let Some(adjustment) = obj.clone().into_f32() {
409                        context.get_mut().text_state.apply_adjustment(adjustment);
410                    } else if let Some(text) = obj.into_string() {
411                        text::show_text_string(context, device, resources, text);
412                    }
413                }
414            }
415            TypedInstruction::HorizontalScaling(h) => {
416                context.get_mut().text_state.horizontal_scaling = h.0.as_f32();
417            }
418            TypedInstruction::TextLeading(tl) => {
419                context.get_mut().text_state.leading = tl.0.as_f32();
420            }
421            TypedInstruction::CharacterSpacing(c) => {
422                context.get_mut().text_state.char_space = c.0.as_f32()
423            }
424            TypedInstruction::WordSpacing(w) => {
425                context.get_mut().text_state.word_space = w.0.as_f32();
426            }
427            TypedInstruction::NextLine(n) => {
428                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
429                text::next_line(context, tx, ty)
430            }
431            TypedInstruction::NextLineUsingLeading(_) => {
432                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
433            }
434            TypedInstruction::NextLineAndShowText(n) => {
435                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
436                text::show_text_string(context, device, resources, n.0)
437            }
438            TypedInstruction::TextRenderingMode(r) => {
439                let mode = match r.0.as_i32() {
440                    0 => TextRenderingMode::Fill,
441                    1 => TextRenderingMode::Stroke,
442                    2 => TextRenderingMode::FillStroke,
443                    3 => TextRenderingMode::Invisible,
444                    4 => TextRenderingMode::FillAndClip,
445                    5 => TextRenderingMode::StrokeAndClip,
446                    6 => TextRenderingMode::FillAndStrokeAndClip,
447                    7 => TextRenderingMode::Clip,
448                    _ => {
449                        warn!("unknown text rendering mode {}", r.0.as_i32());
450
451                        TextRenderingMode::Fill
452                    }
453                };
454
455                context.get_mut().text_state.render_mode = mode;
456            }
457            TypedInstruction::NextLineAndSetLeading(n) => {
458                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
459                context.get_mut().text_state.leading = -ty as f32;
460                text::next_line(context, tx, ty)
461            }
462            TypedInstruction::ShapeGlyph(_) => {}
463            TypedInstruction::XObject(x) => {
464                if let Some(x_object) = resources.get_x_object(
465                    x.0,
466                    Box::new(|_| None),
467                    Box::new(|s| XObject::new(&s, &context.settings.warning_sink)),
468                ) {
469                    draw_xobject(&x_object, resources, context, device);
470                }
471            }
472            TypedInstruction::InlineImage(i) => {
473                let warning_sink = context.settings.warning_sink.clone();
474                if let Some(x_object) = ImageXObject::new(
475                    &i.0,
476                    |name| context.get_color_space(resources, name.clone()),
477                    &warning_sink,
478                ) {
479                    draw_image_xobject(&x_object, context, device);
480                }
481            }
482            TypedInstruction::TextRise(t) => {
483                context.get_mut().text_state.rise = t.0.as_f32();
484            }
485            TypedInstruction::Shading(s) => {
486                if let Some(sp) = resources
487                    .get_shading(s.0, Box::new(|_| None), Box::new(Some))
488                    .and_then(|o| dict_or_stream(&o))
489                    .and_then(|s| Shading::new(&s.0, s.1.as_ref()))
490                    .map(|s| {
491                        Pattern::Shading(ShadingPattern {
492                            shading: Arc::new(s),
493                            matrix: Affine::IDENTITY,
494                        })
495                    })
496                {
497                    context.save_state();
498                    context.push_root_transform();
499                    let st = context.get_mut();
500                    st.non_stroke_pattern = Some(sp);
501                    st.none_stroke_cs = ColorSpace::pattern();
502
503                    device.set_soft_mask(st.soft_mask.clone());
504                    device.push_transparency_group(st.non_stroke_alpha, None);
505
506                    let bbox = context.bbox().to_path(0.1);
507                    let inverted_bbox = context.get().ctm.inverse() * bbox;
508                    fill_path_impl(context, device, Some(&inverted_bbox));
509
510                    device.pop_transparency_group();
511
512                    context.restore_state();
513                } else {
514                    warn!("failed to process shading");
515                }
516            }
517            TypedInstruction::BeginCompatibility(_) => {}
518            TypedInstruction::EndCompatibility(_) => {}
519            TypedInstruction::ColorGlyph(_) => {}
520            TypedInstruction::ShowTextWithParameters(t) => {
521                context.get_mut().text_state.word_space = t.0.as_f32();
522                context.get_mut().text_state.char_space = t.1.as_f32();
523                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
524                text::show_text_string(context, device, resources, t.2)
525            }
526            _ => {
527                warn!("failed to read an operator");
528            }
529        }
530    }
531
532    while context.num_states() > num_states {
533        restore_state(context, device);
534    }
535}