hayro_interpret/interpret/
mod.rs

1use crate::ClipPath;
2use crate::FillRule;
3use crate::color::ColorSpace;
4use crate::context::Context;
5use crate::convert::{convert_line_cap, convert_line_join};
6use crate::device::Device;
7use crate::font::{FontData, FontQuery};
8use crate::interpret::path::{
9    close_path, fill_path, fill_path_impl, fill_stroke_path, stroke_path,
10};
11use crate::interpret::state::{handle_gs, restore_state, save_sate};
12use crate::interpret::text::TextRenderingMode;
13use crate::pattern::{Pattern, ShadingPattern};
14use crate::shading::Shading;
15use crate::util::OptionLog;
16use crate::x_object::{ImageXObject, XObject, draw_image_xobject, draw_xobject};
17use hayro_syntax::content::ops::TypedInstruction;
18use hayro_syntax::object::{Dict, Object, dict_or_stream};
19use hayro_syntax::page::{Page, Resources};
20use kurbo::{Affine, Point, Shape};
21use log::warn;
22use smallvec::smallvec;
23use std::sync::Arc;
24
25pub(crate) mod path;
26pub(crate) mod state;
27pub(crate) mod text;
28
/// A callback function for resolving font queries.
///
/// The callback receives the [`FontQuery`] describing the requested font and returns
/// `None` if no suitable font is available. Otherwise it returns a tuple whose first
/// element is the raw font data and whose second element is the index of the font in
/// case the data is a TTC; for any other kind of font data the index should be 0.
pub type FontResolverFn = Arc<dyn Fn(&FontQuery) -> Option<(FontData, u32)> + Send + Sync>;
/// A callback function that receives the warnings emitted during interpretation.
pub type WarningSinkFn = Arc<dyn Fn(InterpreterWarning) + Send + Sync>;
36
#[derive(Clone)]
/// Settings that should be applied during the interpretation process.
pub struct InterpreterSettings {
    /// The callback used to resolve fonts that cannot be read from the PDF file itself.
    ///
    /// Nearly every PDF contains text. In most cases, PDF files embed the fonts they use, and
    /// hayro can therefore read the font files and do all the processing needed. However, there
    /// are two problems:
    /// - Fonts don't _have_ to be embedded. It's possible that the PDF file only defines the basic
    ///   metadata of the font, like its name, but relies on the PDF processor to find that font
    ///   in its environment.
    /// - The PDF specification requires a list of 14 fonts that should always be available to a
    ///   PDF processor. These include:
    ///   - Times New Roman (Normal, Bold, Italic, BoldItalic)
    ///   - Courier (Normal, Bold, Italic, BoldItalic)
    ///   - Helvetica (Normal, Bold, Italic, BoldItalic)
    ///   - ZapfDingBats
    ///   - Symbol
    ///
    /// Because of this, if any of the above situations occurs, this callback will be called. It
    /// is expected to return the data of an appropriate font, if available. If no such font is
    /// provided, the text will most likely fail to render.
    ///
    /// For the font data, two different formats are accepted:
    /// - Any valid TTF/OTF font.
    /// - A valid CFF font program.
    ///
    /// The following recommendations are given for the implementation of this callback function.
    ///
    /// For the standard fonts, in case the original fonts are available on the system, you should
    /// just return those. Otherwise, for Helvetica, Courier and Times New Roman, the best
    /// alternatives are the corresponding fonts of the
    /// [Liberation font family](https://github.com/liberationfonts/liberation-fonts).
    /// If you prefer smaller fonts, you can use the
    /// [Foxit CFF fonts](https://github.com/LaurenzV/hayro/tree/master/assets/standard_fonts),
    /// which are much smaller but are missing glyphs for certain scripts.
    ///
    /// For the `Symbol` and `ZapfDingBats` fonts, you should also prefer the system fonts. If they
    /// are not available to you, you can, similarly to above, use the corresponding fonts from
    /// Foxit.
    ///
    /// If you don't want to deal with this yourself, you can just enable the `embed-fonts` feature
    /// and use the default implementation of the callback.
    pub font_resolver: FontResolverFn,

    /// The callback that receives warnings emitted during interpretation.
    ///
    /// In certain cases, `hayro` will emit a warning when an issue is encountered while
    /// interpreting the PDF file. Providing a callback allows you to catch those warnings and
    /// handle them, if desired.
    pub warning_sink: WarningSinkFn,
}
81
82impl Default for InterpreterSettings {
83    fn default() -> Self {
84        Self {
85            #[cfg(not(feature = "embed-fonts"))]
86            font_resolver: Arc::new(|_| None),
87            #[cfg(feature = "embed-fonts")]
88            font_resolver: Arc::new(|query| match query {
89                FontQuery::Standard(s) => Some(s.get_font_data()),
90                FontQuery::Fallback(f) => Some(f.pick_standard_font().get_font_data()),
91            }),
92            warning_sink: Arc::new(|_| {}),
93        }
94    }
95}
96
#[derive(Copy, Clone, Debug)]
/// Warnings that can occur while interpreting a PDF file.
///
/// Warnings are passed to the [`WarningSinkFn`] configured in the interpreter settings.
pub enum InterpreterWarning {
    /// A JPX image was encountered, even though the `jpeg2000` feature is not enabled.
    JpxImage,
    /// An unsupported font kind was encountered.
    ///
    /// Currently, only CID fonts with non-identity encoding are unsupported.
    UnsupportedFont,
    /// An image failed to decode.
    ImageDecodeFailure,
}
109
110/// interpret the contents of the page and render them into the device.
111pub fn interpret_page<'a>(
112    page: &Page<'a>,
113    context: &mut Context<'a>,
114    device: &mut impl Device<'a>,
115) {
116    let resources = page.resources();
117    interpret(page.typed_operations(), resources, context, device)
118}
119
120/// Interpret the instructions from `ops` and render them into the device.
121pub fn interpret<'a, 'b>(
122    ops: impl Iterator<Item = TypedInstruction<'b>>,
123    resources: &Resources<'a>,
124    context: &mut Context<'a>,
125    device: &mut impl Device<'a>,
126) {
127    let num_states = context.num_states();
128    let n_clips = context.get().n_clips;
129
130    save_sate(context);
131
132    for op in ops {
133        match op {
134            TypedInstruction::SaveState(_) => save_sate(context),
135            TypedInstruction::StrokeColorDeviceRgb(s) => {
136                context.get_mut().stroke_cs = ColorSpace::device_rgb();
137                context.get_mut().stroke_color =
138                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
139            }
140            TypedInstruction::StrokeColorDeviceGray(s) => {
141                context.get_mut().stroke_cs = ColorSpace::device_gray();
142                context.get_mut().stroke_color = smallvec![s.0.as_f32()];
143            }
144            TypedInstruction::StrokeColorCmyk(s) => {
145                context.get_mut().stroke_cs = ColorSpace::device_cmyk();
146                context.get_mut().stroke_color =
147                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
148            }
149            TypedInstruction::LineWidth(w) => {
150                context.get_mut().stroke_props.line_width = w.0.as_f32();
151            }
152            TypedInstruction::LineCap(c) => {
153                context.get_mut().stroke_props.line_cap = convert_line_cap(c);
154            }
155            TypedInstruction::LineJoin(j) => {
156                context.get_mut().stroke_props.line_join = convert_line_join(j);
157            }
158            TypedInstruction::MiterLimit(l) => {
159                context.get_mut().stroke_props.miter_limit = l.0.as_f32();
160            }
161            TypedInstruction::Transform(t) => {
162                context.pre_concat_transform(t);
163            }
164            TypedInstruction::RectPath(r) => {
165                let rect = kurbo::Rect::new(
166                    r.0.as_f64(),
167                    r.1.as_f64(),
168                    r.0.as_f64() + r.2.as_f64(),
169                    r.1.as_f64() + r.3.as_f64(),
170                )
171                .to_path(0.1);
172                context.path_mut().extend(rect);
173            }
174            TypedInstruction::MoveTo(m) => {
175                let p = Point::new(m.0.as_f64(), m.1.as_f64());
176                *(context.last_point_mut()) = p;
177                *(context.sub_path_start_mut()) = p;
178                context.path_mut().move_to(p);
179            }
180            TypedInstruction::FillPathEvenOdd(_) => {
181                context.get_mut().fill_rule = FillRule::EvenOdd;
182                fill_path(context, device);
183            }
184            TypedInstruction::FillPathNonZero(_) => {
185                context.get_mut().fill_rule = FillRule::NonZero;
186                fill_path(context, device);
187            }
188            TypedInstruction::FillPathNonZeroCompatibility(_) => {
189                context.get_mut().fill_rule = FillRule::NonZero;
190                fill_path(context, device);
191            }
192            TypedInstruction::FillAndStrokeEvenOdd(_) => {
193                context.get_mut().fill_rule = FillRule::EvenOdd;
194                fill_stroke_path(context, device);
195            }
196            TypedInstruction::FillAndStrokeNonZero(_) => {
197                context.get_mut().fill_rule = FillRule::NonZero;
198                fill_stroke_path(context, device);
199            }
200            TypedInstruction::CloseAndStrokePath(_) => {
201                close_path(context);
202                stroke_path(context, device);
203            }
204            TypedInstruction::CloseFillAndStrokeEvenOdd(_) => {
205                close_path(context);
206                context.get_mut().fill_rule = FillRule::EvenOdd;
207                fill_stroke_path(context, device);
208            }
209            TypedInstruction::CloseFillAndStrokeNonZero(_) => {
210                close_path(context);
211                context.get_mut().fill_rule = FillRule::NonZero;
212                fill_stroke_path(context, device);
213            }
214            TypedInstruction::NonStrokeColorDeviceGray(s) => {
215                context.get_mut().none_stroke_cs = ColorSpace::device_gray();
216                context.get_mut().non_stroke_color = smallvec![s.0.as_f32()];
217            }
218            TypedInstruction::NonStrokeColorDeviceRgb(s) => {
219                context.get_mut().none_stroke_cs = ColorSpace::device_rgb();
220                context.get_mut().non_stroke_color =
221                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
222            }
223            TypedInstruction::NonStrokeColorCmyk(s) => {
224                context.get_mut().none_stroke_cs = ColorSpace::device_cmyk();
225                context.get_mut().non_stroke_color =
226                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
227            }
228            TypedInstruction::LineTo(m) => {
229                let last_point = *context.last_point();
230                let mut p = Point::new(m.0.as_f64(), m.1.as_f64());
231                *(context.last_point_mut()) = p;
232                if last_point == p {
233                    // Add a small delta so that zero width lines can still have a round stroke.
234                    p.x += 0.0001;
235                }
236
237                context.path_mut().line_to(p);
238            }
239            TypedInstruction::CubicTo(c) => {
240                let p1 = Point::new(c.0.as_f64(), c.1.as_f64());
241                let p2 = Point::new(c.2.as_f64(), c.3.as_f64());
242                let p3 = Point::new(c.4.as_f64(), c.5.as_f64());
243
244                *(context.last_point_mut()) = p3;
245
246                context.path_mut().curve_to(p1, p2, p3)
247            }
248            TypedInstruction::CubicStartTo(c) => {
249                let p1 = *context.last_point();
250                let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
251                let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
252
253                *(context.last_point_mut()) = p3;
254
255                context.path_mut().curve_to(p1, p2, p3)
256            }
257            TypedInstruction::CubicEndTo(c) => {
258                let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
259                let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
260
261                *(context.last_point_mut()) = p3;
262
263                context.path_mut().curve_to(p2, p3, p3)
264            }
265            TypedInstruction::ClosePath(_) => {
266                close_path(context);
267            }
268            TypedInstruction::SetGraphicsState(gs) => {
269                if let Some(gs) = resources
270                    .get_ext_g_state::<Dict>(gs.0.clone(), Box::new(|_| None), Box::new(Some))
271                    .warn_none(&format!("failed to get extgstate {}", gs.0.as_str()))
272                {
273                    handle_gs(&gs, context, resources);
274                }
275            }
276            TypedInstruction::StrokePath(_) => {
277                stroke_path(context, device);
278            }
279            TypedInstruction::EndPath(_) => {
280                if let Some(clip) = *context.clip()
281                    && !context.path().elements().is_empty()
282                {
283                    device.push_clip_path(&ClipPath {
284                        path: context.get().ctm * context.path().clone(),
285                        fill: clip,
286                    });
287
288                    context.get_mut().n_clips += 1;
289
290                    *(context.clip_mut()) = None;
291                }
292
293                context.path_mut().truncate(0);
294            }
295            TypedInstruction::NonStrokeColor(c) => {
296                let fill_c = &mut context.get_mut().non_stroke_color;
297                fill_c.truncate(0);
298
299                for e in c.0 {
300                    fill_c.push(e.as_f32());
301                }
302            }
303            TypedInstruction::StrokeColor(c) => {
304                let stroke_c = &mut context.get_mut().stroke_color;
305                stroke_c.truncate(0);
306
307                for e in c.0 {
308                    stroke_c.push(e.as_f32());
309                }
310            }
311            TypedInstruction::ClipNonZero(_) => {
312                *(context.clip_mut()) = Some(FillRule::NonZero);
313            }
314            TypedInstruction::ClipEvenOdd(_) => {
315                *(context.clip_mut()) = Some(FillRule::EvenOdd);
316            }
317            TypedInstruction::RestoreState(_) => restore_state(context, device),
318            TypedInstruction::FlatnessTolerance(_) => {
319                // Ignore for now.
320            }
321            TypedInstruction::ColorSpaceStroke(c) => {
322                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
323                    named
324                } else {
325                    context
326                        .get_color_space(resources, c.0)
327                        .unwrap_or(ColorSpace::device_gray())
328                };
329
330                context.get_mut().stroke_color = cs.initial_color();
331                context.get_mut().stroke_cs = cs;
332            }
333            TypedInstruction::ColorSpaceNonStroke(c) => {
334                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
335                    named
336                } else {
337                    context
338                        .get_color_space(resources, c.0)
339                        .unwrap_or(ColorSpace::device_gray())
340                };
341
342                context.get_mut().non_stroke_color = cs.initial_color();
343                context.get_mut().none_stroke_cs = cs;
344            }
345            TypedInstruction::DashPattern(p) => {
346                context.get_mut().stroke_props.dash_offset = p.1.as_f32();
347                // kurbo apparently cannot properly deal with offsets that are exactly 0.
348                context.get_mut().stroke_props.dash_array =
349                    p.0.iter::<f32>()
350                        .map(|n| if n == 0.0 { 0.01 } else { n })
351                        .collect();
352            }
353            TypedInstruction::RenderingIntent(_) => {
354                // Ignore for now.
355            }
356            TypedInstruction::NonStrokeColorNamed(n) => {
357                context.get_mut().non_stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
358                context.get_mut().non_stroke_pattern = n.1.and_then(|name| {
359                    resources.get_pattern(
360                        name,
361                        Box::new(|_| None),
362                        Box::new(|d| Pattern::new(d, context, resources)),
363                    )
364                });
365            }
366            TypedInstruction::StrokeColorNamed(n) => {
367                context.get_mut().stroke_color = n.0.into_iter().map(|n| n.as_f32()).collect();
368                context.get_mut().stroke_pattern = n.1.and_then(|name| {
369                    resources.get_pattern(
370                        name,
371                        Box::new(|_| None),
372                        Box::new(|d| Pattern::new(d, context, resources)),
373                    )
374                });
375            }
376            TypedInstruction::BeginMarkedContentWithProperties(_) => {}
377            TypedInstruction::MarkedContentPointWithProperties(_) => {}
378            TypedInstruction::EndMarkedContent(_) => {}
379            TypedInstruction::MarkedContentPoint(_) => {}
380            TypedInstruction::BeginMarkedContent(_) => {}
381            TypedInstruction::BeginText(_) => {
382                context.get_mut().text_state.text_matrix = Affine::IDENTITY;
383                context.get_mut().text_state.text_line_matrix = Affine::IDENTITY;
384            }
385            TypedInstruction::SetTextMatrix(m) => {
386                let m = Affine::new([
387                    m.0.as_f64(),
388                    m.1.as_f64(),
389                    m.2.as_f64(),
390                    m.3.as_f64(),
391                    m.4.as_f64(),
392                    m.5.as_f64(),
393                ]);
394                context.get_mut().text_state.text_line_matrix = m;
395                context.get_mut().text_state.text_matrix = m;
396            }
397            TypedInstruction::EndText(_) => {
398                let has_outline = context
399                    .get()
400                    .text_state
401                    .clip_paths
402                    .segments()
403                    .next()
404                    .is_some();
405
406                if has_outline {
407                    device.push_clip_path(&ClipPath {
408                        path: context.get().ctm * context.get().text_state.clip_paths.clone(),
409                        fill: FillRule::NonZero,
410                    });
411                    context.get_mut().n_clips += 1;
412                }
413
414                context.get_mut().text_state.clip_paths.truncate(0);
415            }
416            TypedInstruction::TextFont(t) => {
417                let font = context.get_font(resources, t.0);
418                context.get_mut().text_state.font_size = t.1.as_f32();
419                context.get_mut().text_state.font = font;
420            }
421            TypedInstruction::ShowText(s) => {
422                text::show_text_string(context, device, resources, s.0);
423            }
424            TypedInstruction::ShowTexts(s) => {
425                for obj in s.0.iter::<Object>() {
426                    if let Some(adjustment) = obj.clone().into_f32() {
427                        context.get_mut().text_state.apply_adjustment(adjustment);
428                    } else if let Some(text) = obj.into_string() {
429                        text::show_text_string(context, device, resources, text);
430                    }
431                }
432            }
433            TypedInstruction::HorizontalScaling(h) => {
434                context.get_mut().text_state.horizontal_scaling = h.0.as_f32();
435            }
436            TypedInstruction::TextLeading(tl) => {
437                context.get_mut().text_state.leading = tl.0.as_f32();
438            }
439            TypedInstruction::CharacterSpacing(c) => {
440                context.get_mut().text_state.char_space = c.0.as_f32()
441            }
442            TypedInstruction::WordSpacing(w) => {
443                context.get_mut().text_state.word_space = w.0.as_f32();
444            }
445            TypedInstruction::NextLine(n) => {
446                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
447                text::next_line(context, tx, ty)
448            }
449            TypedInstruction::NextLineUsingLeading(_) => {
450                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
451            }
452            TypedInstruction::NextLineAndShowText(n) => {
453                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
454                text::show_text_string(context, device, resources, n.0)
455            }
456            TypedInstruction::TextRenderingMode(r) => {
457                let mode = match r.0.as_i32() {
458                    0 => TextRenderingMode::Fill,
459                    1 => TextRenderingMode::Stroke,
460                    2 => TextRenderingMode::FillStroke,
461                    3 => TextRenderingMode::Invisible,
462                    4 => TextRenderingMode::FillAndClip,
463                    5 => TextRenderingMode::StrokeAndClip,
464                    6 => TextRenderingMode::FillAndStrokeAndClip,
465                    7 => TextRenderingMode::Clip,
466                    _ => {
467                        warn!("unknown text rendering mode {}", r.0.as_i32());
468
469                        TextRenderingMode::Fill
470                    }
471                };
472
473                context.get_mut().text_state.render_mode = mode;
474            }
475            TypedInstruction::NextLineAndSetLeading(n) => {
476                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
477                context.get_mut().text_state.leading = -ty as f32;
478                text::next_line(context, tx, ty)
479            }
480            TypedInstruction::ShapeGlyph(_) => {}
481            TypedInstruction::XObject(x) => {
482                let cache = context.object_cache.clone();
483                if let Some(x_object) = resources.get_x_object(
484                    x.0,
485                    Box::new(|_| None),
486                    Box::new(|s| XObject::new(&s, &context.settings.warning_sink, &cache)),
487                ) {
488                    draw_xobject(&x_object, resources, context, device);
489                }
490            }
491            TypedInstruction::InlineImage(i) => {
492                let warning_sink = context.settings.warning_sink.clone();
493                let cache = context.object_cache.clone();
494                if let Some(x_object) = ImageXObject::new(
495                    &i.0,
496                    |name| context.get_color_space(resources, name.clone()),
497                    &warning_sink,
498                    &cache,
499                ) {
500                    draw_image_xobject(&x_object, context, device);
501                }
502            }
503            TypedInstruction::TextRise(t) => {
504                context.get_mut().text_state.rise = t.0.as_f32();
505            }
506            TypedInstruction::Shading(s) => {
507                if let Some(sp) = resources
508                    .get_shading(s.0, Box::new(|_| None), Box::new(Some))
509                    .and_then(|o| dict_or_stream(&o))
510                    .and_then(|s| Shading::new(&s.0, s.1.as_ref(), &context.object_cache))
511                    .map(|s| {
512                        Pattern::Shading(ShadingPattern {
513                            shading: Arc::new(s),
514                            matrix: Affine::IDENTITY,
515                        })
516                    })
517                {
518                    context.save_state();
519                    context.push_root_transform();
520                    let st = context.get_mut();
521                    st.non_stroke_pattern = Some(sp);
522                    st.none_stroke_cs = ColorSpace::pattern();
523
524                    device.set_soft_mask(st.soft_mask.clone());
525                    device.push_transparency_group(st.non_stroke_alpha, None);
526
527                    let bbox = context.bbox().to_path(0.1);
528                    let inverted_bbox = context.get().ctm.inverse() * bbox;
529                    fill_path_impl(context, device, Some(&inverted_bbox));
530
531                    device.pop_transparency_group();
532
533                    context.restore_state();
534                } else {
535                    warn!("failed to process shading");
536                }
537            }
538            TypedInstruction::BeginCompatibility(_) => {}
539            TypedInstruction::EndCompatibility(_) => {}
540            TypedInstruction::ColorGlyph(_) => {}
541            TypedInstruction::ShowTextWithParameters(t) => {
542                context.get_mut().text_state.word_space = t.0.as_f32();
543                context.get_mut().text_state.char_space = t.1.as_f32();
544                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
545                text::show_text_string(context, device, resources, t.2)
546            }
547            _ => {
548                warn!("failed to read an operator");
549            }
550        }
551    }
552
553    while context.num_states() > num_states {
554        restore_state(context, device);
555    }
556
557    // Invalid files may still have pending clip paths.
558    while context.get().n_clips > n_clips {
559        device.pop_clip_path();
560        context.get_mut().n_clips -= 1;
561    }
562}