Skip to main content

pdf_interpret/interpret/
mod.rs

1use crate::FillRule;
2use crate::color::ColorSpace;
3use crate::context::Context;
4use crate::convert::{convert_line_cap, convert_line_join};
5use crate::device::Device;
6use crate::font::{Font, FontData, FontQuery, StandardFont};
7use crate::interpret::path::{
8    close_path, fill_path, fill_path_impl, fill_stroke_path, stroke_path,
9};
10use crate::interpret::state::{TextStateFont, handle_gs};
11use crate::interpret::text::TextRenderingMode;
12use crate::pattern::{Pattern, ShadingPattern};
13use crate::shading::Shading;
14use crate::util::{OptionLog, RectExt};
15use crate::x_object::{
16    FormXObject, ImageXObject, XObject, draw_form_xobject, draw_image_xobject, draw_xobject,
17};
18use kurbo::{Affine, Point, Shape};
19use log::warn;
20use pdf_syntax::content::ops::TypedInstruction;
21use pdf_syntax::object::dict::keys::{ANNOTS, AP, F, FT, MCID, N, OC, RECT};
22use pdf_syntax::object::{Array, Dict, Name, Object, Rect, Stream, dict_or_stream};
23use pdf_syntax::page::{Page, Resources};
24use smallvec::smallvec;
25use std::sync::{Arc, OnceLock};
26
27pub(crate) mod path;
28pub(crate) mod state;
29pub(crate) mod text;
30
31pub use state::ActiveTransferFunction;
32
/// A callback function for resolving font queries.
///
/// On success, the callback returns a tuple: the first element is the raw font data, the
/// second element is the index of the font in case the data is a TTC, otherwise it should be 0.
/// `None` signals that no font could be provided for the query.
pub type FontResolverFn = Arc<dyn Fn(&FontQuery) -> Option<(FontData, u32)> + Send + Sync>;
/// A callback function for resolving cmap names to the contents of their files.
///
/// Returns `None` if no data is available for the requested cmap.
pub type CMapResolverFn =
    Arc<dyn Fn(pdf_font::cmap::CMapName<'_>) -> Option<&'static [u8]> + Send + Sync>;
/// A callback function for receiving warnings that are emitted during interpretation.
pub type WarningSinkFn = Arc<dyn Fn(InterpreterWarning) + Send + Sync>;
43
#[derive(Clone)]
/// Settings that should be applied during the interpretation process.
///
/// All callbacks are stored behind an `Arc`, so cloning the settings only clones the handles.
pub struct InterpreterSettings {
    /// Nearly every PDF contains text. In most cases, PDF files embed the fonts they use, and
    /// pdf-interpret can therefore read the font files and do all the processing needed. However, there
    /// are two problems:
    /// - Fonts don't _have_ to be embedded, it's possible that the PDF file only defines the basic
    ///   metadata of the font, like its name, but relies on the PDF processor to find that font
    ///   in its environment.
    /// - The PDF specification requires a list of 14 fonts that should always be available to a
    ///   PDF processor. These include:
    ///   - Times New Roman (Normal, Bold, Italic, `BoldItalic`)
    ///   - Courier (Normal, Bold, Italic, `BoldItalic`)
    ///   - Helvetica (Normal, Bold, Italic, `BoldItalic`)
    ///   - `ZapfDingBats`
    ///   - Symbol
    ///
    /// Because of this, if any of the above situations occurs, this callback will be called, which
    /// expects the data of an appropriate font to be returned, if available. If no such font is
    /// provided, the text will most likely fail to render.
    ///
    /// For the font data, there are two different formats that are accepted:
    /// - Any valid TTF/OTF font.
    /// - A valid CFF font program.
    ///
    /// The following recommendations are given for the implementation of this callback function.
    ///
    /// For the standard fonts, in case the original fonts are available on the system, you should
    /// just return those. Otherwise, for Helvetica, Courier and Times New Roman, the best alternatives
    /// are the corresponding fonts of the [Liberation font family](https://github.com/liberationfonts/liberation-fonts).
    /// If you prefer smaller fonts, you can use the [Foxit CFF fonts](https://github.com/LaurenzV/pdf-interpret/tree/master/assets/standard_fonts),
    /// which are much smaller but are missing glyphs for certain scripts.
    ///
    /// For the `Symbol` and `ZapfDingBats` fonts, you should also prefer the system fonts, and if
    /// they are not available to you, you can, similarly to above, use the corresponding fonts from Foxit.
    ///
    /// If you don't want to deal with this, you can just enable the `embed-fonts` feature
    /// and use the default implementation of the callback. Without that feature, the default
    /// callback always returns `None`.
    pub font_resolver: FontResolverFn,
    /// A callback for resolving cmaps that aren't embedded.
    ///
    /// When the PDF requires using a cmap that is not directly embedded in the PDF,
    /// this callback will be called to attempt fetching the data of the file.
    ///
    /// When the `embed-cmaps` feature is enabled, this uses
    /// `pdf_font::cmap::load_embedded` by default, which embeds the cmap files for
    /// all 61 predefined cmaps
    /// that the PDF specification requires to be readily available on a system.
    /// Otherwise, you can implement your custom logic for lazily fetching the
    /// data. If you are fine not supporting such PDFs, you can simply pass a closure
    /// that always returns `None` (this is also the default when `embed-cmaps` is disabled).
    pub cmap_resolver: CMapResolverFn,
    /// In certain cases, `pdf-interpret` will emit a warning in case an issue was encountered while interpreting
    /// the PDF file. Providing a callback allows you to catch those warnings and handle them, if desired.
    ///
    /// The default callback discards all warnings.
    pub warning_sink: WarningSinkFn,
    /// Whether annotations should be rendered as well. Defaults to `true`.
    ///
    /// Note that this feature is currently not fully implemented yet, so some
    /// annotations might be missing.
    pub render_annotations: bool,
    /// Whether to skip `/FT /Sig` (signature widget) appearance streams. Defaults to `true`.
    ///
    /// Rendering sets this to `true` to match MuPDF behaviour, but text
    /// extraction should set it to `false` so that signature text is included.
    pub skip_signature_widgets: bool,
}
110
/// Known paths for CJK fonts, ordered by preference.
/// Covers macOS, Ubuntu/Debian, Fedora/RHEL, and Alpine Linux.
///
/// Every path is listed exactly once: the candidates are probed in order with a file read,
/// so a duplicate entry would only repeat a lookup that has already failed.
#[cfg(feature = "embed-fonts")]
const CJK_FONT_CANDIDATE_PATHS: &[&str] = &[
    // macOS — ships with every installation
    "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
    // Noto CJK — most common on Linux (the first path is also the one used for Alpine Linux)
    "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
    // WenQuanYi — fallback on older Ubuntu/Debian systems
    "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
    // Arphic (traditional)
    "/usr/share/fonts/truetype/arphic/uming.ttc",
];
130
/// Process-wide cache for the CJK system font bytes. The inner `None` records that the
/// lookup ran but did not find any of the candidate fonts.
#[cfg(feature = "embed-fonts")]
static SYSTEM_CJK_FONT: OnceLock<Option<Arc<Vec<u8>>>> = OnceLock::new();

/// Return the raw bytes of a CJK font found on the host system, if any.
///
/// The candidate paths are probed in order the first time this function is called; the
/// outcome (including a failed lookup) is cached in [`SYSTEM_CJK_FONT`] and reused afterwards.
#[cfg(feature = "embed-fonts")]
fn system_cjk_font() -> Option<FontData> {
    let cached = SYSTEM_CJK_FONT.get_or_init(|| {
        // The first candidate that can be read wins.
        let loaded = CJK_FONT_CANDIDATE_PATHS.iter().find_map(|path| {
            let bytes = std::fs::read(path).ok()?;
            log::debug!("CJK fallback font loaded from {path}");
            Some(Arc::new(bytes))
        });

        if loaded.is_none() {
            log::warn!(
                "no system CJK font found; non-embedded CJK fonts will render with a Latin fallback"
            );
        }

        loaded
    });

    // Cloning only bumps the reference count; the typed binding converts the
    // concrete `Arc<Vec<u8>>` into `FontData`.
    let shared: FontData = cached.as_ref()?.clone();
    Some(shared)
}
154
155impl Default for InterpreterSettings {
156    fn default() -> Self {
157        Self {
158            #[cfg(not(feature = "embed-fonts"))]
159            font_resolver: Arc::new(|_| None),
160            #[cfg(feature = "embed-fonts")]
161            font_resolver: Arc::new(|query| match query {
162                FontQuery::Standard(s) => Some(s.get_font_data()),
163                FontQuery::Fallback(f) => {
164                    // For non-embedded CJK fonts (Adobe-GB1, CNS1, Japan1, Korea1)
165                    // try a system CJK font first so characters render correctly.
166                    // This avoids the situation where a Latin fallback font is used
167                    // and Chinese/Japanese/Korean glyphs appear as "d", "a", etc.
168                    if f.character_collection
169                        .as_ref()
170                        .is_some_and(|cc| cc.family.is_cjk())
171                        && let Some(data) = system_cjk_font()
172                    {
173                        return Some((data, 0));
174                    }
175                    Some(f.pick_standard_font().get_font_data())
176                }
177            }),
178            #[cfg(feature = "embed-cmaps")]
179            cmap_resolver: Arc::new(pdf_font::cmap::load_embedded),
180            #[cfg(not(feature = "embed-cmaps"))]
181            cmap_resolver: Arc::new(|_| None),
182            warning_sink: Arc::new(|_| {}),
183            render_annotations: true,
184            skip_signature_widgets: true,
185        }
186    }
187}
188
/// The kinds of warnings that may be reported through the warning sink while a PDF file
/// is being interpreted.
#[derive(Copy, Clone, Debug)]
pub enum InterpreterWarning {
    /// A font of an unsupported kind was encountered.
    ///
    /// At the moment, this only concerns CID fonts with a non-identity encoding.
    UnsupportedFont,
    /// Decoding an image failed.
    ImageDecodeFailure,
}
199
200/// interpret the contents of the page and render them into the device.
201pub fn interpret_page<'a>(
202    page: &Page<'a>,
203    context: &mut Context<'a>,
204    device: &mut impl Device<'a>,
205) {
206    let resources = page.resources();
207    interpret(page.typed_operations(), resources, context, device);
208
209    if context.settings.render_annotations
210        && let Some(annot_arr) = page.raw().get::<Array<'_>>(ANNOTS)
211    {
212        for annot in annot_arr.iter::<Dict<'_>>() {
213            let flags = annot.get::<u32>(F).unwrap_or(0);
214
215            // Annotation should be hidden.
216            if flags & 2 != 0 {
217                continue;
218            }
219
220            // MuPDF renders signature widgets (/FT /Sig) with its own built-in
221            // "SIGN here" indicator and ignores the custom /AP/N stream, so we
222            // skip AP rendering for these annotations to match MuPDF output.
223            // Text extraction disables this skip so signature text is included.
224            if context.settings.skip_signature_widgets
225                && annot
226                    .get::<Name>(FT)
227                    .as_deref()
228                    .is_some_and(|n| n == b"Sig")
229            {
230                continue;
231            }
232
233            if let Some(apx) = annot
234                .get::<Dict<'_>>(AP)
235                .and_then(|ap| ap.get::<Stream<'_>>(N))
236                .and_then(|o| FormXObject::new(&o))
237            {
238                let Some(rect) = annot.get::<Rect>(RECT) else {
239                    continue;
240                };
241
242                let annot_rect = rect.to_kurbo();
243                // 12.5.5. Appearance streams
244                // "The algorithm outlined in this subclause shall be used
245                // to map from the coordinate system of the appearance XObject."
246
247                // 1) The appearance’s bounding box (specified by its BBox entry)
248                // shall be transformed, using Matrix, to produce a
249                // quadrilateral with arbitrary orientation. The transformed
250                // appearance box is the smallest upright rectangle that
251                // encompasses this quadrilateral.
252                let transformed_rect = (apx.matrix
253                    * kurbo::Rect::new(
254                        apx.bbox[0] as f64,
255                        apx.bbox[1] as f64,
256                        apx.bbox[2] as f64,
257                        apx.bbox[3] as f64,
258                    )
259                    .to_path(0.1))
260                .bounding_box();
261
262                // 2) A matrix A shall be computed that scales and translates
263                // the transformed appearance box to align with the edges
264                // of the annotation’s rectangle (specified by the Rect entry).
265                // A maps the lower-left corner (the corner with the smallest
266                // x and y coordinates) and the upper-right corner (the
267                // corner with the greatest x and y coordinates) of the
268                // transformed appearance box to the corresponding corners
269                // of the annotation’s rectangle.
270                let affine = Affine::new([
271                    annot_rect.width() / transformed_rect.width(),
272                    0.0,
273                    0.0,
274                    annot_rect.height() / transformed_rect.height(),
275                    annot_rect.x0 - transformed_rect.x0,
276                    annot_rect.y0 - transformed_rect.y0,
277                ]);
278
279                // 3) Matrix shall be concatenated with A to form a matrix
280                // AA that maps from the appearance’s coordinate system to
281                // the annotation’s rectangle in default user space.
282                context.save_state();
283                context.pre_concat_affine(affine);
284                context.push_root_transform();
285
286                draw_form_xobject(resources, &apx, context, device);
287                context.pop_root_transform();
288                context.restore_state(device);
289            }
290        }
291    }
292}
293
294/// Interpret the instructions from `ops` and render them into the device.
295pub fn interpret<'a, 'b>(
296    ops: impl Iterator<Item = TypedInstruction<'b>>,
297    resources: &Resources<'a>,
298    context: &mut Context<'a>,
299    device: &mut impl Device<'a>,
300) {
301    let num_states = context.num_states();
302
303    context.save_state();
304
305    for op in ops {
306        match op {
307            TypedInstruction::SaveState(_) => context.save_state(),
308            TypedInstruction::StrokeColorDeviceRgb(s) => {
309                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_rgb();
310                context.get_mut().graphics_state.stroke_color =
311                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
312            }
313            TypedInstruction::StrokeColorDeviceGray(s) => {
314                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_gray();
315                context.get_mut().graphics_state.stroke_color = smallvec![s.0.as_f32()];
316            }
317            TypedInstruction::StrokeColorCmyk(s) => {
318                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_cmyk();
319                context.get_mut().graphics_state.stroke_color =
320                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
321            }
322            TypedInstruction::LineWidth(w) => {
323                context.get_mut().graphics_state.stroke_props.line_width = w.0.as_f32();
324            }
325            TypedInstruction::LineCap(c) => {
326                context.get_mut().graphics_state.stroke_props.line_cap = convert_line_cap(c);
327            }
328            TypedInstruction::LineJoin(j) => {
329                context.get_mut().graphics_state.stroke_props.line_join = convert_line_join(j);
330            }
331            TypedInstruction::MiterLimit(l) => {
332                context.get_mut().graphics_state.stroke_props.miter_limit = l.0.as_f32();
333            }
334            TypedInstruction::Transform(t) => {
335                context.pre_concat_transform(t);
336            }
337            TypedInstruction::RectPath(r) => {
338                let rect = kurbo::Rect::new(
339                    r.0.as_f64(),
340                    r.1.as_f64(),
341                    r.0.as_f64() + r.2.as_f64(),
342                    r.1.as_f64() + r.3.as_f64(),
343                )
344                .to_path(0.1);
345                context.path_mut().extend(rect);
346            }
347            TypedInstruction::MoveTo(m) => {
348                let p = Point::new(m.0.as_f64(), m.1.as_f64());
349                *(context.last_point_mut()) = p;
350                *(context.sub_path_start_mut()) = p;
351                context.path_mut().move_to(p);
352            }
353            TypedInstruction::FillPathEvenOdd(_) => {
354                fill_path(context, device, FillRule::EvenOdd);
355            }
356            TypedInstruction::FillPathNonZero(_) => {
357                fill_path(context, device, FillRule::NonZero);
358            }
359            TypedInstruction::FillPathNonZeroCompatibility(_) => {
360                fill_path(context, device, FillRule::NonZero);
361            }
362            TypedInstruction::FillAndStrokeEvenOdd(_) => {
363                fill_stroke_path(context, device, FillRule::EvenOdd);
364            }
365            TypedInstruction::FillAndStrokeNonZero(_) => {
366                fill_stroke_path(context, device, FillRule::NonZero);
367            }
368            TypedInstruction::CloseAndStrokePath(_) => {
369                close_path(context);
370                stroke_path(context, device);
371            }
372            TypedInstruction::CloseFillAndStrokeEvenOdd(_) => {
373                close_path(context);
374                fill_stroke_path(context, device, FillRule::EvenOdd);
375            }
376            TypedInstruction::CloseFillAndStrokeNonZero(_) => {
377                close_path(context);
378                fill_stroke_path(context, device, FillRule::NonZero);
379            }
380            TypedInstruction::NonStrokeColorDeviceGray(s) => {
381                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_gray();
382                context.get_mut().graphics_state.non_stroke_color = smallvec![s.0.as_f32()];
383            }
384            TypedInstruction::NonStrokeColorDeviceRgb(s) => {
385                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_rgb();
386                context.get_mut().graphics_state.non_stroke_color =
387                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
388            }
389            TypedInstruction::NonStrokeColorCmyk(s) => {
390                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_cmyk();
391                context.get_mut().graphics_state.non_stroke_color =
392                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
393            }
394            TypedInstruction::LineTo(m) => {
395                if !context.path().elements().is_empty() {
396                    let last_point = *context.last_point();
397                    let mut p = Point::new(m.0.as_f64(), m.1.as_f64());
398                    *(context.last_point_mut()) = p;
399                    if last_point == p {
400                        // Add a small delta so that zero width lines can still have a round stroke.
401                        p.x += 0.0001;
402                    }
403
404                    context.path_mut().line_to(p);
405                }
406            }
407            TypedInstruction::CubicTo(c) => {
408                if !context.path().elements().is_empty() {
409                    let p1 = Point::new(c.0.as_f64(), c.1.as_f64());
410                    let p2 = Point::new(c.2.as_f64(), c.3.as_f64());
411                    let p3 = Point::new(c.4.as_f64(), c.5.as_f64());
412
413                    *(context.last_point_mut()) = p3;
414
415                    context.path_mut().curve_to(p1, p2, p3);
416                }
417            }
418            TypedInstruction::CubicStartTo(c) => {
419                if !context.path().elements().is_empty() {
420                    let p1 = *context.last_point();
421                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
422                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
423
424                    *(context.last_point_mut()) = p3;
425
426                    context.path_mut().curve_to(p1, p2, p3);
427                }
428            }
429            TypedInstruction::CubicEndTo(c) => {
430                if !context.path().elements().is_empty() {
431                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
432                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
433
434                    *(context.last_point_mut()) = p3;
435
436                    context.path_mut().curve_to(p2, p3, p3);
437                }
438            }
439            TypedInstruction::ClosePath(_) => {
440                close_path(context);
441            }
442            TypedInstruction::SetGraphicsState(gs) => {
443                if let Some(gs) = resources
444                    .get_ext_g_state(gs.0.clone())
445                    .warn_none(&format!("failed to get extgstate {}", gs.0.as_str()))
446                {
447                    handle_gs(&gs, context, resources);
448                }
449            }
450            TypedInstruction::StrokePath(_) => {
451                stroke_path(context, device);
452            }
453            TypedInstruction::EndPath(_) => {
454                if let Some(clip) = *context.clip()
455                    && !context.path().elements().is_empty()
456                {
457                    let clip_path = context.get().ctm * context.path().clone();
458                    context.push_clip_path(clip_path, clip, device);
459
460                    *(context.clip_mut()) = None;
461                }
462
463                context.path_mut().truncate(0);
464            }
465            TypedInstruction::NonStrokeColor(c) => {
466                let fill_c = &mut context.get_mut().graphics_state.non_stroke_color;
467                fill_c.truncate(0);
468
469                for e in c.0 {
470                    fill_c.push(e.as_f32());
471                }
472            }
473            TypedInstruction::StrokeColor(c) => {
474                let stroke_c = &mut context.get_mut().graphics_state.stroke_color;
475                stroke_c.truncate(0);
476
477                for e in c.0 {
478                    stroke_c.push(e.as_f32());
479                }
480            }
481            TypedInstruction::ClipNonZero(_) => {
482                *(context.clip_mut()) = Some(FillRule::NonZero);
483            }
484            TypedInstruction::ClipEvenOdd(_) => {
485                *(context.clip_mut()) = Some(FillRule::EvenOdd);
486            }
487            TypedInstruction::RestoreState(_) => context.restore_state(device),
488            TypedInstruction::FlatnessTolerance(_) => {
489                // Ignore for now.
490            }
491            TypedInstruction::ColorSpaceStroke(c) => {
492                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
493                    named
494                } else {
495                    context
496                        .get_color_space(resources, c.0)
497                        .unwrap_or(ColorSpace::device_gray())
498                };
499
500                context.get_mut().graphics_state.stroke_color = cs.initial_color();
501                context.get_mut().graphics_state.stroke_cs = cs;
502            }
503            TypedInstruction::ColorSpaceNonStroke(c) => {
504                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
505                    named
506                } else {
507                    context
508                        .get_color_space(resources, c.0)
509                        .unwrap_or(ColorSpace::device_gray())
510                };
511
512                context.get_mut().graphics_state.non_stroke_color = cs.initial_color();
513                context.get_mut().graphics_state.none_stroke_cs = cs;
514            }
515            TypedInstruction::DashPattern(p) => {
516                context.get_mut().graphics_state.stroke_props.dash_offset = p.1.as_f32();
517                // kurbo apparently cannot properly deal with offsets that are exactly 0.
518                context.get_mut().graphics_state.stroke_props.dash_array =
519                    p.0.iter::<f32>()
520                        .map(|n| if n == 0.0 { 0.01 } else { n })
521                        .collect();
522            }
523            TypedInstruction::RenderingIntent(_) => {
524                // Ignore for now.
525            }
526            TypedInstruction::NonStrokeColorNamed(n) => {
527                context.get_mut().graphics_state.non_stroke_color =
528                    n.0.into_iter().map(|n| n.as_f32()).collect();
529                context.get_mut().graphics_state.non_stroke_pattern = n.1.and_then(|name| {
530                    resources
531                        .get_pattern(name)
532                        .and_then(|d| Pattern::new(d, context, resources))
533                });
534            }
535            TypedInstruction::StrokeColorNamed(n) => {
536                context.get_mut().graphics_state.stroke_color =
537                    n.0.into_iter().map(|n| n.as_f32()).collect();
538                context.get_mut().graphics_state.stroke_pattern = n.1.and_then(|name| {
539                    resources
540                        .get_pattern(name)
541                        .and_then(|d| Pattern::new(d, context, resources))
542                });
543            }
544            TypedInstruction::BeginMarkedContentWithProperties(bdc) => {
545                // Properties can be either:
546                // 1. A Name that references an entry in the Resources/Properties dictionary
547                // 2. An inline dictionary with an OC key
548
549                let mcid = dict_or_stream(&bdc.1).and_then(|(props, _)| props.get::<i32>(MCID));
550
551                let oc = bdc
552                    .1
553                    .clone()
554                    .into_name()
555                    .and_then(|name| {
556                        let r = resources.properties.get_ref(name.clone())?;
557                        let d = resources
558                            .properties
559                            .get::<Dict<'_>>(name)
560                            .unwrap_or_default();
561                        Some((d, r))
562                    })
563                    .or_else(|| {
564                        let (props, _) = dict_or_stream(&bdc.1)?;
565                        let r = props.get_ref(OC)?;
566                        let d = props.get::<Dict<'_>>(OC).unwrap_or_default();
567                        Some((d, r))
568                    });
569
570                if let Some((dict, oc_ref)) = oc {
571                    context.ocg_state.begin_ocg(&dict, oc_ref.into());
572                } else {
573                    context.ocg_state.begin_marked_content();
574                }
575
576                device.begin_marked_content(&bdc.0, mcid);
577            }
578            TypedInstruction::MarkedContentPointWithProperties(_) => {}
579            TypedInstruction::EndMarkedContent(_) => {
580                context.ocg_state.end_marked_content();
581                device.end_marked_content();
582            }
583            TypedInstruction::MarkedContentPoint(_) => {}
584            TypedInstruction::BeginMarkedContent(bmc) => {
585                context.ocg_state.begin_marked_content();
586                device.begin_marked_content(&bmc.0, None);
587            }
588            TypedInstruction::BeginText(_) => {
589                context.get_mut().text_state.text_matrix = Affine::IDENTITY;
590                context.get_mut().text_state.text_line_matrix = Affine::IDENTITY;
591            }
592            TypedInstruction::SetTextMatrix(m) => {
593                let m = Affine::new([
594                    m.0.as_f64(),
595                    m.1.as_f64(),
596                    m.2.as_f64(),
597                    m.3.as_f64(),
598                    m.4.as_f64(),
599                    m.5.as_f64(),
600                ]);
601                context.get_mut().text_state.text_line_matrix = m;
602                context.get_mut().text_state.text_matrix = m;
603            }
604            TypedInstruction::EndText(_) => {
605                let has_outline = context
606                    .get()
607                    .text_state
608                    .clip_paths
609                    .segments()
610                    .next()
611                    .is_some();
612
613                if has_outline {
614                    let clip_path = context.get().ctm * context.get().text_state.clip_paths.clone();
615
616                    context.push_clip_path(clip_path, FillRule::NonZero, device);
617                }
618
619                context.get_mut().text_state.clip_paths.truncate(0);
620            }
621            TypedInstruction::TextFont(t) => {
622                let name = t.0;
623
624                // In case we are unable to resolve the font, two scenarios:
625                // 1) If the font doesn't exist in the first place in the resource dictionary,
626                // assume Helvetica (this seems to be what other PDF viewers do).
627                // 2) In case it's `None` because we were unable to resolve the font
628                // (for whatever reason), leave it as `None`. Better showing no
629                // text at all than garbage text.
630                let font = if let Some(font_dict) = resources.get_font(name.clone()) {
631                    context.resolve_font(&font_dict)
632                } else {
633                    Font::new_standard(StandardFont::Helvetica, &context.settings.font_resolver)
634                        .map(TextStateFont::Fallback)
635                };
636
637                context.get_mut().text_state.font_size = t.1.as_f32();
638                context.get_mut().text_state.font = font;
639            }
640            TypedInstruction::ShowText(s) => {
641                if context.get().text_state.font.is_none() {
642                    // Even if no explicit font was set, we try to assume Helvetica. Acrobat
643                    // seems to do the same.
644                    context.get_mut().text_state.font = Font::new_standard(
645                        StandardFont::Helvetica,
646                        &context.settings.font_resolver,
647                    )
648                    .map(TextStateFont::Fallback);
649                }
650
651                text::show_text_string(context, device, resources, s.0);
652            }
653            TypedInstruction::ShowTexts(s) => {
654                if context.get().text_state.font.is_none() {
655                    // Even if no explicit font was set, we try to assume Helvetica. Acrobat
656                    // seems to do the same.
657                    context.get_mut().text_state.font = Font::new_standard(
658                        StandardFont::Helvetica,
659                        &context.settings.font_resolver,
660                    )
661                    .map(TextStateFont::Fallback);
662                }
663
664                for obj in s.0.iter::<Object<'_>>() {
665                    if let Some(adjustment) = obj.clone().into_f32() {
666                        // ANN[r17/TEX1] Surface TJ adjustment to the Device
667                        // before mutating the text matrix so extractors can
668                        // record the word-boundary signal alongside the
669                        // spatial gap they'd otherwise have to infer.
670                        device.text_adjustment(adjustment);
671                        context.get_mut().text_state.apply_adjustment(adjustment);
672                    } else if let Some(text) = obj.into_string() {
673                        text::show_text_string(context, device, resources, text);
674                    }
675                }
676            }
677            TypedInstruction::HorizontalScaling(h) => {
678                context.get_mut().text_state.horizontal_scaling = h.0.as_f32();
679            }
680            TypedInstruction::TextLeading(tl) => {
681                context.get_mut().text_state.leading = tl.0.as_f32();
682            }
683            TypedInstruction::CharacterSpacing(c) => {
684                context.get_mut().text_state.char_space = c.0.as_f32();
685            }
686            TypedInstruction::WordSpacing(w) => {
687                context.get_mut().text_state.word_space = w.0.as_f32();
688            }
689            TypedInstruction::NextLine(n) => {
690                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
691                text::next_line(context, tx, ty);
692            }
693            TypedInstruction::NextLineUsingLeading(_) => {
694                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
695            }
696            TypedInstruction::NextLineAndShowText(n) => {
697                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
698                text::show_text_string(context, device, resources, n.0);
699            }
700            TypedInstruction::TextRenderingMode(r) => {
701                let mode = match r.0.as_i64() {
702                    0 => TextRenderingMode::Fill,
703                    1 => TextRenderingMode::Stroke,
704                    2 => TextRenderingMode::FillStroke,
705                    3 => TextRenderingMode::Invisible,
706                    4 => TextRenderingMode::FillAndClip,
707                    5 => TextRenderingMode::StrokeAndClip,
708                    6 => TextRenderingMode::FillAndStrokeAndClip,
709                    7 => TextRenderingMode::Clip,
710                    _ => {
711                        warn!("unknown text rendering mode {}", r.0.as_i64());
712
713                        TextRenderingMode::Fill
714                    }
715                };
716
717                context.get_mut().text_state.render_mode = mode;
718            }
719            TypedInstruction::NextLineAndSetLeading(n) => {
720                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
721                context.get_mut().text_state.leading = -ty as f32;
722                text::next_line(context, tx, ty);
723            }
724            // d1: uncolored (shape) glyph header.  The advance width (wx) and
725            // bounding-box arguments are intentionally ignored here: the glyph
726            // advance is taken from the Type3 font's /Widths array (via
727            // Font::code_advance), and the is_shape_glyph flag is determined
728            // by the pre-scan in Type3::render_glyph before the stream is
729            // interpreted.
730            TypedInstruction::ShapeGlyph(_) => {}
731            TypedInstruction::XObject(x) => {
732                let cache = context.object_cache.clone();
733                let transfer_function = context.get().graphics_state.transfer_function.clone();
734                if let Some(x_object) = resources.get_x_object(x.0).and_then(|s| {
735                    XObject::new(
736                        &s,
737                        &context.settings.warning_sink,
738                        &cache,
739                        transfer_function.clone(),
740                    )
741                }) {
742                    draw_xobject(&x_object, resources, context, device);
743                }
744            }
745            TypedInstruction::InlineImage(i) => {
746                let warning_sink = context.settings.warning_sink.clone();
747                let transfer_function = context.get().graphics_state.transfer_function.clone();
748                let cache = context.object_cache.clone();
749                if let Some(x_object) = ImageXObject::new(
750                    &i.0,
751                    |name| context.get_color_space(resources, name.clone()),
752                    &warning_sink,
753                    &cache,
754                    false,
755                    transfer_function,
756                ) {
757                    draw_image_xobject(&x_object, context, device);
758                }
759            }
760            TypedInstruction::TextRise(t) => {
761                context.get_mut().text_state.rise = t.0.as_f32();
762            }
763            TypedInstruction::Shading(s) => {
764                if !context.ocg_state.is_visible() {
765                    continue;
766                }
767
768                let transfer_function = context.get().graphics_state.transfer_function.clone();
769
770                if let Some(sp) = resources
771                    .get_shading(s.0)
772                    .and_then(|o| dict_or_stream(&o))
773                    .and_then(|s| Shading::new(&s.0, s.1.as_ref(), &context.object_cache))
774                    .map(|s| {
775                        Pattern::Shading(ShadingPattern {
776                            shading: Arc::new(s),
777                            matrix: Affine::IDENTITY,
778                            opacity: context.get().graphics_state.non_stroke_alpha,
779                            transfer_function: transfer_function.clone(),
780                        })
781                    })
782                {
783                    context.save_state();
784                    context.push_root_transform();
785                    let st = context.get_mut();
786                    st.graphics_state.non_stroke_pattern = Some(sp);
787                    st.graphics_state.none_stroke_cs = ColorSpace::pattern();
788
789                    device.set_soft_mask(st.graphics_state.soft_mask.clone());
790                    device.set_blend_mode(st.graphics_state.blend_mode);
791
792                    let bbox = context.bbox().to_path(0.1);
793                    let inverted_bbox = context.get().ctm.inverse() * bbox;
794                    fill_path_impl(context, device, FillRule::NonZero, Some(&inverted_bbox));
795
796                    context.pop_root_transform();
797                    context.restore_state(device);
798                } else {
799                    warn!("failed to process shading");
800                }
801            }
802            TypedInstruction::BeginCompatibility(_) => {}
803            TypedInstruction::EndCompatibility(_) => {}
804            // d0: colored glyph header.  The advance width (wx) argument is
805            // intentionally ignored here for the same reason as d1 above.
806            TypedInstruction::ColorGlyph(_) => {}
807            TypedInstruction::ShowTextWithParameters(t) => {
808                context.get_mut().text_state.word_space = t.0.as_f32();
809                context.get_mut().text_state.char_space = t.1.as_f32();
810                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
811                text::show_text_string(context, device, resources, t.2);
812            }
813            _ => {
814                warn!("failed to read an operator");
815            }
816        }
817    }
818
819    while context.num_states() > num_states {
820        context.restore_state(device);
821    }
822}