Skip to main content

pdf_interpret/interpret/
mod.rs

1use crate::FillRule;
2use crate::color::ColorSpace;
3use crate::context::Context;
4use crate::convert::{convert_line_cap, convert_line_join};
5use crate::device::Device;
6use crate::font::{Font, FontData, FontQuery, StandardFont};
7use crate::interpret::path::{
8    close_path, fill_path, fill_path_impl, fill_stroke_path, stroke_path,
9};
10use crate::interpret::state::{TextStateFont, handle_gs};
11use crate::interpret::text::TextRenderingMode;
12use crate::pattern::{Pattern, ShadingPattern};
13use crate::shading::Shading;
14use crate::util::{OptionLog, RectExt};
15use crate::x_object::{
16    FormXObject, ImageXObject, XObject, draw_form_xobject, draw_image_xobject, draw_xobject,
17};
18use kurbo::{Affine, Point, Shape};
19use log::warn;
20use pdf_syntax::content::ops::TypedInstruction;
21use pdf_syntax::object::dict::keys::{ANNOTS, AP, F, FT, MCID, N, OC, RECT};
22use pdf_syntax::object::{Array, Dict, Name, Object, Rect, Stream, dict_or_stream};
23use pdf_syntax::page::{Page, Resources};
24use smallvec::smallvec;
25use std::sync::{Arc, OnceLock};
26
27pub(crate) mod path;
28pub(crate) mod state;
29pub(crate) mod text;
30
31pub use state::ActiveTransferFunction;
32
33/// A callback function for resolving font queries.
34///
35/// The first argument is the raw data, the second argument is the index in case the font
36/// is a TTC, otherwise it should be 0.
37pub type FontResolverFn = Arc<dyn Fn(&FontQuery) -> Option<(FontData, u32)> + Send + Sync>;
38/// A callback function for resolving cmap names to their files.
39pub type CMapResolverFn =
40    Arc<dyn Fn(pdf_font::cmap::CMapName<'_>) -> Option<&'static [u8]> + Send + Sync>;
41/// A callback function for resolving warnings during interpretation.
42pub type WarningSinkFn = Arc<dyn Fn(InterpreterWarning) + Send + Sync>;
43
44#[derive(Clone)]
45/// Settings that should be applied during the interpretation process.
46pub struct InterpreterSettings {
47    /// Nearly every PDF contains text. In most cases, PDF files embed the fonts they use, and
48    /// pdf-interpret can therefore read the font files and do all the processing needed. However, there
49    /// are two problems:
50    /// - Fonts don't _have_ to be embedded, it's possible that the PDF file only defines the basic
51    ///   metadata of the font, like its name, but relies on the PDF processor to find that font
52    ///   in its environment.
53    /// - The PDF specification requires a list of 14 fonts that should always be available to a
54    ///   PDF processor. These include:
55    ///   - Times New Roman (Normal, Bold, Italic, `BoldItalic`)
56    ///   - Courier (Normal, Bold, Italic, `BoldItalic`)
57    ///   - Helvetica (Normal, Bold, Italic, `BoldItalic`)
58    ///   - `ZapfDingBats`
59    ///   - Symbol
60    ///
61    /// Because of this, if any of the above situations occurs, this callback will be called, which
62    /// expects the data of an appropriate font to be returned, if available. If no such font is
63    /// provided, the text will most likely fail to render.
64    ///
65    /// For the font data, there are two different formats that are accepted:
66    /// - Any valid TTF/OTF font.
67    /// - A valid CFF font program.
68    ///
69    /// The following recommendations are given for the implementation of this callback function.
70    ///
71    /// For the standard fonts, in case the original fonts are available on the system, you should
72    /// just return those. Otherwise, for Helvetica, Courier and Times New Roman, the best alternative
73    /// are the corresponding fonts of the [Liberation font family](https://github.com/liberationfonts/liberation-fonts).
74    /// If you prefer smaller fonts, you can use the [Foxit CFF fonts](https://github.com/LaurenzV/pdf-interpret/tree/master/assets/standard_fonts),
75    /// which are much smaller but are missing glyphs for certain scripts.
76    ///
77    /// For the `Symbol` and `ZapfDingBats` fonts, you should also prefer the system fonts, and if
78    /// not available to you, you can, similarly to above, use the corresponding fonts from Foxit.
79    ///
80    /// If you don't want having to deal with this, you can just enable the `embed-fonts` feature
81    /// and use the default implementation of the callback.
82    pub font_resolver: FontResolverFn,
83    /// A callback for resolving cmaps that aren't embedded.
84    ///
85    /// When the PDF requires using a cmap that is not directly embedded in the PDF,
86    /// this callback will be called to attempt fetching the data of the file.
87    ///
88    /// When the `embed-cmaps` feature is enabled, this uses `load_embedded`
89    /// method from `pdf-interpret-cmap` by default, which embeds the cmap files for
90    /// all 61 predefined cmaps
91    /// that the PDF specification requires to be readily available on a system.
92    /// Otherwise, you can implement your custom logic for lazily fetching the
93    /// data. If you are fine not supporting such PDFs, you can simply pass a closure
94    /// that always returns `None`.
95    pub cmap_resolver: CMapResolverFn,
96    /// In certain cases, `pdf-interpret` will emit a warning in case an issue was encountered while interpreting
97    /// the PDF file. Providing a callback allows you to catch those warnings and handle them, if desired.
98    pub warning_sink: WarningSinkFn,
99    /// Whether annotations should be rendered as well.
100    ///
101    /// Note that this feature is currently not fully implemented yet, so some
102    /// annotations might be missing.
103    pub render_annotations: bool,
104    /// Whether to skip `/FT /Sig` (signature widget) appearance streams.
105    ///
106    /// Rendering sets this to `true` to match MuPDF behaviour, but text
107    /// extraction should set it to `false` so that signature text is included.
108    pub skip_signature_widgets: bool,
109    /// Maximum number of content-stream operators to interpret.
110    ///
111    /// `None` preserves the historical unlimited behavior for callers that do
112    /// not configure processing limits.
113    pub max_operator_count: Option<u64>,
114}
115
/// Known paths for CJK fonts, ordered by preference.
/// Covers macOS, Ubuntu/Debian, Fedora/RHEL, and Alpine Linux.
///
/// Every path is listed exactly once: the candidates are probed in order and the first readable
/// file wins, so a repeated entry could only ever cause a redundant failed read.
#[cfg(feature = "embed-fonts")]
const CJK_FONT_CANDIDATE_PATHS: &[&str] = &[
    // macOS — ships with every installation
    "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
    // Noto CJK — most common on Linux (the first entry is also the one used for Alpine Linux)
    "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
    // WenQuanYi — fallback on older Ubuntu/Debian systems
    "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
    // Arphic (traditional)
    "/usr/share/fonts/truetype/arphic/uming.ttc",
];
135
/// Process-wide cache for the CJK system font bytes.
///
/// The cell is filled on the first lookup; a cached `None` means no CJK font was found.
#[cfg(feature = "embed-fonts")]
static SYSTEM_CJK_FONT: OnceLock<Option<Arc<Vec<u8>>>> = OnceLock::new();

/// Try to load a CJK font from the host system, returning its raw bytes.
///
/// The candidate paths are probed in order the first time this is called and the outcome is
/// cached, so later calls only clone the shared handle (or return `None` again).
#[cfg(feature = "embed-fonts")]
fn system_cjk_font() -> Option<FontData> {
    let cached = SYSTEM_CJK_FONT.get_or_init(|| {
        // The first candidate that can be read from disk wins.
        let loaded = CJK_FONT_CANDIDATE_PATHS.iter().find_map(|path| {
            let bytes = std::fs::read(path).ok()?;
            log::debug!("CJK fallback font loaded from {path}");
            Some(Arc::new(bytes))
        });

        if loaded.is_none() {
            log::warn!(
                "no system CJK font found; non-embedded CJK fonts will render with a Latin fallback"
            );
        }

        loaded
    });

    match cached {
        Some(bytes) => {
            // The annotated binding converts the cloned `Arc<Vec<u8>>` into `FontData`.
            let font_data: FontData = bytes.clone();
            Some(font_data)
        }
        None => None,
    }
}
159
160impl Default for InterpreterSettings {
161    fn default() -> Self {
162        Self {
163            #[cfg(not(feature = "embed-fonts"))]
164            font_resolver: Arc::new(|_| None),
165            #[cfg(feature = "embed-fonts")]
166            font_resolver: Arc::new(|query| match query {
167                FontQuery::Standard(s) => Some(s.get_font_data()),
168                FontQuery::Fallback(f) => {
169                    // For non-embedded CJK fonts (Adobe-GB1, CNS1, Japan1, Korea1)
170                    // try a system CJK font first so characters render correctly.
171                    // This avoids the situation where a Latin fallback font is used
172                    // and Chinese/Japanese/Korean glyphs appear as "d", "a", etc.
173                    if f.character_collection
174                        .as_ref()
175                        .is_some_and(|cc| cc.family.is_cjk())
176                        && let Some(data) = system_cjk_font()
177                    {
178                        return Some((data, 0));
179                    }
180                    Some(f.pick_standard_font().get_font_data())
181                }
182            }),
183            #[cfg(feature = "embed-cmaps")]
184            cmap_resolver: Arc::new(pdf_font::cmap::load_embedded),
185            #[cfg(not(feature = "embed-cmaps"))]
186            cmap_resolver: Arc::new(|_| None),
187            warning_sink: Arc::new(|_| {}),
188            render_annotations: true,
189            skip_signature_widgets: true,
190            max_operator_count: None,
191        }
192    }
193}
194
/// Warnings that can occur while interpreting a PDF file.
///
/// Warnings are plain, copyable values that can be compared with `==`, which makes it easy to
/// filter or de-duplicate them inside a warning sink.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum InterpreterWarning {
    /// An unsupported font kind was encountered.
    ///
    /// Currently, only CID fonts with non-identity encoding are unsupported.
    UnsupportedFont,
    /// An image failed to decode.
    ImageDecodeFailure,
}
205
206/// interpret the contents of the page and render them into the device.
207pub fn interpret_page<'a>(
208    page: &Page<'a>,
209    context: &mut Context<'a>,
210    device: &mut impl Device<'a>,
211) {
212    let resources = page.resources();
213    interpret(page.typed_operations(), resources, context, device);
214
215    if context.settings.render_annotations
216        && let Some(annot_arr) = page.raw().get::<Array<'_>>(ANNOTS)
217    {
218        for annot in annot_arr.iter::<Dict<'_>>() {
219            let flags = annot.get::<u32>(F).unwrap_or(0);
220
221            // Annotation should be hidden.
222            if flags & 2 != 0 {
223                continue;
224            }
225
226            // MuPDF renders signature widgets (/FT /Sig) with its own built-in
227            // "SIGN here" indicator and ignores the custom /AP/N stream, so we
228            // skip AP rendering for these annotations to match MuPDF output.
229            // Text extraction disables this skip so signature text is included.
230            if context.settings.skip_signature_widgets
231                && annot
232                    .get::<Name>(FT)
233                    .as_deref()
234                    .is_some_and(|n| n == b"Sig")
235            {
236                continue;
237            }
238
239            if let Some(apx) = annot
240                .get::<Dict<'_>>(AP)
241                .and_then(|ap| ap.get::<Stream<'_>>(N))
242                .and_then(|o| FormXObject::new(&o))
243            {
244                let Some(rect) = annot.get::<Rect>(RECT) else {
245                    continue;
246                };
247
248                let annot_rect = rect.to_kurbo();
249                // 12.5.5. Appearance streams
250                // "The algorithm outlined in this subclause shall be used
251                // to map from the coordinate system of the appearance XObject."
252
253                // 1) The appearance’s bounding box (specified by its BBox entry)
254                // shall be transformed, using Matrix, to produce a
255                // quadrilateral with arbitrary orientation. The transformed
256                // appearance box is the smallest upright rectangle that
257                // encompasses this quadrilateral.
258                let transformed_rect = (apx.matrix
259                    * kurbo::Rect::new(
260                        apx.bbox[0] as f64,
261                        apx.bbox[1] as f64,
262                        apx.bbox[2] as f64,
263                        apx.bbox[3] as f64,
264                    )
265                    .to_path(0.1))
266                .bounding_box();
267
268                // 2) A matrix A shall be computed that scales and translates
269                // the transformed appearance box to align with the edges
270                // of the annotation’s rectangle (specified by the Rect entry).
271                // A maps the lower-left corner (the corner with the smallest
272                // x and y coordinates) and the upper-right corner (the
273                // corner with the greatest x and y coordinates) of the
274                // transformed appearance box to the corresponding corners
275                // of the annotation’s rectangle.
276                let affine = Affine::new([
277                    annot_rect.width() / transformed_rect.width(),
278                    0.0,
279                    0.0,
280                    annot_rect.height() / transformed_rect.height(),
281                    annot_rect.x0 - transformed_rect.x0,
282                    annot_rect.y0 - transformed_rect.y0,
283                ]);
284
285                // 3) Matrix shall be concatenated with A to form a matrix
286                // AA that maps from the appearance’s coordinate system to
287                // the annotation’s rectangle in default user space.
288                context.save_state();
289                context.pre_concat_affine(affine);
290                context.push_root_transform();
291
292                draw_form_xobject(resources, &apx, context, device);
293                context.pop_root_transform();
294                context.restore_state(device);
295            }
296        }
297    }
298}
299
300/// Interpret the instructions from `ops` and render them into the device.
301pub fn interpret<'a, 'b>(
302    ops: impl Iterator<Item = TypedInstruction<'b>>,
303    resources: &Resources<'a>,
304    context: &mut Context<'a>,
305    device: &mut impl Device<'a>,
306) {
307    let num_states = context.num_states();
308    let max_operator_count = context.settings.max_operator_count.unwrap_or(u64::MAX);
309    let mut operator_count = 0_u64;
310
311    context.save_state();
312
313    for op in ops {
314        operator_count = operator_count.saturating_add(1);
315        if operator_count > max_operator_count {
316            warn!(
317                "content stream operator count exceeds {max_operator_count}, stopping interpretation"
318            );
319            break;
320        }
321
322        match op {
323            TypedInstruction::SaveState(_) => context.save_state(),
324            TypedInstruction::StrokeColorDeviceRgb(s) => {
325                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_rgb();
326                context.get_mut().graphics_state.stroke_color =
327                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
328            }
329            TypedInstruction::StrokeColorDeviceGray(s) => {
330                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_gray();
331                context.get_mut().graphics_state.stroke_color = smallvec![s.0.as_f32()];
332            }
333            TypedInstruction::StrokeColorCmyk(s) => {
334                context.get_mut().graphics_state.stroke_cs = ColorSpace::device_cmyk();
335                context.get_mut().graphics_state.stroke_color =
336                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
337            }
338            TypedInstruction::LineWidth(w) => {
339                context.get_mut().graphics_state.stroke_props.line_width = w.0.as_f32();
340            }
341            TypedInstruction::LineCap(c) => {
342                context.get_mut().graphics_state.stroke_props.line_cap = convert_line_cap(c);
343            }
344            TypedInstruction::LineJoin(j) => {
345                context.get_mut().graphics_state.stroke_props.line_join = convert_line_join(j);
346            }
347            TypedInstruction::MiterLimit(l) => {
348                context.get_mut().graphics_state.stroke_props.miter_limit = l.0.as_f32();
349            }
350            TypedInstruction::Transform(t) => {
351                context.pre_concat_transform(t);
352            }
353            TypedInstruction::RectPath(r) => {
354                let rect = kurbo::Rect::new(
355                    r.0.as_f64(),
356                    r.1.as_f64(),
357                    r.0.as_f64() + r.2.as_f64(),
358                    r.1.as_f64() + r.3.as_f64(),
359                )
360                .to_path(0.1);
361                context.path_mut().extend(rect);
362            }
363            TypedInstruction::MoveTo(m) => {
364                let p = Point::new(m.0.as_f64(), m.1.as_f64());
365                *(context.last_point_mut()) = p;
366                *(context.sub_path_start_mut()) = p;
367                context.path_mut().move_to(p);
368            }
369            TypedInstruction::FillPathEvenOdd(_) => {
370                fill_path(context, device, FillRule::EvenOdd);
371            }
372            TypedInstruction::FillPathNonZero(_) => {
373                fill_path(context, device, FillRule::NonZero);
374            }
375            TypedInstruction::FillPathNonZeroCompatibility(_) => {
376                fill_path(context, device, FillRule::NonZero);
377            }
378            TypedInstruction::FillAndStrokeEvenOdd(_) => {
379                fill_stroke_path(context, device, FillRule::EvenOdd);
380            }
381            TypedInstruction::FillAndStrokeNonZero(_) => {
382                fill_stroke_path(context, device, FillRule::NonZero);
383            }
384            TypedInstruction::CloseAndStrokePath(_) => {
385                close_path(context);
386                stroke_path(context, device);
387            }
388            TypedInstruction::CloseFillAndStrokeEvenOdd(_) => {
389                close_path(context);
390                fill_stroke_path(context, device, FillRule::EvenOdd);
391            }
392            TypedInstruction::CloseFillAndStrokeNonZero(_) => {
393                close_path(context);
394                fill_stroke_path(context, device, FillRule::NonZero);
395            }
396            TypedInstruction::NonStrokeColorDeviceGray(s) => {
397                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_gray();
398                context.get_mut().graphics_state.non_stroke_color = smallvec![s.0.as_f32()];
399            }
400            TypedInstruction::NonStrokeColorDeviceRgb(s) => {
401                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_rgb();
402                context.get_mut().graphics_state.non_stroke_color =
403                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32()];
404            }
405            TypedInstruction::NonStrokeColorCmyk(s) => {
406                context.get_mut().graphics_state.none_stroke_cs = ColorSpace::device_cmyk();
407                context.get_mut().graphics_state.non_stroke_color =
408                    smallvec![s.0.as_f32(), s.1.as_f32(), s.2.as_f32(), s.3.as_f32()];
409            }
410            TypedInstruction::LineTo(m) => {
411                if !context.path().elements().is_empty() {
412                    let last_point = *context.last_point();
413                    let mut p = Point::new(m.0.as_f64(), m.1.as_f64());
414                    *(context.last_point_mut()) = p;
415                    if last_point == p {
416                        // Add a small delta so that zero width lines can still have a round stroke.
417                        p.x += 0.0001;
418                    }
419
420                    context.path_mut().line_to(p);
421                }
422            }
423            TypedInstruction::CubicTo(c) => {
424                if !context.path().elements().is_empty() {
425                    let p1 = Point::new(c.0.as_f64(), c.1.as_f64());
426                    let p2 = Point::new(c.2.as_f64(), c.3.as_f64());
427                    let p3 = Point::new(c.4.as_f64(), c.5.as_f64());
428
429                    *(context.last_point_mut()) = p3;
430
431                    context.path_mut().curve_to(p1, p2, p3);
432                }
433            }
434            TypedInstruction::CubicStartTo(c) => {
435                if !context.path().elements().is_empty() {
436                    let p1 = *context.last_point();
437                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
438                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
439
440                    *(context.last_point_mut()) = p3;
441
442                    context.path_mut().curve_to(p1, p2, p3);
443                }
444            }
445            TypedInstruction::CubicEndTo(c) => {
446                if !context.path().elements().is_empty() {
447                    let p2 = Point::new(c.0.as_f64(), c.1.as_f64());
448                    let p3 = Point::new(c.2.as_f64(), c.3.as_f64());
449
450                    *(context.last_point_mut()) = p3;
451
452                    context.path_mut().curve_to(p2, p3, p3);
453                }
454            }
455            TypedInstruction::ClosePath(_) => {
456                close_path(context);
457            }
458            TypedInstruction::SetGraphicsState(gs) => {
459                if let Some(gs) = resources
460                    .get_ext_g_state(gs.0.clone())
461                    .warn_none(&format!("failed to get extgstate {}", gs.0.as_str()))
462                {
463                    handle_gs(&gs, context, resources);
464                }
465            }
466            TypedInstruction::StrokePath(_) => {
467                stroke_path(context, device);
468            }
469            TypedInstruction::EndPath(_) => {
470                if let Some(clip) = *context.clip()
471                    && !context.path().elements().is_empty()
472                {
473                    let clip_path = context.get().ctm * context.path().clone();
474                    context.push_clip_path(clip_path, clip, device);
475
476                    *(context.clip_mut()) = None;
477                }
478
479                context.path_mut().truncate(0);
480            }
481            TypedInstruction::NonStrokeColor(c) => {
482                let fill_c = &mut context.get_mut().graphics_state.non_stroke_color;
483                fill_c.truncate(0);
484
485                for e in c.0 {
486                    fill_c.push(e.as_f32());
487                }
488            }
489            TypedInstruction::StrokeColor(c) => {
490                let stroke_c = &mut context.get_mut().graphics_state.stroke_color;
491                stroke_c.truncate(0);
492
493                for e in c.0 {
494                    stroke_c.push(e.as_f32());
495                }
496            }
497            TypedInstruction::ClipNonZero(_) => {
498                *(context.clip_mut()) = Some(FillRule::NonZero);
499            }
500            TypedInstruction::ClipEvenOdd(_) => {
501                *(context.clip_mut()) = Some(FillRule::EvenOdd);
502            }
503            TypedInstruction::RestoreState(_) => context.restore_state(device),
504            TypedInstruction::FlatnessTolerance(_) => {
505                // Ignore for now.
506            }
507            TypedInstruction::ColorSpaceStroke(c) => {
508                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
509                    named
510                } else {
511                    context
512                        .get_color_space(resources, c.0)
513                        .unwrap_or(ColorSpace::device_gray())
514                };
515
516                context.get_mut().graphics_state.stroke_color = cs.initial_color();
517                context.get_mut().graphics_state.stroke_cs = cs;
518            }
519            TypedInstruction::ColorSpaceNonStroke(c) => {
520                let cs = if let Some(named) = ColorSpace::new_from_name(c.0.clone()) {
521                    named
522                } else {
523                    context
524                        .get_color_space(resources, c.0)
525                        .unwrap_or(ColorSpace::device_gray())
526                };
527
528                context.get_mut().graphics_state.non_stroke_color = cs.initial_color();
529                context.get_mut().graphics_state.none_stroke_cs = cs;
530            }
531            TypedInstruction::DashPattern(p) => {
532                context.get_mut().graphics_state.stroke_props.dash_offset = p.1.as_f32();
533                // kurbo apparently cannot properly deal with offsets that are exactly 0.
534                context.get_mut().graphics_state.stroke_props.dash_array =
535                    p.0.iter::<f32>()
536                        .map(|n| if n == 0.0 { 0.01 } else { n })
537                        .collect();
538            }
539            TypedInstruction::RenderingIntent(_) => {
540                // Ignore for now.
541            }
542            TypedInstruction::NonStrokeColorNamed(n) => {
543                context.get_mut().graphics_state.non_stroke_color =
544                    n.0.into_iter().map(|n| n.as_f32()).collect();
545                context.get_mut().graphics_state.non_stroke_pattern = n.1.and_then(|name| {
546                    resources
547                        .get_pattern(name)
548                        .and_then(|d| Pattern::new(d, context, resources))
549                });
550            }
551            TypedInstruction::StrokeColorNamed(n) => {
552                context.get_mut().graphics_state.stroke_color =
553                    n.0.into_iter().map(|n| n.as_f32()).collect();
554                context.get_mut().graphics_state.stroke_pattern = n.1.and_then(|name| {
555                    resources
556                        .get_pattern(name)
557                        .and_then(|d| Pattern::new(d, context, resources))
558                });
559            }
560            TypedInstruction::BeginMarkedContentWithProperties(bdc) => {
561                // Properties can be either:
562                // 1. A Name that references an entry in the Resources/Properties dictionary
563                // 2. An inline dictionary with an OC key
564
565                let mcid = dict_or_stream(&bdc.1).and_then(|(props, _)| props.get::<i32>(MCID));
566
567                let oc = bdc
568                    .1
569                    .clone()
570                    .into_name()
571                    .and_then(|name| {
572                        let r = resources.properties.get_ref(name.clone())?;
573                        let d = resources
574                            .properties
575                            .get::<Dict<'_>>(name)
576                            .unwrap_or_default();
577                        Some((d, r))
578                    })
579                    .or_else(|| {
580                        let (props, _) = dict_or_stream(&bdc.1)?;
581                        let r = props.get_ref(OC)?;
582                        let d = props.get::<Dict<'_>>(OC).unwrap_or_default();
583                        Some((d, r))
584                    });
585
586                if let Some((dict, oc_ref)) = oc {
587                    context.ocg_state.begin_ocg(&dict, oc_ref.into());
588                } else {
589                    context.ocg_state.begin_marked_content();
590                }
591
592                device.begin_marked_content(&bdc.0, mcid);
593            }
594            TypedInstruction::MarkedContentPointWithProperties(_) => {}
595            TypedInstruction::EndMarkedContent(_) => {
596                context.ocg_state.end_marked_content();
597                device.end_marked_content();
598            }
599            TypedInstruction::MarkedContentPoint(_) => {}
600            TypedInstruction::BeginMarkedContent(bmc) => {
601                context.ocg_state.begin_marked_content();
602                device.begin_marked_content(&bmc.0, None);
603            }
604            TypedInstruction::BeginText(_) => {
605                context.get_mut().text_state.text_matrix = Affine::IDENTITY;
606                context.get_mut().text_state.text_line_matrix = Affine::IDENTITY;
607            }
608            TypedInstruction::SetTextMatrix(m) => {
609                let m = Affine::new([
610                    m.0.as_f64(),
611                    m.1.as_f64(),
612                    m.2.as_f64(),
613                    m.3.as_f64(),
614                    m.4.as_f64(),
615                    m.5.as_f64(),
616                ]);
617                context.get_mut().text_state.text_line_matrix = m;
618                context.get_mut().text_state.text_matrix = m;
619            }
620            TypedInstruction::EndText(_) => {
621                let has_outline = context
622                    .get()
623                    .text_state
624                    .clip_paths
625                    .segments()
626                    .next()
627                    .is_some();
628
629                if has_outline {
630                    let clip_path = context.get().ctm * context.get().text_state.clip_paths.clone();
631
632                    context.push_clip_path(clip_path, FillRule::NonZero, device);
633                }
634
635                context.get_mut().text_state.clip_paths.truncate(0);
636            }
637            TypedInstruction::TextFont(t) => {
638                let name = t.0;
639
640                // In case we are unable to resolve the font, two scenarios:
641                // 1) If the font doesn't exist in the first place in the resource dictionary,
642                // assume Helvetica (this seems to be what other PDF viewers do).
643                // 2) In case it's `None` because we were unable to resolve the font
644                // (for whatever reason), leave it as `None`. Better showing no
645                // text at all than garbage text.
646                let font = if let Some(font_dict) = resources.get_font(name.clone()) {
647                    context.resolve_font(&font_dict)
648                } else {
649                    Font::new_standard(StandardFont::Helvetica, &context.settings.font_resolver)
650                        .map(TextStateFont::Fallback)
651                };
652
653                context.get_mut().text_state.font_size = t.1.as_f32();
654                context.get_mut().text_state.font = font;
655            }
656            TypedInstruction::ShowText(s) => {
657                if context.get().text_state.font.is_none() {
658                    // Even if no explicit font was set, we try to assume Helvetica. Acrobat
659                    // seems to do the same.
660                    context.get_mut().text_state.font = Font::new_standard(
661                        StandardFont::Helvetica,
662                        &context.settings.font_resolver,
663                    )
664                    .map(TextStateFont::Fallback);
665                }
666
667                text::show_text_string(context, device, resources, s.0);
668            }
669            TypedInstruction::ShowTexts(s) => {
670                if context.get().text_state.font.is_none() {
671                    // Even if no explicit font was set, we try to assume Helvetica. Acrobat
672                    // seems to do the same.
673                    context.get_mut().text_state.font = Font::new_standard(
674                        StandardFont::Helvetica,
675                        &context.settings.font_resolver,
676                    )
677                    .map(TextStateFont::Fallback);
678                }
679
680                for obj in s.0.iter::<Object<'_>>() {
681                    if let Some(adjustment) = obj.clone().into_f32() {
682                        // ANN[r17/TEX1] Surface TJ adjustment to the Device
683                        // before mutating the text matrix so extractors can
684                        // record the word-boundary signal alongside the
685                        // spatial gap they'd otherwise have to infer.
686                        device.text_adjustment(adjustment);
687                        context.get_mut().text_state.apply_adjustment(adjustment);
688                    } else if let Some(text) = obj.into_string() {
689                        text::show_text_string(context, device, resources, text);
690                    }
691                }
692            }
693            TypedInstruction::HorizontalScaling(h) => {
694                context.get_mut().text_state.horizontal_scaling = h.0.as_f32();
695            }
696            TypedInstruction::TextLeading(tl) => {
697                context.get_mut().text_state.leading = tl.0.as_f32();
698            }
699            TypedInstruction::CharacterSpacing(c) => {
700                context.get_mut().text_state.char_space = c.0.as_f32();
701            }
702            TypedInstruction::WordSpacing(w) => {
703                context.get_mut().text_state.word_space = w.0.as_f32();
704            }
705            TypedInstruction::NextLine(n) => {
706                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
707                text::next_line(context, tx, ty);
708            }
709            TypedInstruction::NextLineUsingLeading(_) => {
710                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
711            }
712            TypedInstruction::NextLineAndShowText(n) => {
713                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
714                text::show_text_string(context, device, resources, n.0);
715            }
716            TypedInstruction::TextRenderingMode(r) => {
717                let mode = match r.0.as_i64() {
718                    0 => TextRenderingMode::Fill,
719                    1 => TextRenderingMode::Stroke,
720                    2 => TextRenderingMode::FillStroke,
721                    3 => TextRenderingMode::Invisible,
722                    4 => TextRenderingMode::FillAndClip,
723                    5 => TextRenderingMode::StrokeAndClip,
724                    6 => TextRenderingMode::FillAndStrokeAndClip,
725                    7 => TextRenderingMode::Clip,
726                    _ => {
727                        warn!("unknown text rendering mode {}", r.0.as_i64());
728
729                        TextRenderingMode::Fill
730                    }
731                };
732
733                context.get_mut().text_state.render_mode = mode;
734            }
735            TypedInstruction::NextLineAndSetLeading(n) => {
736                let (tx, ty) = (n.0.as_f64(), n.1.as_f64());
737                context.get_mut().text_state.leading = -ty as f32;
738                text::next_line(context, tx, ty);
739            }
740            // d1: uncolored (shape) glyph header.  The advance width (wx) and
741            // bounding-box arguments are intentionally ignored here: the glyph
742            // advance is taken from the Type3 font's /Widths array (via
743            // Font::code_advance), and the is_shape_glyph flag is determined
744            // by the pre-scan in Type3::render_glyph before the stream is
745            // interpreted.
746            TypedInstruction::ShapeGlyph(_) => {}
747            TypedInstruction::XObject(x) => {
748                let cache = context.object_cache.clone();
749                let transfer_function = context.get().graphics_state.transfer_function.clone();
750                if let Some(x_object) = resources.get_x_object(x.0).and_then(|s| {
751                    XObject::new(
752                        &s,
753                        &context.settings.warning_sink,
754                        &cache,
755                        transfer_function.clone(),
756                    )
757                }) {
758                    draw_xobject(&x_object, resources, context, device);
759                }
760            }
761            TypedInstruction::InlineImage(i) => {
762                let warning_sink = context.settings.warning_sink.clone();
763                let transfer_function = context.get().graphics_state.transfer_function.clone();
764                let cache = context.object_cache.clone();
765                if let Some(x_object) = ImageXObject::new(
766                    &i.0,
767                    |name| context.get_color_space(resources, name.clone()),
768                    &warning_sink,
769                    &cache,
770                    false,
771                    transfer_function,
772                ) {
773                    draw_image_xobject(&x_object, context, device);
774                }
775            }
776            TypedInstruction::TextRise(t) => {
777                context.get_mut().text_state.rise = t.0.as_f32();
778            }
779            TypedInstruction::Shading(s) => {
780                if !context.ocg_state.is_visible() {
781                    continue;
782                }
783
784                let transfer_function = context.get().graphics_state.transfer_function.clone();
785
786                if let Some(sp) = resources
787                    .get_shading(s.0)
788                    .and_then(|o| dict_or_stream(&o))
789                    .and_then(|s| Shading::new(&s.0, s.1.as_ref(), &context.object_cache))
790                    .map(|s| {
791                        Pattern::Shading(ShadingPattern {
792                            shading: Arc::new(s),
793                            matrix: Affine::IDENTITY,
794                            opacity: context.get().graphics_state.non_stroke_alpha,
795                            transfer_function: transfer_function.clone(),
796                        })
797                    })
798                {
799                    context.save_state();
800                    context.push_root_transform();
801                    let st = context.get_mut();
802                    st.graphics_state.non_stroke_pattern = Some(sp);
803                    st.graphics_state.none_stroke_cs = ColorSpace::pattern();
804
805                    device.set_soft_mask(st.graphics_state.soft_mask.clone());
806                    device.set_blend_mode(st.graphics_state.blend_mode);
807
808                    let bbox = context.bbox().to_path(0.1);
809                    let inverted_bbox = context.get().ctm.inverse() * bbox;
810                    fill_path_impl(context, device, FillRule::NonZero, Some(&inverted_bbox));
811
812                    context.pop_root_transform();
813                    context.restore_state(device);
814                } else {
815                    warn!("failed to process shading");
816                }
817            }
818            TypedInstruction::BeginCompatibility(_) => {}
819            TypedInstruction::EndCompatibility(_) => {}
820            // d0: colored glyph header.  The advance width (wx) argument is
821            // intentionally ignored here for the same reason as d1 above.
822            TypedInstruction::ColorGlyph(_) => {}
823            TypedInstruction::ShowTextWithParameters(t) => {
824                context.get_mut().text_state.word_space = t.0.as_f32();
825                context.get_mut().text_state.char_space = t.1.as_f32();
826                text::next_line(context, 0.0, -context.get().text_state.leading as f64);
827                text::show_text_string(context, device, resources, t.2);
828            }
829            _ => {
830                warn!("failed to read an operator");
831            }
832        }
833    }
834
835    while context.num_states() > num_states {
836        context.restore_state(device);
837    }
838}