Skip to main content

printpdf/
xobject.rs

1use std::collections::BTreeMap;
2
3use lopdf::StringFormat;
4use serde_derive::{Deserialize, Serialize};
5
6use crate::{
7    date::OffsetDateTime,
8    deserialize::PageState,
9    image_types::{RawImage, ImageOptimizationOptions},
10    matrix::CurTransMat,
11    units::{Pt, Px},
12    Op,
13};
14
/* Parent: Resources dictionary of the page */
/// External object that is referenced from outside the PDF content stream.
///
/// Gets constructed similar to the `ExtGState`, then inserted into the `/XObject` dictionary
/// on the page. You can instantiate `XObjects` with the `/Do` operator. The `layer.add_xobject()`
/// (or better yet, the `layer.add_image()`, `layer.add_form()`) methods will do this for you.
///
/// Serialized as an adjacently tagged enum: the kebab-case variant name is stored
/// under `type` and the payload under `data`.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", tag = "type", content = "data")]
pub enum XObject {
    /// Image XObject, for images
    Image(RawImage),
    /// Form XObject, NOT A PDF FORM, this just allows repeatable content
    /// on a page
    Form(FormXObject),
    /// XObject embedded from an external stream
    ///
    /// This is mainly used to add XObjects to the resources that the library
    /// doesn't support natively (such as gradients, patterns, etc).
    ///
    /// The only thing this does is to ensure that this stream is set on
    /// the `/Resources` dictionary of the page. The `XObjectRef` returned
    /// by `add_xobject()` is the unique name that can be used to invoke
    /// the `/Do` operator (via `use_xobject`).
    External(ExternalXObject),
}
39
40impl XObject {
41    pub fn get_width_height(&self) -> Option<(Px, Px)> {
42        match self {
43            XObject::Image(raw_image) => Some((Px(raw_image.width), Px(raw_image.height))),
44            XObject::Form(form_xobject) => form_xobject.size,
45            XObject::External(external_xobject) => {
46                Some((external_xobject.width?, external_xobject.height?))
47            }
48        }
49    }
50}
51
/// Converts the XObject into a stream, adds that stream to `doc` and returns
/// the object ID assigned by the document.
///
/// `_image_opts` is only forwarded to the image encoder when the `images`
/// feature is enabled; the form and external variants do not use it.
///
/// # Panics
///
/// Panics for `XObject::Image` when the crate is compiled without the
/// `images` feature.
pub(crate) fn add_xobject_to_document(
    xobj: &XObject,
    doc: &mut lopdf::Document,
    _image_opts: Option<&ImageOptimizationOptions>,
) -> lopdf::ObjectId {
    // Every variant is first turned into a stream, then registered with the document.
    match xobj {
        XObject::Image(_i) => {
            #[cfg(feature = "images")]
            {
                let stream = crate::image::image_to_stream(_i.clone(), doc, _image_opts);
                doc.add_object(stream)
            }
            #[cfg(not(feature = "images"))]
            {
                panic!("Image XObjects require the 'images' feature");
            }
        }
        XObject::Form(f) => {
            let stream = form_xobject_to_stream(f, doc);
            doc.add_object(stream)
        }
        XObject::External(external_xobject) => {
            let stream = external_xobject.stream.into_lopdf();
            doc.add_object(stream)
        }
    }
}
81
/// External XObject, invoked by the `/Do` graphics operator.
///
/// Field names are (de)serialized in camelCase; `width`, `height` and `dpi`
/// default to `None` when absent from the input.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ExternalXObject {
    /// External stream of graphics operations
    pub stream: ExternalStream,
    /// Optional width in pixels
    #[serde(default)]
    pub width: Option<Px>,
    /// Optional height in pixels
    #[serde(default)]
    pub height: Option<Px>,
    /// Optional DPI of the object
    #[serde(default)]
    pub dpi: Option<f32>,
}
98
/// A raw PDF stream (dictionary + bytes) supplied from outside the library.
///
/// Field names are (de)serialized in camelCase.
#[derive(Debug, PartialEq, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ExternalStream {
    /// Stream description, for simplicity a simple map, corresponds to the PDF stream dict
    pub dict: BTreeMap<String, DictItem>,
    /// Stream content
    pub content: Vec<u8>,
    /// Whether the stream can be compressed (forwarded to `lopdf::Stream::with_compression`)
    pub compress: bool,
}
109
110impl ExternalStream {
111    pub(crate) fn into_lopdf(&self) -> lopdf::Stream {
112        lopdf::Stream::new(build_dict(&self.dict), self.content.clone())
113            .with_compression(self.compress)
114    }
115    pub fn decompressed_content(&self) -> Vec<u8> {
116        self.into_lopdf()
117            .decompressed_content()
118            .unwrap_or(self.content.clone())
119    }
120
121    /// Decode a stream of `Op` from a string (usually to debug PDF issues)
122    pub fn decode_ops(s: &str) -> Result<Vec<Op>, String> {
123        Self::get_ops_internal(s.as_bytes())
124    }
125
126    /// If the stream is decodable as PDF operations, return the operations of the stream
127    pub fn get_ops(&self) -> Result<Vec<Op>, String> {
128        Self::get_ops_internal(&self.decompressed_content())
129    }
130
131    fn get_ops_internal(s: &[u8]) -> Result<Vec<Op>, String> {
132        // Decode the content stream into a vector of lopdf operations.
133        let content = lopdf::content::Content::decode(&s)
134            .map_err(|e| format!("Failed to decode content stream: {}", e))?;
135
136        // Convert lopdf operations to printpdf Ops.
137        let mut page_state = PageState::default();
138        let mut printpdf_ops = Vec::new();
139
140        for (op_id, op) in content.operations.iter().enumerate() {
141            let parsed_op = crate::deserialize::parse_op(
142                0,
143                op_id,
144                &op,
145                &mut page_state,
146                &BTreeMap::new(),
147                &mut Vec::new(),
148            )?;
149            printpdf_ops.extend(parsed_op.into_iter());
150        }
151
152        Ok(printpdf_ops)
153    }
154}
155
/// Simplified dict item for external streams
///
/// Serialized as an adjacently tagged enum: the kebab-case variant name is stored
/// under `type` and the payload under `data`.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", tag = "type", content = "data")]
pub enum DictItem {
    /// Array of nested items
    Array(Vec<DictItem>),
    /// String object; `literal` selects the literal format, otherwise hexadecimal
    String { data: Vec<u8>, literal: bool },
    /// Raw bytes, written as a hexadecimal string
    Bytes(Vec<u8>),
    /// Boolean object
    Bool(bool),
    /// Floating point number (written as a real number, same as `Real`)
    Float(f32),
    /// Integer object
    Int(i64),
    /// Real number object
    Real(f32),
    /// Name object (raw bytes of the name)
    Name(Vec<u8>),
    /// Indirect reference: object number and generation number
    Ref { obj: u32, gen: u16 },
    /// Nested dictionary
    Dict { map: BTreeMap<String, DictItem> },
    /// Nested stream
    Stream { stream: ExternalStream },
    /// Null object
    Null,
}
173
174impl DictItem {
175    pub fn to_lopdf(&self) -> lopdf::Object {
176        use lopdf::{Object, StringFormat};
177        match self {
178            DictItem::Array(items) => {
179                let objs = items.iter().map(|item| item.to_lopdf()).collect();
180                Object::Array(objs)
181            }
182            DictItem::String { data, literal } => {
183                let format = if *literal {
184                    StringFormat::Literal
185                } else {
186                    StringFormat::Hexadecimal
187                };
188                Object::String(data.clone(), format)
189            }
190            DictItem::Bytes(data) => {
191                // Treat bytes as a hexadecimal string.
192                Object::String(data.clone(), StringFormat::Hexadecimal)
193            }
194            DictItem::Bool(b) => Object::Boolean(*b),
195            DictItem::Float(f) => Object::Real(*f),
196            DictItem::Int(i) => Object::Integer(*i),
197            DictItem::Real(f) => Object::Real(*f),
198            DictItem::Name(name) => Object::Name(name.clone()),
199            DictItem::Ref { obj, gen } => Object::Reference((*obj, *gen)),
200            DictItem::Dict { map } => {
201                let dict = map
202                    .iter()
203                    .map(|(k, v)| (k.as_bytes().to_vec(), v.to_lopdf()))
204                    .collect();
205                Object::Dictionary(dict)
206            }
207            DictItem::Stream { stream } => {
208                let stream_obj = stream.into_lopdf();
209                Object::Stream(stream_obj)
210            }
211            DictItem::Null => Object::Null,
212        }
213    }
214
215    pub fn from_lopdf(o: &lopdf::Object) -> Self {
216        use lopdf::Object;
217        match o {
218            Object::Null => DictItem::Null,
219            Object::Boolean(t) => DictItem::Bool(*t),
220            Object::Integer(i) => DictItem::Int(*i),
221            Object::Real(r) => DictItem::Real(*r),
222            Object::Name(items) => DictItem::Name(items.clone()),
223            Object::String(items, string_format) => DictItem::String {
224                data: items.clone(),
225                literal: *string_format == StringFormat::Literal,
226            },
227            Object::Array(objects) => {
228                DictItem::Array(objects.iter().map(DictItem::from_lopdf).collect())
229            }
230            Object::Dictionary(dictionary) => DictItem::Dict {
231                map: dictionary
232                    .iter()
233                    .map(|s| {
234                        (
235                            String::from_utf8_lossy(&s.0).to_string(),
236                            DictItem::from_lopdf(s.1),
237                        )
238                    })
239                    .collect(),
240            },
241            Object::Stream(stream) => DictItem::Stream {
242                stream: ExternalStream {
243                    compress: stream.allows_compression,
244                    content: stream.content.clone(),
245                    dict: stream
246                        .dict
247                        .iter()
248                        .map(|s| {
249                            (
250                                String::from_utf8_lossy(&s.0).to_string(),
251                                DictItem::from_lopdf(s.1),
252                            )
253                        })
254                        .collect(),
255                },
256            },
257            Object::Reference((a, b)) => DictItem::Ref { obj: *a, gen: *b },
258        }
259    }
260}
261
/// Describes the format the image bytes are compressed with.
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum ImageFilter {
    /// ASCII base-85 encoding.
    /// NOTE(review): presumably corresponds to the PDF `ASCII85Decode` filter — confirm.
    Ascii85,
    /// Lempel-Ziv-Welch (LZW) compression
    Lzw,
    /// Discrete Cosine Transform, JPEG Baseline.
    DCT,
    /// JPEG2000 aka JPX wavelet based compression.
    JPX,
}
274
/// __THIS IS NOT A PDF FORM!__ A form `XObject` can be nearly everything.
/// PDF allows you to reuse content for the graphics stream in a `FormXObject`.
/// A `FormXObject` is basically a layer-like content stream and can contain anything
/// as long as it's a valid stream. A `FormXObject` is intended to be used for repeated
/// content on one page.
///
/// Field names are (de)serialized in camelCase.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FormXObject {
    // /Type /XObject
    // /Subtype /Form
    // /FormType Integer
    /// Form type (currently only Type1)
    pub form_type: FormType,
    /// Optional width / height, affects the width / height on instantiation
    pub size: Option<(Px, Px)>,
    /// The actual content of this FormXObject
    pub bytes: Vec<u8>,
    /* /Matrix [Integer , 6] */
    /// Optional matrix, maps the form into user space
    pub matrix: Option<CurTransMat>,
    /* /Resources << dictionary >> */
    /// (Optional but strongly recommended; PDF 1.2) A dictionary specifying
    /// any resources (such as fonts and images) required by the form XObject
    /// (see Section 3.7, “Content Streams and Resources”).
    ///
    /// In PDF 1.1 and earlier, all named resources used in the form XObject must be
    /// included in the resource dictionary of each page object on which the form
    /// XObject appears, regardless of whether they also appear in the resource
    /// dictionary of the form XObject. It can be useful to specify these resources
    /// in the form XObject’s resource dictionary as well, to determine which resources
    /// are used inside the form XObject. If a resource is included in both dictionaries,
    /// it should have the same name in both locations.
    ///
    /// In PDF 1.2 and later versions, form XObjects can be independent of the content
    /// streams in which they appear, and this is strongly recommended although not
    /// required. In an independent form XObject, the resource dictionary of the form
    /// XObject is required and contains all named resources used by the form XObject.
    /// These resources are not promoted to the outer content stream’s resource
    /// dictionary, although that stream’s resource dictionary refers to the form XObject.
    pub resources: Option<BTreeMap<String, DictItem>>,
    /* /Group << dictionary >> */
    /// (Optional; PDF 1.4) A group attributes dictionary indicating that the contents of the
    /// form XObject are to be treated as a group and specifying the attributes of that group
    /// (see Section 4.9.2, “Group XObjects”).
    ///
    /// Note: If a Ref entry (see below) is present, the group attributes also apply to the
    /// external page imported by that entry, which allows such an imported page to be treated
    /// as a group without further modification.
    pub group: Option<GroupXObject>,
    /* /Ref << dictionary >> */
    /// (Optional; PDF 1.4) A reference dictionary identifying a page to be imported from another
    /// PDF file, and for which the form XObject serves as a proxy (see Section 4.9.3, “Reference
    /// XObjects”).
    pub ref_dict: Option<BTreeMap<String, DictItem>>,
    /* /Metadata [stream] */
    /// (Optional; PDF 1.4) A metadata stream containing metadata for the form XObject
    /// (see Section 10.2.2, “Metadata Streams”).
    pub metadata: Option<BTreeMap<String, DictItem>>,
    /* /PieceInfo << dictionary >> */
    /// (Optional; PDF 1.3) A page-piece dictionary associated with the form XObject
    /// (see Section 10.4, “Page-Piece Dictionaries”).
    pub piece_info: Option<BTreeMap<String, DictItem>>,
    /* /LastModified (date) */
    /// (Required if PieceInfo is present; optional otherwise; PDF 1.3) The date and time
    /// (see Section 3.8.3, “Dates”) when the form XObject’s contents were most recently
    /// modified. If a page-piece dictionary (PieceInfo) is present, the modification date
    /// is used to ascertain which of the application data dictionaries it contains correspond
    /// to the current content of the form (see Section 10.4, “Page-Piece Dictionaries”).
    pub last_modified: Option<OffsetDateTime>,
    /* /StructParent integer */
    /// (Required if the form XObject is a structural content item; PDF 1.3) The integer key of
    /// the form XObject’s entry in the structural parent tree (see “Finding Structure Elements
    /// from Content Items” on page 868).
    pub struct_parent: Option<i64>,
    /* /StructParents integer */
    /// __(Required if the form XObject contains marked-content sequences that are structural
    /// content items; PDF 1.3)__ The integer key of the form XObject’s entry in the structural
    /// parent tree (see “Finding Structure Elements from Content Items” on page 868).
    ///
    /// __Note:__ At most one of the entries StructParent or StructParents may be present. A form
    /// XObject can be either a content item in its entirety or a container for marked-content
    /// sequences that are content items, but not both.
    pub struct_parents: Option<i64>,
    /* /OPI << dictionary >> */
    /// (Optional; PDF 1.2) An OPI version dictionary for the form XObject
    /// (see Section 10.10.6, “Open Prepress Interface (OPI)”).
    pub opi: Option<BTreeMap<String, DictItem>>,
    /* /OC << dictionary >> */
    /// (Optional; PDF 1.5) An optional content group or optional content membership dictionary
    /// (see Section 4.10, “Optional Content”) specifying the optional content properties for
    /// the form XObject. Before the form is processed, its visibility is determined based on
    /// this entry. If it is determined to be invisible, the entire form is skipped, as if there
    /// were no Do operator to invoke it.
    pub oc: Option<BTreeMap<String, DictItem>>,
    /* /Name /MyName */
    /// __(Required in PDF 1.0; optional otherwise)__ The name by which this form XObject is
    /// referenced in the XObject subdictionary of the current resource dictionary
    /// (see Section 3.7.2, “Resource Dictionaries”).
    /// __Note:__ This entry is obsolescent and its use is no longer recommended.
    /// (See implementation note 55 in Appendix H.)
    pub name: Option<String>,
}
375
376fn form_xobject_to_stream(f: &FormXObject, doc: &mut lopdf::Document) -> lopdf::Stream {
377    use lopdf::Object::{String as LoString, *};
378
379    let mut dict = lopdf::Dictionary::from_iter(vec![
380        ("Type", Name("XObject".into())),
381        ("Subtype", Name("Form".into())),
382        ("FormType", Name(f.form_type.get_id().into())),
383    ]);
384
385    if let Some(matrix) = f.matrix.as_ref() {
386        dict.set(
387            "Matrix",
388            Array(matrix.as_array().into_iter().map(Real).collect()),
389        );
390    }
391
392    if let Some(res) = f.resources.as_ref() {
393        dict.set("Resources", build_dict(res));
394    }
395
396    if let Some(g) = f.group.as_ref() {
397        let group_dict = lopdf::Dictionary::from_iter(vec![
398            ("Type", Name("Group".into())),
399            ("S", Name(g.group_type.get_id().into())),
400        ]);
401
402        dict.set("Group", Dictionary(group_dict));
403    }
404
405    if let Some(r) = f.ref_dict.as_ref() {
406        dict.set("Ref", build_dict(&r));
407    }
408
409    if let Some(r) = f.metadata.as_ref() {
410        dict.set("Metadata", doc.add_object(build_dict(&r)));
411    }
412
413    if let Some(r) = f.piece_info.as_ref() {
414        dict.set("PieceInfo", doc.add_object(build_dict(&r)));
415    }
416
417    if let Some(r) = f.last_modified.as_ref() {
418        dict.set(
419            "LastModified",
420            LoString(
421                crate::utils::to_pdf_time_stamp_metadata(r).into_bytes(),
422                lopdf::StringFormat::Literal,
423            ),
424        );
425    }
426
427    if let Some(r) = f.opi.as_ref() {
428        dict.set("OPI", build_dict(&r));
429    }
430
431    if let Some(r) = f.oc.as_ref() {
432        dict.set("OC", build_dict(&r));
433    }
434
435    if let Some(r) = f.name.as_ref() {
436        dict.set(
437            "Name",
438            LoString(r.clone().into(), lopdf::StringFormat::Literal),
439        );
440    }
441
442    if let Some(sp) = &f.struct_parents {
443        dict.set("StructParents", Integer(*sp));
444    } else if let Some(sp) = &f.struct_parent {
445        dict.set("StructParent", Integer(*sp));
446    }
447
448    let stream = lopdf::Stream::new(dict, f.bytes.clone()).with_compression(true);
449    // let _ = stream.compress();
450    stream
451}
452
453pub fn build_dict(r: &BTreeMap<String, DictItem>) -> lopdf::Dictionary {
454    lopdf::Dictionary::from_iter(r.iter().map(|(k, v)| (k.clone(), v.to_lopdf())))
455}
456
457#[derive(Debug, PartialEq, Copy, Clone, Serialize, Deserialize)]
458#[serde(rename_all = "kebab-case")]
459pub enum FormType {
460    /// The only form type ever declared by Adobe
461    /* Integer(1) */
462    Type1,
463}
464
465impl FormType {
466    fn get_id(&self) -> &'static str {
467        match self {
468            FormType::Type1 => "Type1",
469        }
470    }
471}
472
/// `/Type /Group` (PDF reference section 4.9.2)
///
/// Field names are (de)serialized in camelCase.
#[derive(Debug, PartialEq, Copy, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GroupXObject {
    /// Group subtype; falls back to the default variant when missing from the input
    #[serde(default)]
    pub group_type: GroupXObjectType,
}
480
/// Subtype of a group XObject; variant names are (de)serialized in kebab-case.
#[derive(Debug, Default, PartialEq, Copy, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum GroupXObjectType {
    /// Transparency group XObject (currently the only valid GroupXObject type)
    #[default]
    TransparencyGroup,
}
488
489impl GroupXObjectType {
490    pub fn get_id(&self) -> &'static str {
491        match self {
492            GroupXObjectType::TransparencyGroup => "Transparency",
493        }
494    }
495}
496
/// PDF 1.4 and higher
/// Contains a PDF file to be embedded in the current PDF
#[derive(Debug, PartialEq, Clone, Default, Deserialize, Serialize)]
pub struct ReferenceXObject {
    /// (Required) The file containing the target document. (?)
    pub file: Vec<u8>,
    /// Page number to embed
    pub page: i64,
    /// Should be the document ID and version ID from the metadata.
    /// NOTE(review): described as optional, but the field is not an `Option` —
    /// confirm how "absent" is meant to be represented.
    pub id: [i64; 2],
}
508
/// PostScript XObject.
///
/// TODO: not implemented beyond this data holder, very low priority
#[derive(Debug, PartialEq, Clone, Default, Deserialize, Serialize)]
pub struct PostScriptXObject {
    /// __(Optional)__ A stream whose contents are to be used in
    /// place of the PostScript XObject’s stream when the target
    /// PostScript interpreter is known to support only LanguageLevel 1
    #[allow(dead_code)]
    pub level1: Option<Vec<u8>>,
}
518
/// Transform that is applied immediately before the
/// image gets painted. Does not affect anything other
/// than the image.
///
/// Every field defaults to `None` when missing from the input.
/// NOTE(review): unlike sibling types this struct has no `rename_all`
/// attribute, so fields (de)serialize under their snake_case names.
#[derive(Debug, Copy, Clone, Default, PartialEq, Deserialize, Serialize)]
pub struct XObjectTransform {
    /// Horizontal translation; treated as `0` if only `translate_y` is set
    #[serde(default)]
    pub translate_x: Option<Pt>,
    /// Vertical translation; treated as `0` if only `translate_x` is set
    #[serde(default)]
    pub translate_y: Option<Pt>,
    /// Rotation around a center point, in degree angles.
    /// NOTE(review): this was documented as clockwise, but the angle field is
    /// named `angle_ccw_degrees` — confirm the direction.
    #[serde(default)]
    pub rotate: Option<XObjectRotation>,
    /// Horizontal scale factor; treated as `1.0` if only `scale_y` is set
    #[serde(default)]
    pub scale_x: Option<f32>,
    /// Vertical scale factor; treated as `1.0` if only `scale_x` is set
    #[serde(default)]
    pub scale_y: Option<f32>,
    /// If set to None, will be set to 300.0 for images
    #[serde(default)]
    pub dpi: Option<f32>,
}
539
540impl XObjectTransform {
541    pub fn get_ctms(&self, wh: Option<(Px, Px)>) -> Vec<CurTransMat> {
542        let mut transforms = Vec::new();
543        let dpi = self.dpi.unwrap_or(300.0);
544
545        if let Some((w, h)) = wh {
546            // PDF maps an image to a 1x1 square, we have to
547            // adjust the transform matrix to fix the distortion
548
549            // Image at the given dpi should 1px = 1pt
550            transforms.push(CurTransMat::Scale(w.into_pt(dpi).0, h.into_pt(dpi).0));
551        }
552
553        if self.scale_x.is_some() || self.scale_y.is_some() {
554            let scale_x = self.scale_x.unwrap_or(1.0);
555            let scale_y = self.scale_y.unwrap_or(1.0);
556            transforms.push(CurTransMat::Scale(scale_x, scale_y));
557        }
558
559        if let Some(rotate) = self.rotate.as_ref() {
560            transforms.push(CurTransMat::Translate(
561                Pt(-rotate.rotation_center_x.into_pt(dpi).0),
562                Pt(-rotate.rotation_center_y.into_pt(dpi).0),
563            ));
564            transforms.push(CurTransMat::Rotate(rotate.angle_ccw_degrees));
565            transforms.push(CurTransMat::Translate(
566                rotate.rotation_center_x.into_pt(dpi),
567                rotate.rotation_center_y.into_pt(dpi),
568            ));
569        }
570
571        if self.translate_x.is_some() || self.translate_y.is_some() {
572            transforms.push(CurTransMat::Translate(
573                self.translate_x.unwrap_or(Pt(0.0)),
574                self.translate_y.unwrap_or(Pt(0.0)),
575            ));
576        }
577
578        transforms
579    }
580
581    /// Combines the transformation matrices produced by `get_ctms` (with no width/height
582    /// adjustment) into one final transformation and returns it in SVG's matrix format.
583    pub fn as_svg_transform(&self) -> String {
584        // Get the list of transformation matrices (using None for the width/height info)
585        let ctms = self.get_ctms(None);
586
587        // Start with the identity transformation.
588        let mut combined = CurTransMat::Identity;
589
590        // Combine each transform in order.
591        for t in ctms {
592            // Assume combine_matrix takes two 6-element arrays and returns the product.
593            let new_arr = CurTransMat::combine_matrix(combined.as_array(), t.as_array());
594            combined = CurTransMat::Raw(new_arr);
595        }
596
597        // Get the final matrix as an array.
598        let arr = combined.as_array();
599        // SVG expects a matrix in the form "matrix(a b c d e f)"
600        format!(
601            "matrix({} {} {} {} {} {})",
602            arr[0], arr[1], arr[2], arr[3], arr[4], arr[5]
603        )
604    }
605}
606
/// Rotation of an XObject around a center point.
///
/// Field names are (de)serialized in camelCase; every field falls back to its
/// default value when missing from the input.
#[derive(Debug, Copy, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct XObjectRotation {
    /// Rotation angle in degrees (counter-clockwise, going by the field name)
    #[serde(default)]
    pub angle_ccw_degrees: f32,
    /// X coordinate of the rotation center, in pixels
    #[serde(default)]
    pub rotation_center_x: Px,
    /// Y coordinate of the rotation center, in pixels
    #[serde(default)]
    pub rotation_center_y: Px,
}
616}