pdf_rs/
objects.rs

1use std::collections::HashMap;
2
/// A numeric PDF object.
///
/// PDF distinguishes integers from reals; integers are held here either as a
/// signed or as an unsigned 64-bit value.
// NOTE(review): which integer literals end up as `Signed` versus `Unsigned` is
// decided by the parser, which is not visible here — confirm before relying on it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PDFNumber {
    /// An integer stored as `i64`.
    Signed(i64),
    /// An integer stored as `u64`.
    Unsigned(u64),
    /// A real (decimal) number stored as `f64`.
    Real(f64),
}
9
/// A table entry describing one numbered object: its object number, its
/// generation number, whether it is in use, and an associated `u64` value.
// NOTE(review): presumably an entry of the PDF cross-reference table — confirm
// against the code that builds these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XEntry {
    /// The value of the entry.
    // NOTE(review): presumably a byte offset for in-use entries — confirm.
    pub(crate) value: u64,
    /// `true` while the entry is in use, `false` once it has been freed.
    pub(crate) using: bool,
    /// The object number of the entry.
    pub(crate) obj_num: u64,
    /// The generation number of the entry.
    pub(crate) gen_num: u64,
}
21
22pub struct Dictionary {
23    entries: HashMap<String, PDFObject>,
24}
25
26pub struct Stream {
27    metadata: Dictionary,
28}
29
30pub enum PDFObject {
31    /// The keywords true and false represent boolean objects with values true and false.
32    Bool(bool),
33    /// ## Numbers
34    /// PDF provides two types of numbers, integer and real. Integers may be specified by
35    /// signed or unsigned constants. Reals may only be in decimal format. Throughout
36    /// this book, number means an object whose type is either integer or real.</br>
37    /// `Note Exponential format for numbers (such as 1.0E3) is not supported.`
38    Number(PDFNumber),
39    /// ## Names
40    /// A name, like a string, is a sequence of characters. It must begin with a slash fol-
41    /// lowed by a letter, followed by a sequence of characters. Names may contain any
42    /// characters except linefeed, carriage return, %, (, ), <, >, [, ], {, and }. Examples of
43    /// names are:
44    /// ```plaintext
45    ///  /Name1
46    ///  /ASomewhatLongerName2
47    ///  /A;Name_With-various***characters?.
48    /// ```
49    Named(String),
50    String(Vec<u8>),
51    /// ## Arrays
52    /// An array is a sequence of PDF objects. An array may contain a mixture of object
53    /// types. An array is represented as a left square bracket ( [ ), followed by a sequence
54    /// of objects, followed by a right square bracket ( ] ). An example of an array is:</br>
55    /// ```plaintext
56    /// [ 0 (Higgs) false 3.14 3 549 /SomeName ]
57    /// ```
58    Array(Vec<PDFObject>),
59    /// A dictionary is an associative table containing pairs of objects. The first element of
60    /// each pair is called the key and the second element is called the value. Unlike dictio-
61    /// naries in the PostScript language, a key must
62    /// be a name. A value can be any kind of object, including a dictionary.
63    /// A dictionary is generally used to collect and tie together the attributes of a complex
64    /// object, with each key–value pair specifying the name and value of an attribute.
65    ///
66    /// A dictionary is represented by two left angle brackets (<<), followed by a sequence
67    /// of key–value pairs, followed by two right angle brackets (>>). For example:
68    /// Example 4.1 Dictionary
69    /// << /Type /Example /Key2 12 /Key3 (a string) >>
70    /// Or, in an example of a dictionary within a dictionary:
71    /// ```plaintext
72    /// << /Type /AlsoAnExample
73    /// /Subtype /Bad
74    /// /Reason (unsure)
75    /// /Version 0.01
76    /// /MyInfo <<
77    /// /Item1 0.4
78    /// /Item2 true
79    /// /LastItem (not!)
80    /// /VeryLastItem (OK)
81    /// >>
82    /// >>
83    /// ```
84    /// Dictionary objects are the main building blocks of a PDF document. Many parts of
85    /// a PDF document, such as pages and fonts, are represented using dictionaries. By
86    /// convention, the **Type** key of such a dictionary specifies the type of object being
87    /// described by the dictionary. Its value is always a name. In some cases, the **Subtype**
88    /// key is used to describe a specialization of a particular type. Its value is always a
89    /// name. For a font, Type is **Font** and four Subtypes exist: Type1, MMType1,
90    /// Type3, and TrueType.
91    Dict(Dictionary),
92    Null,
93    /// Any object used as an element of an array or as a value in a dictionary may be
94    /// specified by either a direct object or an indirect reference. An indirect reference is a
95    /// reference to an indirect object, and consists of the indirect object’s object number,
96    /// generation number, and the **R** keyword:
97    /// ```plaintext
98    /// <indirect reference> ::=
99    /// <object number>
100    /// <generation number>
101    /// R
102    /// ```
103    /// Using an indirect reference to the stream’s length, a stream could be written as:
104    /// ```plaintext
105    /// 7 0 obj
106    /// <<
107    /// /Length 8 0 R
108    /// >>
109    /// stream
110    /// BT
111    /// /F1 12 Tf
112    /// 72 712 Td (A stream with an indirect Length) Tj
113    /// ET
114    /// endstream
115    /// endobj
116    /// 8 0 obj
117    /// 64
118    /// endobj
119    /// ```
120    ObjectRef(u64, u64),
121    /// A direct object is a boolean, number, string, name, array, dictionary, stream, or null,
122    /// as described in the previous sections. An indirect object is an object that has been
123    /// labeled so that it can be referenced by other objects. Any type of object may be an
124    /// indirect object. Indirect objects are very useful; for example, if the length of a
125    /// stream is not known before it is written, the value of the stream’s **Length** key may
126    /// be specified as an indirect object that is stored in the file after the stream.</br>
127    /// An indirect object consists of an object identifier, a direct object, and the **endobj**
128    /// keyword. The object identifier consists of an integer object number, an integer gen-
129    /// eration number, and the **obj** keyword:
130    /// ```plaintext
131    /// <indirect object> ::=
132    /// <object ID> ::=
133    /// <object ID>
134    /// <direct object>
135    /// endobj
136    /// <object number>
137    /// <generation number>
138    /// obj
139    /// ```
140    /// The combination of object number and generation number serves as a unique iden-
141    /// tifier for an indirect object. Throughout its existence, an indirect object retains the
142    /// object number and generation number it was initially assigned, even if the object is
143    /// modified.</br>
144    /// Each indirect object has a unique object number, and indirect objects are often but
145    /// not necessarily numbered sequentially in the file, beginning with o
146    IndirectObject(u64, u64, Box<PDFObject>),
147    /// ## Streams
148    /// A stream, like a string, is a sequence of characters. However, an application can
149    /// read a small portion of a stream at a time, while a string must be read in its entirety.
150    /// For this reason, objects with potentially large amounts of data, such as images and
151    /// page descriptions, are represented as streams.
152    ///
153    /// A stream consists of a dictionary that describes a sequence of characters, followed
154    /// by the keyword stream, followed by one or more lines of characters, followed by
155    /// the keyword endstream.
156    /// ```plaintext
157    /// <stream> ::= <dictionary>
158    /// stream
159    /// {<lines of characters>}*
160    /// endstream
161    /// ```
162    Stream(Stream),
163}
164
165impl PDFObject {
166    /// Returns true if the object is a boolean.
167    pub fn is_bool(&self) -> bool {
168        match self {
169            PDFObject::Bool(_) => true,
170            _ => false,
171        }
172    }
173    /// Returns the boolean value of the object if it is a boolean.
174    pub fn as_bool(&self) -> Option<bool> {
175        match self {
176            PDFObject::Bool(b) => Some(*b),
177            _ => None,
178        }
179    }
180
181    /// Returns true if the object is a number.
182    pub fn is_number(&self) -> bool {
183        match self {
184            PDFObject::Number(_) => true,
185            _ => false,
186        }
187    }
188    /// Returns the number value of the object if it is a number.
189    pub fn as_number(&self) -> Option<&PDFNumber> {
190        match self {
191            PDFObject::Number(n) => Some(n),
192            _ => None,
193        }
194    }
195    /// Returns true if the object is a string.
196    pub fn is_string(&self) -> bool {
197        match self {
198            PDFObject::String(_) => true,
199            _ => false,
200        }
201    }
202    /// Returns the string byte sequence of the object if it is a string.
203    pub fn as_str_bytes(&self) -> Option<&[u8]> {
204        match self {
205            PDFObject::String(buf) => Some(buf),
206            _ => None,
207        }
208    }
209
210    /// Returns the string value of the object if it is a string.
211    pub fn is_array(&self) -> bool {
212        match self {
213            PDFObject::Array(_) => true,
214            _ => false,
215        }
216    }
217    /// Returns the array of objects if it is an array.
218    pub fn as_array(&self) -> Option<&[PDFObject]> {
219        match self {
220            PDFObject::Array(a) => Some(a),
221            _ => None,
222        }
223    }
224    /// Returns true if the object is a dictionary.
225    pub fn is_dict(&self) -> bool {
226        match self {
227            PDFObject::Dict(_) => true,
228            _ => false,
229        }
230    }
231    /// Returns the dictionary if it is one.
232    pub fn as_dict(&self) -> Option<&Dictionary> {
233        match self {
234            PDFObject::Dict(d) => Some(d),
235            _ => None,
236        }
237    }
238    /// Returns the dictionary if it is one.
239    pub fn to_dict(self) -> Option<Dictionary> {
240        match self {
241            PDFObject::Dict(d) => Some(d),
242            _ => None,
243        }
244    }
245    /// Returns true if the object is an indirect object.
246    pub fn is_object_ref(&self) -> bool {
247        match self {
248            PDFObject::ObjectRef(_, ..) => true,
249            _ => false,
250        }
251    }
252    /// Returns the object reference if it is one.
253    pub fn as_object_ref(&self) -> Option<(u64, u64)> {
254        match self {
255            PDFObject::ObjectRef(n, g) => Some((*n, *g)),
256            _ => None,
257        }
258    }
259
260    /// Returns true if the object is an indirect object.
261    pub fn is_indirect_object(&self) -> bool {
262        match self {
263            PDFObject::IndirectObject(_, _, _) => true,
264            _ => false,
265        }
266    }
267    /// Returns the indirect object if it is one.
268    pub fn as_indirect_object(&self) -> Option<(u64, u64, &PDFObject)> {
269        match self {
270            PDFObject::IndirectObject(n, g, data) => Some((*n, *g, data)),
271            _ => None,
272        }
273    }
274
275    /// Returns true if the object is null.
276    pub fn is_null(&self) -> bool {
277        match self {
278            PDFObject::Null => true,
279            _ => false,
280        }
281    }
282    /// Returns true if the object is a stream.
283    pub fn is_stream(&self)->bool{
284        match self {
285            PDFObject::Stream(_) => true,
286            _ => false,
287        }
288    }
289
290    /// Returns the stream if it is one.
291    pub fn as_stream(&self)->Option<&Stream>{
292        match self {
293            PDFObject::Stream(s) => Some(s),
294            _ => None,
295        }
296    }
297    /// Returns true if the object is a name.
298    pub fn is_name(&self)->bool{
299        match self {
300            PDFObject::Named(_) => true,
301            _ => false,
302        }
303    }
304    /// Returns the name if it is one.
305    pub fn as_name(&self)->Option<&String>{
306        match self {
307            PDFObject::Named(s) => Some(s),
308            _ => None,
309        }
310    }
311
312}
313
314impl Dictionary {
315    /// Creates a new dictionary with the given entries.
316    pub(crate) fn new(entries: HashMap<String, PDFObject>) -> Self {
317        Dictionary { entries }
318    }
319    /// Returns the value of the entry with the given key.
320    pub fn get(&self, key: &str)-> Option<&PDFObject> {
321        self.entries.get(key)
322    }
323
324    /// Removes the entry with the given key.
325    pub fn remove(&mut self,key:&str)->Option<PDFObject>{
326        self.entries.remove(key)
327    }
328    /// Returns true if the dictionary contains the given key.
329    pub fn contain(&self, key: &str)->bool{
330        self.entries.contains_key(key)
331    }
332
333    /// Returns the value of the entry with the given key as a name.
334    pub fn get_named_value(&self, key: &str) -> Option<&String> {
335        self.get(key).and_then(|it| it.as_name())
336    }
337
338
339    /// Returns the value of the entry with the given key as a u64.
340    pub fn get_u64_num(&self, key: &str) -> Option<u64> {
341        self.get(key)
342            .and_then(|it| it.as_number())
343            .and_then(|it| if let PDFNumber::Unsigned(num) = it { Some(*num) } else { None })
344    }
345
346    /// Returns true if the value of the entry with the given key is the given name.
347    pub fn named_value_was(&self, keys: &str,except:&str) -> bool {
348        if let Some(value) = self.get_named_value(keys) {
349            value == except
350        } else {
351            false
352        }
353    }
354
355    /// Returns the value of the entry with the given key as an array.
356    pub fn get_array_value(&self, key: &str) -> Option<&[PDFObject]> {
357        self.get(key).and_then(|it| it.as_array())
358    }
359}
360
361impl XEntry {
362    pub(crate) fn new(obj_num: u64, gen_num: u64, value: u64, using: bool) -> Self {
363        XEntry {
364            obj_num,
365            gen_num,
366            using,
367            value,
368        }
369    }
370    /// Returns the object number of the entry.
371    pub fn get_obj_num(&self)->u64{
372        self.obj_num
373    }
374    /// Returns the generation number of the entry.
375    pub fn get_gen_num(&self)->u64{
376        self.gen_num
377    }
378    /// Returns true if the entry is currently being used.
379    pub fn is_using(&self) -> bool {
380        self.using
381    }
382
383    /// Returns true if the entry is freed.
384    pub fn is_freed(&self)->bool{
385        !self.using
386    }
387    /// Returns the value of the entry.
388    pub fn get_value(&self)->u64{
389        self.value
390    }
391}
392
393impl Stream {
394    /// Creates a new stream with the given metadata.
395    pub(crate) fn new(metadata: Dictionary,buf:Vec<u8>) -> Self {
396        Stream { metadata }
397    }
398}