// pdf_rs/objects.rs

1use std::collections::HashMap;
2
/// A numeric PDF object.
///
/// Integers are kept in either a signed or an unsigned 64-bit variant and
/// reals in an `f64`. Equality is derived, so it is variant-sensitive:
/// `Signed(1)` and `Unsigned(1)` compare as different values.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PDFNumber {
    /// An integer stored as a signed 64-bit value.
    Signed(i64),
    /// An integer stored as an unsigned 64-bit value.
    Unsigned(u64),
    /// A real (decimal) number.
    Real(f64),
}
9
/// A single entry describing one numbered object.
///
/// NOTE(review): presumably one row of the PDF cross-reference (xref) table —
/// confirm against the code that builds these entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct XEntry {
    /// The raw value stored in the entry.
    ///
    /// NOTE(review): for a cross-reference row this is presumably the byte
    /// offset of an in-use object, or the next free object number for a free
    /// entry — confirm against the parser.
    pub(crate) value: u64,
    /// `true` when the entry is in use, `false` when it has been freed/deleted.
    pub(crate) using: bool,
    /// The object number of the entry.
    pub(crate) obj_num: u64,
    /// The generation number of the entry.
    pub(crate) gen_num: u64,
}
21
22pub struct Dictionary {
23    entries: HashMap<String, Option<PDFObject>>,
24}
25
26pub struct Stream {
27    metadata: Dictionary,
28}
29
30pub enum PDFObject {
31    /// The keywords true and false represent boolean objects with values true and false.
32    Bool(bool),
33    /// ## Numbers
34    /// PDF provides two types of numbers, integer and real. Integers may be specified by
35    /// signed or unsigned constants. Reals may only be in decimal format. Throughout
36    /// this book, number means an object whose type is either integer or real.</br>
37    /// `Note Exponential format for numbers (such as 1.0E3) is not supported.`
38    Number(PDFNumber),
39    /// ## Names
40    /// A name, like a string, is a sequence of characters. It must begin with a slash fol-
41    /// lowed by a letter, followed by a sequence of characters. Names may contain any
42    /// characters except linefeed, carriage return, %, (, ), <, >, [, ], {, and }. Examples of
43    /// names are:
44    /// ```plaintext
45    ///  /Name1
46    ///  /ASomewhatLongerName2
47    ///  /A;Name_With-various***characters?.
48    /// ```
49    Named(String),
50    String(Vec<u8>),
51    /// ## Arrays
52    /// An array is a sequence of PDF objects. An array may contain a mixture of object
53    /// types. An array is represented as a left square bracket ( [ ), followed by a sequence
54    /// of objects, followed by a right square bracket ( ] ). An example of an array is:</br>
55    /// ```plaintext
56    /// [ 0 (Higgs) false 3.14 3 549 /SomeName ]
57    /// ```
58    Array(Vec<PDFObject>),
59    /// A dictionary is an associative table containing pairs of objects. The first element of
60    /// each pair is called the key and the second element is called the value. Unlike dictio-
61    /// naries in the PostScript language, a key must
62    /// be a name. A value can be any kind of object, including a dictionary.
63    /// A dictionary is generally used to collect and tie together the attributes of a complex
64    /// object, with each key–value pair specifying the name and value of an attribute.
65    ///
66    /// A dictionary is represented by two left angle brackets (<<), followed by a sequence
67    /// of key–value pairs, followed by two right angle brackets (>>). For example:
68    /// Example 4.1 Dictionary
69    /// << /Type /Example /Key2 12 /Key3 (a string) >>
70    /// Or, in an example of a dictionary within a dictionary:
71    /// ```plaintext
72    /// << /Type /AlsoAnExample
73    /// /Subtype /Bad
74    /// /Reason (unsure)
75    /// /Version 0.01
76    /// /MyInfo <<
77    /// /Item1 0.4
78    /// /Item2 true
79    /// /LastItem (not!)
80    /// /VeryLastItem (OK)
81    /// >>
82    /// >>
83    /// ```
84    /// Dictionary objects are the main building blocks of a PDF document. Many parts of
85    /// a PDF document, such as pages and fonts, are represented using dictionaries. By
86    /// convention, the **Type** key of such a dictionary specifies the type of object being
87    /// described by the dictionary. Its value is always a name. In some cases, the **Subtype**
88    /// key is used to describe a specialization of a particular type. Its value is always a
89    /// name. For a font, Type is **Font** and four Subtypes exist: Type1, MMType1,
90    /// Type3, and TrueType.
91    Dict(Dictionary),
92    Null,
93    /// Any object used as an element of an array or as a value in a dictionary may be
94    /// specified by either a direct object or an indirect reference. An indirect reference is a
95    /// reference to an indirect object, and consists of the indirect object’s object number,
96    /// generation number, and the **R** keyword:
97    /// ```plaintext
98    /// <indirect reference> ::=
99    /// <object number>
100    /// <generation number>
101    /// R
102    /// ```
103    /// Using an indirect reference to the stream’s length, a stream could be written as:
104    /// ```plaintext
105    /// 7 0 obj
106    /// <<
107    /// /Length 8 0 R
108    /// >>
109    /// stream
110    /// BT
111    /// /F1 12 Tf
112    /// 72 712 Td (A stream with an indirect Length) Tj
113    /// ET
114    /// endstream
115    /// endobj
116    /// 8 0 obj
117    /// 64
118    /// endobj
119    /// ```
120    ObjectRef(u64, u64),
121    /// A direct object is a boolean, number, string, name, array, dictionary, stream, or null,
122    /// as described in the previous sections. An indirect object is an object that has been
123    /// labeled so that it can be referenced by other objects. Any type of object may be an
124    /// indirect object. Indirect objects are very useful; for example, if the length of a
125    /// stream is not known before it is written, the value of the stream’s **Length** key may
126    /// be specified as an indirect object that is stored in the file after the stream.</br>
127    /// An indirect object consists of an object identifier, a direct object, and the **endobj**
128    /// keyword. The object identifier consists of an integer object number, an integer gen-
129    /// eration number, and the **obj** keyword:
130    /// ```plaintext
131    /// <indirect object> ::=
132    /// <object ID> ::=
133    /// <object ID>
134    /// <direct object>
135    /// endobj
136    /// <object number>
137    /// <generation number>
138    /// obj
139    /// ```
140    /// The combination of object number and generation number serves as a unique iden-
141    /// tifier for an indirect object. Throughout its existence, an indirect object retains the
142    /// object number and generation number it was initially assigned, even if the object is
143    /// modified.</br>
144    /// Each indirect object has a unique object number, and indirect objects are often but
145    /// not necessarily numbered sequentially in the file, beginning with o
146    IndirectObject(u64, u64, Box<PDFObject>),
147    /// ## Streams
148    /// A stream, like a string, is a sequence of characters. However, an application can
149    /// read a small portion of a stream at a time, while a string must be read in its entirety.
150    /// For this reason, objects with potentially large amounts of data, such as images and
151    /// page descriptions, are represented as streams.
152    ///
153    /// A stream consists of a dictionary that describes a sequence of characters, followed
154    /// by the keyword stream, followed by one or more lines of characters, followed by
155    /// the keyword endstream.
156    /// ```plaintext
157    /// <stream> ::= <dictionary>
158    /// stream
159    /// {<lines of characters>}*
160    /// endstream
161    /// ```
162    Stream(Stream),
163}
164
165impl PDFObject {
166    /// Returns true if the object is a boolean.
167    pub fn is_bool(&self) -> bool {
168        match self {
169            PDFObject::Bool(_) => true,
170            _ => false,
171        }
172    }
173    /// Returns the boolean value of the object if it is a boolean.
174    pub fn as_bool(&self) -> Option<bool> {
175        match self {
176            PDFObject::Bool(b) => Some(*b),
177            _ => None,
178        }
179    }
180
181    /// Returns true if the object is a number.
182    pub fn is_number(&self) -> bool {
183        match self {
184            PDFObject::Number(_) => true,
185            _ => false,
186        }
187    }
188    /// Returns the number value of the object if it is a number.
189    pub fn as_number(&self) -> Option<&PDFNumber> {
190        match self {
191            PDFObject::Number(n) => Some(n),
192            _ => None,
193        }
194    }
195    /// Returns true if the object is a string.
196    pub fn is_string(&self) -> bool {
197        match self {
198            PDFObject::String(_) => true,
199            _ => false,
200        }
201    }
202    /// Returns the string byte sequence of the object if it is a string.
203    pub fn as_str_bytes(&self) -> Option<&[u8]> {
204        match self {
205            PDFObject::String(buf) => Some(buf),
206            _ => None,
207        }
208    }
209
210    /// Returns the string value of the object if it is a string.
211    pub fn is_array(&self) -> bool {
212        match self {
213            PDFObject::Array(_) => true,
214            _ => false,
215        }
216    }
217    /// Returns the array of objects if it is an array.
218    pub fn as_array(&self) -> Option<&[PDFObject]> {
219        match self {
220            PDFObject::Array(a) => Some(a),
221            _ => None,
222        }
223    }
224    /// Returns true if the object is a dictionary.
225    pub fn is_dict(&self) -> bool {
226        match self {
227            PDFObject::Dict(_) => true,
228            _ => false,
229        }
230    }
231    /// Returns the dictionary if it is one.
232    pub fn as_dict(&self) -> Option<&Dictionary> {
233        match self {
234            PDFObject::Dict(d) => Some(d),
235            _ => None,
236        }
237    }
238    /// Returns true if the object is an indirect object.
239    pub fn is_object_ref(&self) -> bool {
240        match self {
241            PDFObject::ObjectRef(_, ..) => true,
242            _ => false,
243        }
244    }
245    /// Returns the object reference if it is one.
246    pub fn as_object_ref(&self) -> Option<(u64, u64)> {
247        match self {
248            PDFObject::ObjectRef(n, g) => Some((*n, *g)),
249            _ => None,
250        }
251    }
252
253    /// Returns true if the object is an indirect object.
254    pub fn is_indirect_object(&self) -> bool {
255        match self {
256            PDFObject::IndirectObject(_, _, _) => true,
257            _ => false,
258        }
259    }
260    /// Returns the indirect object if it is one.
261    pub fn as_indirect_object(&self) -> Option<(u64, u64, &PDFObject)> {
262        match self {
263            PDFObject::IndirectObject(n, g, data) => Some((*n, *g, data)),
264            _ => None,
265        }
266    }
267
268    /// Returns true if the object is null.
269    pub fn is_null(&self) -> bool {
270        match self {
271            PDFObject::Null => true,
272            _ => false,
273        }
274    }
275    /// Returns true if the object is a stream.
276    pub fn is_stream(&self)->bool{
277        match self {
278            PDFObject::Stream(_) => true,
279            _ => false,
280        }
281    }
282
283    /// Returns the stream if it is one.
284    pub fn as_stream(&self)->Option<&Stream>{
285        match self {
286            PDFObject::Stream(s) => Some(s),
287            _ => None,
288        }
289    }
290
291}
292
293impl Dictionary {
294    /// Creates a new dictionary with the given entries.
295    pub(crate) fn new(entries: HashMap<String, Option<PDFObject>>) -> Self {
296        Dictionary { entries }
297    }
298    /// Returns the value of the entry with the given key.
299    pub fn get(&self, key: &str)-> Option<&PDFObject> {
300        match self.entries.get(key){
301            Some(v) => v.as_ref(),
302            None => None,
303        }
304    }
305}
306
307impl XEntry {
308    pub(crate) fn new(obj_num: u64, gen_num: u64, value: u64, using: bool) -> Self {
309        XEntry {
310            obj_num,
311            gen_num,
312            using,
313            value,
314        }
315    }
316    /// Returns the object number of the entry.
317    pub fn get_obj_num(&self)->u64{
318        self.obj_num
319    }
320    /// Returns the generation number of the entry.
321    pub fn get_gen_num(&self)->u64{
322        self.gen_num
323    }
324    /// Returns true if the entry is currently being used.
325    pub fn is_using(&self) -> bool {
326        self.using
327    }
328
329    /// Returns true if the entry is freed.
330    pub fn is_freed(&self)->bool{
331        !self.using
332    }
333    /// Returns the value of the entry.
334    pub fn get_value(&self)->u64{
335        self.value
336    }
337}
338
339impl Stream {
340    /// Creates a new stream with the given metadata.
341    pub(crate) fn new(metadata: Dictionary,buf:Vec<u8>) -> Self {
342        Stream { metadata }
343    }
344}