pdf_rs/objects.rs
1use std::collections::HashMap;
2
/// A numeric PDF object.
///
/// The variant records how the number is held: as a signed integer, an
/// unsigned integer, or a real value.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PDFNumber {
    /// An integer held as an `i64`.
    Signed(i64),
    /// An integer held as a `u64`.
    Unsigned(u64),
    /// A real number held as an `f64`.
    Real(f64),
}
9
/// An entry identified by an object number and a generation number, flagged
/// as either in use or freed.
// NOTE(review): looks like one row of the PDF cross-reference (xref) table — confirm against the parser that builds these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XEntry {
    /// The value of the entry.
    // NOTE(review): presumably the byte offset of an in-use object, or the next free object number for a freed entry — confirm.
    pub(crate) value: u64,
    /// `true` when the entry is in use, `false` when it has been freed/deleted.
    pub(crate) using: bool,
    /// The object number of the entry.
    pub(crate) obj_num: u64,
    /// The generation number of the entry.
    pub(crate) gen_num: u64,
}
21
22pub struct Dictionary {
23 entries: HashMap<String, PDFObject>,
24}
25
26pub struct Stream {
27 metadata: Dictionary,
28}
29
30pub enum PDFObject {
31 /// The keywords true and false represent boolean objects with values true and false.
32 Bool(bool),
33 /// ## Numbers
34 /// PDF provides two types of numbers, integer and real. Integers may be specified by
35 /// signed or unsigned constants. Reals may only be in decimal format. Throughout
36 /// this book, number means an object whose type is either integer or real.</br>
37 /// `Note Exponential format for numbers (such as 1.0E3) is not supported.`
38 Number(PDFNumber),
39 /// ## Names
40 /// A name, like a string, is a sequence of characters. It must begin with a slash fol-
41 /// lowed by a letter, followed by a sequence of characters. Names may contain any
42 /// characters except linefeed, carriage return, %, (, ), <, >, [, ], {, and }. Examples of
43 /// names are:
44 /// ```plaintext
45 /// /Name1
46 /// /ASomewhatLongerName2
47 /// /A;Name_With-various***characters?.
48 /// ```
49 Named(String),
50 String(Vec<u8>),
51 /// ## Arrays
52 /// An array is a sequence of PDF objects. An array may contain a mixture of object
53 /// types. An array is represented as a left square bracket ( [ ), followed by a sequence
54 /// of objects, followed by a right square bracket ( ] ). An example of an array is:</br>
55 /// ```plaintext
56 /// [ 0 (Higgs) false 3.14 3 549 /SomeName ]
57 /// ```
58 Array(Vec<PDFObject>),
59 /// A dictionary is an associative table containing pairs of objects. The first element of
60 /// each pair is called the key and the second element is called the value. Unlike dictio-
61 /// naries in the PostScript language, a key must
62 /// be a name. A value can be any kind of object, including a dictionary.
63 /// A dictionary is generally used to collect and tie together the attributes of a complex
64 /// object, with each key–value pair specifying the name and value of an attribute.
65 ///
66 /// A dictionary is represented by two left angle brackets (<<), followed by a sequence
67 /// of key–value pairs, followed by two right angle brackets (>>). For example:
68 /// Example 4.1 Dictionary
69 /// << /Type /Example /Key2 12 /Key3 (a string) >>
70 /// Or, in an example of a dictionary within a dictionary:
71 /// ```plaintext
72 /// << /Type /AlsoAnExample
73 /// /Subtype /Bad
74 /// /Reason (unsure)
75 /// /Version 0.01
76 /// /MyInfo <<
77 /// /Item1 0.4
78 /// /Item2 true
79 /// /LastItem (not!)
80 /// /VeryLastItem (OK)
81 /// >>
82 /// >>
83 /// ```
84 /// Dictionary objects are the main building blocks of a PDF document. Many parts of
85 /// a PDF document, such as pages and fonts, are represented using dictionaries. By
86 /// convention, the **Type** key of such a dictionary specifies the type of object being
87 /// described by the dictionary. Its value is always a name. In some cases, the **Subtype**
88 /// key is used to describe a specialization of a particular type. Its value is always a
89 /// name. For a font, Type is **Font** and four Subtypes exist: Type1, MMType1,
90 /// Type3, and TrueType.
91 Dict(Dictionary),
92 Null,
93 /// Any object used as an element of an array or as a value in a dictionary may be
94 /// specified by either a direct object or an indirect reference. An indirect reference is a
95 /// reference to an indirect object, and consists of the indirect object’s object number,
96 /// generation number, and the **R** keyword:
97 /// ```plaintext
98 /// <indirect reference> ::=
99 /// <object number>
100 /// <generation number>
101 /// R
102 /// ```
103 /// Using an indirect reference to the stream’s length, a stream could be written as:
104 /// ```plaintext
105 /// 7 0 obj
106 /// <<
107 /// /Length 8 0 R
108 /// >>
109 /// stream
110 /// BT
111 /// /F1 12 Tf
112 /// 72 712 Td (A stream with an indirect Length) Tj
113 /// ET
114 /// endstream
115 /// endobj
116 /// 8 0 obj
117 /// 64
118 /// endobj
119 /// ```
120 ObjectRef(u64, u64),
121 /// A direct object is a boolean, number, string, name, array, dictionary, stream, or null,
122 /// as described in the previous sections. An indirect object is an object that has been
123 /// labeled so that it can be referenced by other objects. Any type of object may be an
124 /// indirect object. Indirect objects are very useful; for example, if the length of a
125 /// stream is not known before it is written, the value of the stream’s **Length** key may
126 /// be specified as an indirect object that is stored in the file after the stream.</br>
127 /// An indirect object consists of an object identifier, a direct object, and the **endobj**
128 /// keyword. The object identifier consists of an integer object number, an integer gen-
129 /// eration number, and the **obj** keyword:
130 /// ```plaintext
131 /// <indirect object> ::=
132 /// <object ID> ::=
133 /// <object ID>
134 /// <direct object>
135 /// endobj
136 /// <object number>
137 /// <generation number>
138 /// obj
139 /// ```
140 /// The combination of object number and generation number serves as a unique iden-
141 /// tifier for an indirect object. Throughout its existence, an indirect object retains the
142 /// object number and generation number it was initially assigned, even if the object is
143 /// modified.</br>
144 /// Each indirect object has a unique object number, and indirect objects are often but
145 /// not necessarily numbered sequentially in the file, beginning with o
146 IndirectObject(u64, u64, Box<PDFObject>),
147 /// ## Streams
148 /// A stream, like a string, is a sequence of characters. However, an application can
149 /// read a small portion of a stream at a time, while a string must be read in its entirety.
150 /// For this reason, objects with potentially large amounts of data, such as images and
151 /// page descriptions, are represented as streams.
152 ///
153 /// A stream consists of a dictionary that describes a sequence of characters, followed
154 /// by the keyword stream, followed by one or more lines of characters, followed by
155 /// the keyword endstream.
156 /// ```plaintext
157 /// <stream> ::= <dictionary>
158 /// stream
159 /// {<lines of characters>}*
160 /// endstream
161 /// ```
162 Stream(Stream),
163}
164
165impl PDFObject {
166 /// Returns true if the object is a boolean.
167 pub fn is_bool(&self) -> bool {
168 match self {
169 PDFObject::Bool(_) => true,
170 _ => false,
171 }
172 }
173 /// Returns the boolean value of the object if it is a boolean.
174 pub fn as_bool(&self) -> Option<bool> {
175 match self {
176 PDFObject::Bool(b) => Some(*b),
177 _ => None,
178 }
179 }
180
181 /// Returns true if the object is a number.
182 pub fn is_number(&self) -> bool {
183 match self {
184 PDFObject::Number(_) => true,
185 _ => false,
186 }
187 }
188 /// Returns the number value of the object if it is a number.
189 pub fn as_number(&self) -> Option<&PDFNumber> {
190 match self {
191 PDFObject::Number(n) => Some(n),
192 _ => None,
193 }
194 }
195 /// Returns true if the object is a string.
196 pub fn is_string(&self) -> bool {
197 match self {
198 PDFObject::String(_) => true,
199 _ => false,
200 }
201 }
202 /// Returns the string byte sequence of the object if it is a string.
203 pub fn as_str_bytes(&self) -> Option<&[u8]> {
204 match self {
205 PDFObject::String(buf) => Some(buf),
206 _ => None,
207 }
208 }
209
210 /// Returns the string value of the object if it is a string.
211 pub fn is_array(&self) -> bool {
212 match self {
213 PDFObject::Array(_) => true,
214 _ => false,
215 }
216 }
217 /// Returns the array of objects if it is an array.
218 pub fn as_array(&self) -> Option<&[PDFObject]> {
219 match self {
220 PDFObject::Array(a) => Some(a),
221 _ => None,
222 }
223 }
224 /// Returns true if the object is a dictionary.
225 pub fn is_dict(&self) -> bool {
226 match self {
227 PDFObject::Dict(_) => true,
228 _ => false,
229 }
230 }
231 /// Returns the dictionary if it is one.
232 pub fn as_dict(&self) -> Option<&Dictionary> {
233 match self {
234 PDFObject::Dict(d) => Some(d),
235 _ => None,
236 }
237 }
238 /// Returns the dictionary if it is one.
239 pub fn to_dict(self) -> Option<Dictionary> {
240 match self {
241 PDFObject::Dict(d) => Some(d),
242 _ => None,
243 }
244 }
245 /// Returns true if the object is an indirect object.
246 pub fn is_object_ref(&self) -> bool {
247 match self {
248 PDFObject::ObjectRef(_, ..) => true,
249 _ => false,
250 }
251 }
252 /// Returns the object reference if it is one.
253 pub fn as_object_ref(&self) -> Option<(u64, u64)> {
254 match self {
255 PDFObject::ObjectRef(n, g) => Some((*n, *g)),
256 _ => None,
257 }
258 }
259
260 /// Returns true if the object is an indirect object.
261 pub fn is_indirect_object(&self) -> bool {
262 match self {
263 PDFObject::IndirectObject(_, _, _) => true,
264 _ => false,
265 }
266 }
267 /// Returns the indirect object if it is one.
268 pub fn as_indirect_object(&self) -> Option<(u64, u64, &PDFObject)> {
269 match self {
270 PDFObject::IndirectObject(n, g, data) => Some((*n, *g, data)),
271 _ => None,
272 }
273 }
274
275 /// Returns true if the object is null.
276 pub fn is_null(&self) -> bool {
277 match self {
278 PDFObject::Null => true,
279 _ => false,
280 }
281 }
282 /// Returns true if the object is a stream.
283 pub fn is_stream(&self)->bool{
284 match self {
285 PDFObject::Stream(_) => true,
286 _ => false,
287 }
288 }
289
290 /// Returns the stream if it is one.
291 pub fn as_stream(&self)->Option<&Stream>{
292 match self {
293 PDFObject::Stream(s) => Some(s),
294 _ => None,
295 }
296 }
297 /// Returns true if the object is a name.
298 pub fn is_name(&self)->bool{
299 match self {
300 PDFObject::Named(_) => true,
301 _ => false,
302 }
303 }
304 /// Returns the name if it is one.
305 pub fn as_name(&self)->Option<&String>{
306 match self {
307 PDFObject::Named(s) => Some(s),
308 _ => None,
309 }
310 }
311
312}
313
314impl Dictionary {
315 /// Creates a new dictionary with the given entries.
316 pub(crate) fn new(entries: HashMap<String, PDFObject>) -> Self {
317 Dictionary { entries }
318 }
319 /// Returns the value of the entry with the given key.
320 pub fn get(&self, key: &str)-> Option<&PDFObject> {
321 self.entries.get(key)
322 }
323
324 /// Removes the entry with the given key.
325 pub fn remove(&mut self,key:&str)->Option<PDFObject>{
326 self.entries.remove(key)
327 }
328 /// Returns true if the dictionary contains the given key.
329 pub fn contain(&self, key: &str)->bool{
330 self.entries.contains_key(key)
331 }
332
333 /// Returns the value of the entry with the given key as a name.
334 pub fn get_named_value(&self, key: &str) -> Option<&String> {
335 self.get(key).and_then(|it| it.as_name())
336 }
337
338
339 /// Returns the value of the entry with the given key as a u64.
340 pub fn get_u64_num(&self, key: &str) -> Option<u64> {
341 self.get(key)
342 .and_then(|it| it.as_number())
343 .and_then(|it| if let PDFNumber::Unsigned(num) = it { Some(*num) } else { None })
344 }
345
346 /// Returns true if the value of the entry with the given key is the given name.
347 pub fn named_value_was(&self, keys: &str,except:&str) -> bool {
348 if let Some(value) = self.get_named_value(keys) {
349 value == except
350 } else {
351 false
352 }
353 }
354
355 /// Returns the value of the entry with the given key as an array.
356 pub fn get_array_value(&self, key: &str) -> Option<&[PDFObject]> {
357 self.get(key).and_then(|it| it.as_array())
358 }
359}
360
361impl XEntry {
362 pub(crate) fn new(obj_num: u64, gen_num: u64, value: u64, using: bool) -> Self {
363 XEntry {
364 obj_num,
365 gen_num,
366 using,
367 value,
368 }
369 }
370 /// Returns the object number of the entry.
371 pub fn get_obj_num(&self)->u64{
372 self.obj_num
373 }
374 /// Returns the generation number of the entry.
375 pub fn get_gen_num(&self)->u64{
376 self.gen_num
377 }
378 /// Returns true if the entry is currently being used.
379 pub fn is_using(&self) -> bool {
380 self.using
381 }
382
383 /// Returns true if the entry is freed.
384 pub fn is_freed(&self)->bool{
385 !self.using
386 }
387 /// Returns the value of the entry.
388 pub fn get_value(&self)->u64{
389 self.value
390 }
391}
392
393impl Stream {
394 /// Creates a new stream with the given metadata.
395 pub(crate) fn new(metadata: Dictionary,buf:Vec<u8>) -> Self {
396 Stream { metadata }
397 }
398}