pdf_rs/objects.rs
1use std::collections::HashMap;
2
/// A numeric PDF object.
///
/// Integers are kept in a signed or an unsigned 64-bit variant and reals in an `f64`.
/// Equality is derived, so values in different variants never compare equal
/// (`Signed(1) != Unsigned(1)`).
#[derive(Debug, PartialEq, Clone)]
pub enum PDFNumber {
    /// An integer stored as a signed 64-bit value.
    Signed(i64),
    /// An integer stored as an unsigned 64-bit value.
    Unsigned(u64),
    /// A real number.
    Real(f64),
}
9
/// A single entry describing one object by object number and generation number.
///
/// NOTE(review): presumably one row of the PDF cross-reference table — confirm
/// against the code that builds these entries.
#[derive(Debug, Clone)]
pub struct XEntry {
    /// The value of the entry.
    ///
    /// NOTE(review): presumably the byte offset of the object for an in-use entry —
    /// the meaning is not visible from this file; confirm against the parser.
    pub(crate) value: u64,
    /// `true` when the entry is in use, `false` when it has been freed/deleted.
    pub(crate) using: bool,
    /// The object number of the entry.
    pub(crate) obj_num: u64,
    /// The generation number of the entry.
    pub(crate) gen_num: u64,
}
21
/// A PDF dictionary: a table of entries looked up by key.
///
/// Every key maps to an `Option<PDFObject>`, so a key can be present without a
/// value. `Dictionary::get` reports such a key the same way as a missing one.
pub struct Dictionary {
    /// The entries of the dictionary; a `None` value marks a key that is present
    /// but has no object attached.
    entries: HashMap<String, Option<PDFObject>>,
}
25
/// A PDF stream object.
///
/// NOTE(review): only the stream dictionary is stored. `Stream::new` accepts a
/// byte buffer but does not keep it, so the stream's data cannot be reached
/// through this type.
pub struct Stream {
    /// The dictionary that describes the stream.
    metadata: Dictionary,
}
29
/// A PDF object: one of the basic object types, an indirect reference, or a
/// labelled indirect object.
///
/// The long descriptions on the variants appear to be excerpts from the PDF
/// reference manual and are kept for context.
pub enum PDFObject {
    /// ## Booleans
    /// The keywords true and false represent boolean objects with values true and false.
    Bool(bool),
    /// ## Numbers
    /// PDF provides two types of numbers, integer and real. Integers may be specified by
    /// signed or unsigned constants. Reals may only be in decimal format. Throughout
    /// this book, number means an object whose type is either integer or real.
    ///
    /// **Note:** Exponential format for numbers (such as 1.0E3) is not supported.
    Number(PDFNumber),
    /// ## Names
    /// A name, like a string, is a sequence of characters. It must begin with a slash
    /// followed by a letter, followed by a sequence of characters. Names may contain any
    /// characters except linefeed, carriage return, `%`, `(`, `)`, `<`, `>`, `[`, `]`,
    /// `{`, and `}`. Examples of names are:
    /// ```plaintext
    /// /Name1
    /// /ASomewhatLongerName2
    /// /A;Name_With-various***characters?.
    /// ```
    ///
    /// NOTE(review): whether the stored string keeps the leading `/` is not visible
    /// from this file — confirm against the parser.
    Named(String),
    /// ## Strings
    /// The bytes of a string object.
    ///
    /// NOTE(review): whether escape sequences are already decoded at this point is
    /// decided by the parser — confirm.
    String(Vec<u8>),
    /// ## Arrays
    /// An array is a sequence of PDF objects. An array may contain a mixture of object
    /// types. An array is represented as a left square bracket ( `[` ), followed by a
    /// sequence of objects, followed by a right square bracket ( `]` ). An example of an
    /// array is:
    /// ```plaintext
    /// [ 0 (Higgs) false 3.14 3 549 /SomeName ]
    /// ```
    Array(Vec<PDFObject>),
    /// ## Dictionaries
    /// A dictionary is an associative table containing pairs of objects. The first element of
    /// each pair is called the key and the second element is called the value. Unlike
    /// dictionaries in the PostScript language, a key must be a name. A value can be any
    /// kind of object, including a dictionary.
    /// A dictionary is generally used to collect and tie together the attributes of a complex
    /// object, with each key–value pair specifying the name and value of an attribute.
    ///
    /// A dictionary is represented by two left angle brackets (`<<`), followed by a sequence
    /// of key–value pairs, followed by two right angle brackets (`>>`). For example:
    ///
    /// Example 4.1 Dictionary
    /// ```plaintext
    /// << /Type /Example /Key2 12 /Key3 (a string) >>
    /// ```
    /// Or, in an example of a dictionary within a dictionary:
    /// ```plaintext
    /// << /Type /AlsoAnExample
    ///    /Subtype /Bad
    ///    /Reason (unsure)
    ///    /Version 0.01
    ///    /MyInfo <<
    ///       /Item1 0.4
    ///       /Item2 true
    ///       /LastItem (not!)
    ///       /VeryLastItem (OK)
    ///    >>
    /// >>
    /// ```
    /// Dictionary objects are the main building blocks of a PDF document. Many parts of
    /// a PDF document, such as pages and fonts, are represented using dictionaries. By
    /// convention, the **Type** key of such a dictionary specifies the type of object being
    /// described by the dictionary. Its value is always a name. In some cases, the **Subtype**
    /// key is used to describe a specialization of a particular type. Its value is always a
    /// name. For a font, Type is **Font** and four Subtypes exist: Type1, MMType1,
    /// Type3, and TrueType.
    Dict(Dictionary),
    /// The null object.
    Null,
    /// ## Indirect references
    /// Any object used as an element of an array or as a value in a dictionary may be
    /// specified by either a direct object or an indirect reference. An indirect reference is a
    /// reference to an indirect object, and consists of the indirect object’s object number,
    /// generation number, and the **R** keyword:
    /// ```plaintext
    /// <indirect reference> ::=
    ///     <object number>
    ///     <generation number>
    ///     R
    /// ```
    /// Using an indirect reference to the stream’s length, a stream could be written as:
    /// ```plaintext
    /// 7 0 obj
    /// <<
    /// /Length 8 0 R
    /// >>
    /// stream
    /// BT
    /// /F1 12 Tf
    /// 72 712 Td (A stream with an indirect Length) Tj
    /// ET
    /// endstream
    /// endobj
    /// 8 0 obj
    /// 64
    /// endobj
    /// ```
    ///
    /// The fields are the object number followed by the generation number.
    ObjectRef(u64, u64),
    /// ## Indirect objects
    /// A direct object is a boolean, number, string, name, array, dictionary, stream, or null,
    /// as described in the previous sections. An indirect object is an object that has been
    /// labeled so that it can be referenced by other objects. Any type of object may be an
    /// indirect object. Indirect objects are very useful; for example, if the length of a
    /// stream is not known before it is written, the value of the stream’s **Length** key may
    /// be specified as an indirect object that is stored in the file after the stream.
    ///
    /// An indirect object consists of an object identifier, a direct object, and the **endobj**
    /// keyword. The object identifier consists of an integer object number, an integer
    /// generation number, and the **obj** keyword:
    /// ```plaintext
    /// <indirect object> ::=
    ///     <object ID>
    ///     <direct object>
    ///     endobj
    /// <object ID> ::=
    ///     <object number>
    ///     <generation number>
    ///     obj
    /// ```
    /// The combination of object number and generation number serves as a unique
    /// identifier for an indirect object. Throughout its existence, an indirect object retains
    /// the object number and generation number it was initially assigned, even if the object is
    /// modified.
    ///
    /// Each indirect object has a unique object number, and indirect objects are often but
    /// not necessarily numbered sequentially in the file, beginning with one.
    ///
    /// The fields are the object number, the generation number, and the wrapped object.
    IndirectObject(u64, u64, Box<PDFObject>),
    /// ## Streams
    /// A stream, like a string, is a sequence of characters. However, an application can
    /// read a small portion of a stream at a time, while a string must be read in its entirety.
    /// For this reason, objects with potentially large amounts of data, such as images and
    /// page descriptions, are represented as streams.
    ///
    /// A stream consists of a dictionary that describes a sequence of characters, followed
    /// by the keyword stream, followed by one or more lines of characters, followed by
    /// the keyword endstream.
    /// ```plaintext
    /// <stream> ::= <dictionary>
    ///     stream
    ///     {<lines of characters>}*
    ///     endstream
    /// ```
    Stream(Stream),
}
164
165impl PDFObject {
166 /// Returns true if the object is a boolean.
167 pub fn is_bool(&self) -> bool {
168 match self {
169 PDFObject::Bool(_) => true,
170 _ => false,
171 }
172 }
173 /// Returns the boolean value of the object if it is a boolean.
174 pub fn as_bool(&self) -> Option<bool> {
175 match self {
176 PDFObject::Bool(b) => Some(*b),
177 _ => None,
178 }
179 }
180
181 /// Returns true if the object is a number.
182 pub fn is_number(&self) -> bool {
183 match self {
184 PDFObject::Number(_) => true,
185 _ => false,
186 }
187 }
188 /// Returns the number value of the object if it is a number.
189 pub fn as_number(&self) -> Option<&PDFNumber> {
190 match self {
191 PDFObject::Number(n) => Some(n),
192 _ => None,
193 }
194 }
195 /// Returns true if the object is a string.
196 pub fn is_string(&self) -> bool {
197 match self {
198 PDFObject::String(_) => true,
199 _ => false,
200 }
201 }
202 /// Returns the string byte sequence of the object if it is a string.
203 pub fn as_str_bytes(&self) -> Option<&[u8]> {
204 match self {
205 PDFObject::String(buf) => Some(buf),
206 _ => None,
207 }
208 }
209
210 /// Returns the string value of the object if it is a string.
211 pub fn is_array(&self) -> bool {
212 match self {
213 PDFObject::Array(_) => true,
214 _ => false,
215 }
216 }
217 /// Returns the array of objects if it is an array.
218 pub fn as_array(&self) -> Option<&[PDFObject]> {
219 match self {
220 PDFObject::Array(a) => Some(a),
221 _ => None,
222 }
223 }
224 /// Returns true if the object is a dictionary.
225 pub fn is_dict(&self) -> bool {
226 match self {
227 PDFObject::Dict(_) => true,
228 _ => false,
229 }
230 }
231 /// Returns the dictionary if it is one.
232 pub fn as_dict(&self) -> Option<&Dictionary> {
233 match self {
234 PDFObject::Dict(d) => Some(d),
235 _ => None,
236 }
237 }
238 /// Returns true if the object is an indirect object.
239 pub fn is_object_ref(&self) -> bool {
240 match self {
241 PDFObject::ObjectRef(_, ..) => true,
242 _ => false,
243 }
244 }
245 /// Returns the object reference if it is one.
246 pub fn as_object_ref(&self) -> Option<(u64, u64)> {
247 match self {
248 PDFObject::ObjectRef(n, g) => Some((*n, *g)),
249 _ => None,
250 }
251 }
252
253 /// Returns true if the object is an indirect object.
254 pub fn is_indirect_object(&self) -> bool {
255 match self {
256 PDFObject::IndirectObject(_, _, _) => true,
257 _ => false,
258 }
259 }
260 /// Returns the indirect object if it is one.
261 pub fn as_indirect_object(&self) -> Option<(u64, u64, &PDFObject)> {
262 match self {
263 PDFObject::IndirectObject(n, g, data) => Some((*n, *g, data)),
264 _ => None,
265 }
266 }
267
268 /// Returns true if the object is null.
269 pub fn is_null(&self) -> bool {
270 match self {
271 PDFObject::Null => true,
272 _ => false,
273 }
274 }
275 /// Returns true if the object is a stream.
276 pub fn is_stream(&self)->bool{
277 match self {
278 PDFObject::Stream(_) => true,
279 _ => false,
280 }
281 }
282
283 /// Returns the stream if it is one.
284 pub fn as_stream(&self)->Option<&Stream>{
285 match self {
286 PDFObject::Stream(s) => Some(s),
287 _ => None,
288 }
289 }
290
291}
292
293impl Dictionary {
294 /// Creates a new dictionary with the given entries.
295 pub(crate) fn new(entries: HashMap<String, Option<PDFObject>>) -> Self {
296 Dictionary { entries }
297 }
298 /// Returns the value of the entry with the given key.
299 pub fn get(&self, key: &str)-> Option<&PDFObject> {
300 match self.entries.get(key){
301 Some(v) => v.as_ref(),
302 None => None,
303 }
304 }
305}
306
307impl XEntry {
308 pub(crate) fn new(obj_num: u64, gen_num: u64, value: u64, using: bool) -> Self {
309 XEntry {
310 obj_num,
311 gen_num,
312 using,
313 value,
314 }
315 }
316 /// Returns the object number of the entry.
317 pub fn get_obj_num(&self)->u64{
318 self.obj_num
319 }
320 /// Returns the generation number of the entry.
321 pub fn get_gen_num(&self)->u64{
322 self.gen_num
323 }
324 /// Returns true if the entry is currently being used.
325 pub fn is_using(&self) -> bool {
326 self.using
327 }
328
329 /// Returns true if the entry is freed.
330 pub fn is_freed(&self)->bool{
331 !self.using
332 }
333 /// Returns the value of the entry.
334 pub fn get_value(&self)->u64{
335 self.value
336 }
337}
338
339impl Stream {
340 /// Creates a new stream with the given metadata.
341 pub(crate) fn new(metadata: Dictionary,buf:Vec<u8>) -> Self {
342 Stream { metadata }
343 }
344}