pdf_rs/objects.rs
1use std::collections::HashMap;
2use std::iter::Map;
3
/// A PDF numeric value: an integer (signed or unsigned) or a real.
///
/// `Debug` is derived so values can be printed and used in `assert_eq!`;
/// `Copy` is derived because every variant wraps a plain machine number.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PDFNumber {
    /// A signed integer.
    Signed(i64),
    /// An unsigned integer.
    Unsigned(u64),
    /// A real (floating-point) number.
    Real(f64),
}
10
/// One entry identified by an object number and a generation number.
///
/// NOTE(review): the name and fields suggest this is a cross-reference (xref)
/// table entry — confirm against the code that constructs these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XEntry {
    /// The value of the entry.
    pub(crate) value: u64,
    /// Whether the entry is in use (`true`) or deleted (`false`).
    pub(crate) using: bool,
    /// The object number of the entry.
    pub(crate) obj_num: u64,
    /// The generation number of the entry.
    pub(crate) gen_num: u64,
}
22
23pub struct Dictionary {
24 entries: HashMap<String, Option<PDFObject>>,
25}
26
27pub enum PDFObject {
28 /// The keywords true and false represent boolean objects with values true and false.
29 Bool(bool),
30 /// ## Numbers
31 /// PDF provides two types of numbers, integer and real. Integers may be specified by
32 /// signed or unsigned constants. Reals may only be in decimal format. Throughout
33 /// this book, number means an object whose type is either integer or real.</br>
34 /// `Note Exponential format for numbers (such as 1.0E3) is not supported.`
35 Number(PDFNumber),
36 /// ## Names
37 /// A name, like a string, is a sequence of characters. It must begin with a slash fol-
38 /// lowed by a letter, followed by a sequence of characters. Names may contain any
39 /// characters except linefeed, carriage return, %, (, ), <, >, [, ], {, and }. Examples of
40 /// names are:
41 /// ```plaintext
42 /// /Name1
43 /// /ASomewhatLongerName2
44 /// /A;Name_With-various***characters?.
45 /// ```
46 Named(String),
47 String(Vec<u8>),
48 /// ## Arrays
49 /// An array is a sequence of PDF objects. An array may contain a mixture of object
50 /// types. An array is represented as a left square bracket ( [ ), followed by a sequence
51 /// of objects, followed by a right square bracket ( ] ). An example of an array is:</br>
52 /// ```plaintext
53 /// [ 0 (Higgs) false 3.14 3 549 /SomeName ]
54 /// ```
55 Array(Vec<PDFObject>),
56 /// A dictionary is an associative table containing pairs of objects. The first element of
57 /// each pair is called the key and the second element is called the value. Unlike dictio-
58 /// naries in the PostScript language, a key must
59 /// be a name. A value can be any kind of object, including a dictionary.
60 /// A dictionary is generally used to collect and tie together the attributes of a complex
61 /// object, with each key–value pair specifying the name and value of an attribute.
62 ///
63 /// A dictionary is represented by two left angle brackets (<<), followed by a sequence
64 /// of key–value pairs, followed by two right angle brackets (>>). For example:
65 /// Example 4.1 Dictionary
66 /// << /Type /Example /Key2 12 /Key3 (a string) >>
67 /// Or, in an example of a dictionary within a dictionary:
68 /// ```plaintext
69 /// << /Type /AlsoAnExample
70 /// /Subtype /Bad
71 /// /Reason (unsure)
72 /// /Version 0.01
73 /// /MyInfo <<
74 /// /Item1 0.4
75 /// /Item2 true
76 /// /LastItem (not!)
77 /// /VeryLastItem (OK)
78 /// >>
79 /// >>
80 /// ```
81 /// Dictionary objects are the main building blocks of a PDF document. Many parts of
82 /// a PDF document, such as pages and fonts, are represented using dictionaries. By
83 /// convention, the **Type** key of such a dictionary specifies the type of object being
84 /// described by the dictionary. Its value is always a name. In some cases, the **Subtype**
85 /// key is used to describe a specialization of a particular type. Its value is always a
86 /// name. For a font, Type is **Font** and four Subtypes exist: Type1, MMType1,
87 /// Type3, and TrueType.
88 Dict(Dictionary),
89 Null,
90 /// Any object used as an element of an array or as a value in a dictionary may be
91 /// specified by either a direct object or an indirect reference. An indirect reference is a
92 /// reference to an indirect object, and consists of the indirect object’s object number,
93 /// generation number, and the **R** keyword:
94 /// ```plaintext
95 /// <indirect reference> ::=
96 /// <object number>
97 /// <generation number>
98 /// R
99 /// ```
100 /// Using an indirect reference to the stream’s length, a stream could be written as:
101 /// ```plaintext
102 /// 7 0 obj
103 /// <<
104 /// /Length 8 0 R
105 /// >>
106 /// stream
107 /// BT
108 /// /F1 12 Tf
109 /// 72 712 Td (A stream with an indirect Length) Tj
110 /// ET
111 /// endstream
112 /// endobj
113 /// 8 0 obj
114 /// 64
115 /// endobj
116 /// ```
117 ObjectRef(u64, u64),
118 /// A direct object is a boolean, number, string, name, array, dictionary, stream, or null,
119 /// as described in the previous sections. An indirect object is an object that has been
120 /// labeled so that it can be referenced by other objects. Any type of object may be an
121 /// indirect object. Indirect objects are very useful; for example, if the length of a
122 /// stream is not known before it is written, the value of the stream’s **Length** key may
123 /// be specified as an indirect object that is stored in the file after the stream.</br>
124 /// An indirect object consists of an object identifier, a direct object, and the **endobj**
125 /// keyword. The object identifier consists of an integer object number, an integer gen-
126 /// eration number, and the **obj** keyword:
127 /// ```plaintext
128 /// <indirect object> ::=
129 /// <object ID> ::=
130 /// <object ID>
131 /// <direct object>
132 /// endobj
133 /// <object number>
134 /// <generation number>
135 /// obj
136 /// ```
137 /// The combination of object number and generation number serves as a unique iden-
138 /// tifier for an indirect object. Throughout its existence, an indirect object retains the
139 /// object number and generation number it was initially assigned, even if the object is
140 /// modified.</br>
141 /// Each indirect object has a unique object number, and indirect objects are often but
142 /// not necessarily numbered sequentially in the file, beginning with o
143 IndirectObject(u64, u64, Vec<PDFObject>),
144 Stream,
145}
146
147impl PDFObject {
148 /// Returns true if the object is a boolean.
149 pub fn is_bool(&self) -> bool {
150 match self {
151 PDFObject::Bool(_) => true,
152 _ => false,
153 }
154 }
155 /// Returns the boolean value of the object if it is a boolean.
156 pub fn as_bool(&self) -> Option<bool> {
157 match self {
158 PDFObject::Bool(b) => Some(*b),
159 _ => None,
160 }
161 }
162
163 /// Returns true if the object is a number.
164 pub fn is_number(&self) -> bool {
165 match self {
166 PDFObject::Number(_) => true,
167 _ => false,
168 }
169 }
170 /// Returns the number value of the object if it is a number.
171 pub fn as_number(&self) -> Option<&PDFNumber> {
172 match self {
173 PDFObject::Number(n) => Some(n),
174 _ => None,
175 }
176 }
177 /// Returns true if the object is a string.
178 pub fn is_string(&self) -> bool {
179 match self {
180 PDFObject::String(_) => true,
181 _ => false,
182 }
183 }
184 /// Returns the string byte sequence of the object if it is a string.
185 pub fn as_str_bytes(&self) -> Option<&[u8]> {
186 match self {
187 PDFObject::String(buf) => Some(buf),
188 _ => None,
189 }
190 }
191
192 /// Returns the string value of the object if it is a string.
193 pub fn is_array(&self) -> bool {
194 match self {
195 PDFObject::Array(_) => true,
196 _ => false,
197 }
198 }
199 /// Returns the array of objects if it is an array.
200 pub fn as_array(&self) -> Option<&[PDFObject]> {
201 match self {
202 PDFObject::Array(a) => Some(a),
203 _ => None,
204 }
205 }
206 /// Returns true if the object is a dictionary.
207 pub fn is_dict(&self) -> bool {
208 match self {
209 PDFObject::Dict(_) => true,
210 _ => false,
211 }
212 }
213 /// Returns the dictionary if it is one.
214 pub fn as_dict(&self) -> Option<&Dictionary> {
215 match self {
216 PDFObject::Dict(d) => Some(d),
217 _ => None,
218 }
219 }
220 /// Returns true if the object is an indirect object.
221 pub fn is_object_ref(&self) -> bool {
222 match self {
223 PDFObject::ObjectRef(_, ..) => true,
224 _ => false,
225 }
226 }
227 /// Returns the object reference if it is one.
228 pub fn as_object_ref(&self) -> Option<(u64, u64)> {
229 match self {
230 PDFObject::ObjectRef(n, g) => Some((*n, *g)),
231 _ => None,
232 }
233 }
234
235 /// Returns true if the object is an indirect object.
236 pub fn is_indirect_object(&self) -> bool {
237 match self {
238 PDFObject::IndirectObject(_, _, _) => true,
239 _ => false,
240 }
241 }
242 /// Returns the indirect object if it is one.
243 pub fn as_indirect_object(&self) -> Option<(u64, u64, &[PDFObject])> {
244 match self {
245 PDFObject::IndirectObject(n, g, data) => Some((*n, *g, data)),
246 _ => None,
247 }
248 }
249
250 /// Returns true if the object is null.
251 pub fn is_null(&self) -> bool {
252 match self {
253 PDFObject::Null => true,
254 _ => false,
255 }
256 }
257}
258
259impl Dictionary {
260 /// Creates a new dictionary with the given entries.
261 pub(crate) fn new(entries: HashMap<String, Option<PDFObject>>) -> Self {
262 Dictionary { entries }
263 }
264 /// Returns the value of the entry with the given key.
265 pub fn get(&self, key: &str)-> Option<&PDFObject> {
266 match self.entries.get(key){
267 Some(v) => v.as_ref(),
268 None => None,
269 }
270 }
271}
272
273impl XEntry {
274 pub(crate) fn new(obj_num: u64, gen_num: u64, value: u64, using: bool) -> Self {
275 XEntry {
276 obj_num,
277 gen_num,
278 using,
279 value,
280 }
281 }
282 /// Returns the object number of the entry.
283 pub fn get_obj_num(&self)->u64{
284 self.obj_num
285 }
286 /// Returns the generation number of the entry.
287 pub fn get_gen_num(&self)->u64{
288 self.gen_num
289 }
290 /// Returns true if the entry is currently being used.
291 pub fn is_using(&self) -> bool {
292 self.using
293 }
294
295 /// Returns true if the entry is freed.
296 pub fn is_freed(&self)->bool{
297 !self.using
298 }
299 /// Returns the value of the entry.
300 pub fn get_value(&self)->u64{
301 self.value
302 }
303}