Skip to main content

folio_cos/
object.rs

//! PDF object types.
//!
//! PDF has 8 basic object types: null, boolean, number (integer or real),
//! name, string, array, dictionary, and stream. Objects can be "indirect"
//! (referenced by object number and generation number).
6
7use indexmap::IndexMap;
8use std::fmt;
9
/// Identifies an indirect object by its object number and generation number.
///
/// Ordering and hashing are derived, so ids sort by `num` first and then by
/// `gen_num`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ObjectId {
    /// The object number.
    pub num: u32,
    /// The generation number.
    pub gen_num: u16,
}

impl ObjectId {
    /// Builds an id from an object number and a generation number.
    pub fn new(num: u32, gen_num: u16) -> Self {
        ObjectId { num, gen_num }
    }
}

/// Renders the id in indirect-reference form: `<num> <gen> R`.
impl fmt::Display for ObjectId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ObjectId { num, gen_num } = *self;
        write!(f, "{object} {generation} R", object = num, generation = gen_num)
    }
}
28
29/// A PDF object.
30///
31/// This enum represents all 8 PDF object types plus an indirect reference.
32/// Stream objects are represented separately as they contain both a dictionary
33/// and binary data.
34#[derive(Debug, Clone)]
35pub enum PdfObject {
36    /// The null object.
37    Null,
38    /// A boolean value.
39    Bool(bool),
40    /// An integer value (PDF integers are at least 32-bit).
41    Integer(i64),
42    /// A real (floating-point) value.
43    Real(f64),
44    /// A name object (e.g., /Type, /Page). Stored without the leading '/'.
45    Name(Vec<u8>),
46    /// A string object (literal or hex string).
47    Str(Vec<u8>),
48    /// An array of objects.
49    Array(Vec<PdfObject>),
50    /// A dictionary mapping name keys to object values.
51    Dict(IndexMap<Vec<u8>, PdfObject>),
52    /// A reference to an indirect object.
53    Reference(ObjectId),
54    /// A stream object (dictionary + binary data).
55    /// The data is stored in decoded (uncompressed) form when possible.
56    Stream(PdfStream),
57}
58
59/// A PDF stream object: dictionary + binary data.
60#[derive(Debug, Clone)]
61pub struct PdfStream {
62    /// The stream dictionary (contains /Length, /Filter, etc.)
63    pub dict: IndexMap<Vec<u8>, PdfObject>,
64    /// The raw (possibly still encoded) stream data.
65    pub data: Vec<u8>,
66    /// Whether `data` has been decoded (filters applied).
67    pub decoded: bool,
68}
69
70impl PdfObject {
71    // --- Type checking ---
72
73    pub fn is_null(&self) -> bool {
74        matches!(self, PdfObject::Null)
75    }
76
77    pub fn is_bool(&self) -> bool {
78        matches!(self, PdfObject::Bool(_))
79    }
80
81    pub fn is_integer(&self) -> bool {
82        matches!(self, PdfObject::Integer(_))
83    }
84
85    pub fn is_number(&self) -> bool {
86        matches!(self, PdfObject::Integer(_) | PdfObject::Real(_))
87    }
88
89    pub fn is_name(&self) -> bool {
90        matches!(self, PdfObject::Name(_))
91    }
92
93    pub fn is_string(&self) -> bool {
94        matches!(self, PdfObject::Str(_))
95    }
96
97    pub fn is_array(&self) -> bool {
98        matches!(self, PdfObject::Array(_))
99    }
100
101    pub fn is_dict(&self) -> bool {
102        matches!(self, PdfObject::Dict(_))
103    }
104
105    pub fn is_stream(&self) -> bool {
106        matches!(self, PdfObject::Stream(_))
107    }
108
109    pub fn is_reference(&self) -> bool {
110        matches!(self, PdfObject::Reference(_))
111    }
112
113    // --- Value extraction ---
114
115    pub fn as_bool(&self) -> Option<bool> {
116        match self {
117            PdfObject::Bool(b) => Some(*b),
118            _ => None,
119        }
120    }
121
122    pub fn as_i64(&self) -> Option<i64> {
123        match self {
124            PdfObject::Integer(n) => Some(*n),
125            PdfObject::Real(n) => Some(*n as i64),
126            _ => None,
127        }
128    }
129
130    pub fn as_f64(&self) -> Option<f64> {
131        match self {
132            PdfObject::Integer(n) => Some(*n as f64),
133            PdfObject::Real(n) => Some(*n),
134            _ => None,
135        }
136    }
137
138    pub fn as_name(&self) -> Option<&[u8]> {
139        match self {
140            PdfObject::Name(n) => Some(n),
141            _ => None,
142        }
143    }
144
145    /// Get name as a UTF-8 string (lossy).
146    pub fn as_name_str(&self) -> Option<String> {
147        self.as_name()
148            .map(|n| String::from_utf8_lossy(n).into_owned())
149    }
150
151    pub fn as_str(&self) -> Option<&[u8]> {
152        match self {
153            PdfObject::Str(s) => Some(s),
154            _ => None,
155        }
156    }
157
158    pub fn as_array(&self) -> Option<&[PdfObject]> {
159        match self {
160            PdfObject::Array(a) => Some(a),
161            _ => None,
162        }
163    }
164
165    pub fn as_array_mut(&mut self) -> Option<&mut Vec<PdfObject>> {
166        match self {
167            PdfObject::Array(a) => Some(a),
168            _ => None,
169        }
170    }
171
172    pub fn as_dict(&self) -> Option<&IndexMap<Vec<u8>, PdfObject>> {
173        match self {
174            PdfObject::Dict(d) => Some(d),
175            PdfObject::Stream(s) => Some(&s.dict),
176            _ => None,
177        }
178    }
179
180    pub fn as_dict_mut(&mut self) -> Option<&mut IndexMap<Vec<u8>, PdfObject>> {
181        match self {
182            PdfObject::Dict(d) => Some(d),
183            PdfObject::Stream(s) => Some(&mut s.dict),
184            _ => None,
185        }
186    }
187
188    pub fn as_reference(&self) -> Option<ObjectId> {
189        match self {
190            PdfObject::Reference(id) => Some(*id),
191            _ => None,
192        }
193    }
194
195    pub fn as_stream(&self) -> Option<&PdfStream> {
196        match self {
197            PdfObject::Stream(s) => Some(s),
198            _ => None,
199        }
200    }
201
202    // --- Dictionary helpers ---
203
204    /// Look up a key in a dictionary (or stream dictionary).
205    pub fn dict_get(&self, key: &[u8]) -> Option<&PdfObject> {
206        self.as_dict()?.get(key)
207    }
208
209    /// Look up a key and get it as an integer.
210    pub fn dict_get_i64(&self, key: &[u8]) -> Option<i64> {
211        self.dict_get(key)?.as_i64()
212    }
213
214    /// Look up a key and get it as a float.
215    pub fn dict_get_f64(&self, key: &[u8]) -> Option<f64> {
216        self.dict_get(key)?.as_f64()
217    }
218
219    /// Look up a key and get it as a name.
220    pub fn dict_get_name(&self, key: &[u8]) -> Option<&[u8]> {
221        self.dict_get(key)?.as_name()
222    }
223
224    /// Look up a key and get it as a name string.
225    pub fn dict_get_name_str(&self, key: &[u8]) -> Option<String> {
226        self.dict_get(key)?.as_name_str()
227    }
228
229    /// Look up a key and get it as a boolean.
230    pub fn dict_get_bool(&self, key: &[u8]) -> Option<bool> {
231        self.dict_get(key)?.as_bool()
232    }
233}
234
235impl fmt::Display for PdfObject {
236    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
237        match self {
238            PdfObject::Null => write!(f, "null"),
239            PdfObject::Bool(b) => write!(f, "{}", if *b { "true" } else { "false" }),
240            PdfObject::Integer(n) => write!(f, "{}", n),
241            PdfObject::Real(n) => write!(f, "{}", n),
242            PdfObject::Name(n) => write!(f, "/{}", String::from_utf8_lossy(n)),
243            PdfObject::Str(s) => write!(f, "({})", String::from_utf8_lossy(s)),
244            PdfObject::Array(a) => {
245                write!(f, "[")?;
246                for (i, obj) in a.iter().enumerate() {
247                    if i > 0 {
248                        write!(f, " ")?;
249                    }
250                    write!(f, "{}", obj)?;
251                }
252                write!(f, "]")
253            }
254            PdfObject::Dict(d) => {
255                write!(f, "<< ")?;
256                for (k, v) in d {
257                    write!(f, "/{} {} ", String::from_utf8_lossy(k), v)?;
258                }
259                write!(f, ">>")
260            }
261            PdfObject::Reference(id) => write!(f, "{}", id),
262            PdfObject::Stream(s) => {
263                write!(f, "<< ")?;
264                for (k, v) in &s.dict {
265                    write!(f, "/{} {} ", String::from_utf8_lossy(k), v)?;
266                }
267                write!(f, ">> stream[{}bytes]", s.data.len())
268            }
269        }
270    }
271}
272
273impl Default for PdfObject {
274    fn default() -> Self {
275        PdfObject::Null
276    }
277}
278
279impl PartialEq for PdfObject {
280    fn eq(&self, other: &Self) -> bool {
281        match (self, other) {
282            (PdfObject::Null, PdfObject::Null) => true,
283            (PdfObject::Bool(a), PdfObject::Bool(b)) => a == b,
284            (PdfObject::Integer(a), PdfObject::Integer(b)) => a == b,
285            (PdfObject::Real(a), PdfObject::Real(b)) => a == b,
286            (PdfObject::Name(a), PdfObject::Name(b)) => a == b,
287            (PdfObject::Str(a), PdfObject::Str(b)) => a == b,
288            (PdfObject::Array(a), PdfObject::Array(b)) => a == b,
289            (PdfObject::Reference(a), PdfObject::Reference(b)) => a == b,
290            // Dicts and streams compare by keys/values
291            (PdfObject::Dict(a), PdfObject::Dict(b)) => a == b,
292            _ => false,
293        }
294    }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small stream object (`/Length 3`, data `abc`) for the
    /// stream-related assertions.
    fn sample_stream() -> PdfObject {
        let mut dict = IndexMap::new();
        dict.insert(b"Length".to_vec(), PdfObject::Integer(3));
        PdfObject::Stream(PdfStream {
            dict,
            data: b"abc".to_vec(),
            decoded: true,
        })
    }

    #[test]
    fn test_object_types() {
        assert!(PdfObject::Null.is_null());
        assert!(PdfObject::Bool(true).is_bool());
        assert!(PdfObject::Integer(42).is_integer());
        assert!(PdfObject::Integer(42).is_number());
        // 2.5 is exactly representable and does not resemble a std constant.
        assert!(PdfObject::Real(2.5).is_number());
        assert!(!PdfObject::Real(2.5).is_integer());
        assert!(PdfObject::Name(b"Type".to_vec()).is_name());
        assert!(PdfObject::Str(b"hello".to_vec()).is_string());
        assert!(PdfObject::Array(vec![]).is_array());
        assert!(PdfObject::Dict(IndexMap::new()).is_dict());
        assert!(PdfObject::Reference(ObjectId::new(1, 0)).is_reference());
        assert!(PdfObject::default().is_null());
    }

    #[test]
    fn test_stream_type_and_dict_access() {
        let stream = sample_stream();
        assert!(stream.is_stream());
        // A stream is not reported as a dict, but its dictionary is reachable.
        assert!(!stream.is_dict());
        assert!(stream.as_dict().is_some());
        assert_eq!(stream.dict_get_i64(b"Length"), Some(3));
        assert_eq!(stream.as_stream().map(|s| s.data.len()), Some(3));
    }

    #[test]
    fn test_value_extraction() {
        assert_eq!(PdfObject::Bool(true).as_bool(), Some(true));
        assert_eq!(PdfObject::Integer(42).as_i64(), Some(42));
        assert_eq!(PdfObject::Integer(42).as_f64(), Some(42.0));
        assert_eq!(PdfObject::Real(2.5).as_f64(), Some(2.5));
        // Reals are truncated when read as integers.
        assert_eq!(PdfObject::Real(2.5).as_i64(), Some(2));
        assert_eq!(
            PdfObject::Name(b"Type".to_vec()).as_name(),
            Some(b"Type".as_slice())
        );
        // Extraction from the wrong variant yields `None`.
        assert_eq!(PdfObject::Null.as_bool(), None);
        assert_eq!(PdfObject::Name(b"Type".to_vec()).as_i64(), None);
    }

    #[test]
    fn test_dict_helpers() {
        let mut dict = IndexMap::new();
        dict.insert(b"Type".to_vec(), PdfObject::Name(b"Page".to_vec()));
        dict.insert(b"Count".to_vec(), PdfObject::Integer(5));
        let obj = PdfObject::Dict(dict);

        assert_eq!(obj.dict_get_name_str(b"Type"), Some("Page".to_string()));
        assert_eq!(obj.dict_get_i64(b"Count"), Some(5));
        assert_eq!(obj.dict_get(b"Missing"), None);
        // Non-dictionary objects have nothing to look up.
        assert_eq!(PdfObject::Integer(1).dict_get(b"Type"), None);
    }

    #[test]
    fn test_display() {
        assert_eq!(format!("{}", PdfObject::Null), "null");
        assert_eq!(format!("{}", PdfObject::Bool(true)), "true");
        assert_eq!(format!("{}", PdfObject::Integer(42)), "42");
        assert_eq!(format!("{}", PdfObject::Name(b"Type".to_vec())), "/Type");
        assert_eq!(format!("{}", PdfObject::Str(b"hello".to_vec())), "(hello)");
        assert_eq!(
            format!("{}", PdfObject::Reference(ObjectId::new(3, 0))),
            "3 0 R"
        );
    }

    #[test]
    fn test_display_containers() {
        let array = PdfObject::Array(vec![
            PdfObject::Integer(1),
            PdfObject::Name(b"A".to_vec()),
        ]);
        assert_eq!(format!("{}", array), "[1 /A]");
        assert_eq!(format!("{}", PdfObject::Array(vec![])), "[]");

        let mut dict = IndexMap::new();
        dict.insert(b"Type".to_vec(), PdfObject::Name(b"Page".to_vec()));
        assert_eq!(format!("{}", PdfObject::Dict(dict)), "<< /Type /Page >>");

        assert_eq!(
            format!("{}", sample_stream()),
            "<< /Length 3 >> stream[3bytes]"
        );
    }
}