Skip to main content

pdf_syntax/object/
mod.rs

1//! Parsing and reading from PDF objects.
2
3pub use crate::object::array::Array;
4pub use crate::object::date::DateTime;
5pub use crate::object::dict::Dict;
6pub use crate::object::name::Name;
7use crate::object::name::skip_name_like;
8pub use crate::object::null::Null;
9pub use crate::object::number::Number;
10pub use crate::object::rect::Rect;
11pub use crate::object::r#ref::{MaybeRef, ObjRef};
12pub use crate::object::stream::Stream;
13pub use crate::object::string::String;
14use crate::reader::Reader;
15use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
16use core::fmt::Debug;
17use log::warn;
18
/// Upper bound on how deeply inline objects may nest (a dict or array that
/// contains another dict or array, and so on).
///
/// Reading or skipping an object gives up once this depth is reached, which
/// keeps deeply nested malicious PDFs from overflowing the stack.
const MAX_INLINE_NESTING_DEPTH: u16 = 256;
22
23mod bool;
24mod date;
25mod null;
26mod number;
27mod rect;
28mod r#ref;
29mod string;
30mod tuple;
31
32pub(crate) mod indirect;
33pub(crate) mod name;
34
35pub mod array;
36pub mod dict;
37pub mod stream;
38
39/// A trait for PDF objects.
40pub(crate) trait ObjectLike<'a>: TryFrom<Object<'a>> + Readable<'a> + Debug + Clone {}
41
42/// A primitive PDF object.
43#[derive(Debug, Clone, PartialEq)]
44pub enum Object<'a> {
45    /// A null object.
46    Null(Null),
47    /// A boolean object.
48    Boolean(bool),
49    /// A number object.
50    Number(Number),
51    /// A string object.
52    String(String),
53    /// A name object.
54    Name(Name),
55    /// A dict object.
56    Dict(Dict<'a>),
57    /// An array object.
58    Array(Array<'a>),
59    /// A stream object.
60    // Can only be an indirect object in theory and thus comes with some caveats,
61    // but we just treat it the same.
62    Stream(Stream<'a>),
63}
64
65impl<'a> Object<'a> {
66    /// Try casting the object to a specific subtype.
67    pub(crate) fn cast<T>(self) -> Option<T>
68    where
69        T: ObjectLike<'a>,
70    {
71        self.try_into().ok()
72    }
73
74    /// Try casting the object to a dict.
75    #[inline(always)]
76    pub fn into_dict(self) -> Option<Dict<'a>> {
77        self.cast()
78    }
79
80    /// Try casting the object to a name.
81    #[inline(always)]
82    pub fn into_name(self) -> Option<Name> {
83        self.cast()
84    }
85
86    /// Try casting the object to the null object.
87    #[inline(always)]
88    pub fn into_null(self) -> Option<Null> {
89        self.cast()
90    }
91
92    /// Try casting the object to a bool.
93    #[inline(always)]
94    pub fn into_bool(self) -> Option<bool> {
95        self.cast()
96    }
97
98    /// Try casting the object to a string.
99    #[inline(always)]
100    pub fn into_string(self) -> Option<String> {
101        self.cast()
102    }
103
104    /// Try casting the object to a stream.
105    #[inline(always)]
106    pub fn into_stream(self) -> Option<Stream<'a>> {
107        self.cast()
108    }
109
110    /// Try casting the object to an array.
111    #[inline(always)]
112    pub fn into_array(self) -> Option<Array<'a>> {
113        self.cast()
114    }
115
116    /// Try casting the object to a u8.
117    #[inline(always)]
118    pub fn into_u8(self) -> Option<u8> {
119        self.cast()
120    }
121
122    /// Try casting the object to a u16.
123    #[inline(always)]
124    pub fn into_u16(self) -> Option<u16> {
125        self.cast()
126    }
127
128    /// Try casting the object to a f32.
129    #[inline(always)]
130    pub fn into_f32(self) -> Option<f32> {
131        self.cast()
132    }
133
134    /// Try casting the object to a i32.
135    #[inline(always)]
136    pub fn into_i32(self) -> Option<i32> {
137        self.cast()
138    }
139
140    /// Try casting the object to a number.
141    #[inline(always)]
142    pub fn into_number(self) -> Option<Number> {
143        self.cast()
144    }
145}
146
147impl<'a> ObjectLike<'a> for Object<'a> {}
148
149impl Skippable for Object<'_> {
150    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
151        if r.nesting_depth >= MAX_INLINE_NESTING_DEPTH {
152            warn!("Inline object nesting depth exceeds {MAX_INLINE_NESTING_DEPTH}, aborting skip");
153            return None;
154        }
155        r.nesting_depth += 1;
156        let result = (|| {
157            match r.peek_byte()? {
158                b'n' => Null::skip(r, is_content_stream),
159                b't' | b'f' => bool::skip(r, is_content_stream),
160                b'/' => Name::skip(r, is_content_stream),
161                b'<' => match r.peek_bytes(2)? {
162                    // A stream can never appear in a dict/array, so it should never be skipped.
163                    b"<<" => Dict::skip(r, is_content_stream),
164                    _ => String::skip(r, is_content_stream),
165                },
166                b'(' => String::skip(r, is_content_stream),
167                b'.' | b'+' | b'-' | b'0'..=b'9' => Number::skip(r, is_content_stream),
168                b'[' => Array::skip(r, is_content_stream),
169                // See test case operator-in-TJ-array-0: Be lenient and skip content operators in
170                // array
171                _ => skip_name_like(r, false),
172            }
173        })();
174        r.nesting_depth -= 1;
175        result
176    }
177}
178
179impl<'a> Readable<'a> for Object<'a> {
180    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
181        if r.nesting_depth >= MAX_INLINE_NESTING_DEPTH {
182            warn!("Inline object nesting depth exceeds {MAX_INLINE_NESTING_DEPTH}, aborting read");
183            return None;
184        }
185        r.nesting_depth += 1;
186        let result = (|| -> Option<Self> {
187            let object = match r.peek_byte()? {
188                b'n' => Self::Null(Null::read(r, ctx)?),
189                b't' | b'f' => Self::Boolean(bool::read(r, ctx)?),
190                b'/' => Self::Name(Name::read(r, ctx)?),
191                b'<' => match r.peek_bytes(2)? {
192                    b"<<" => {
193                        let mut cloned = r.clone();
194                        let dict = Dict::read(&mut cloned, ctx)?;
195                        cloned.skip_white_spaces_and_comments();
196
197                        if cloned.forward_tag(b"stream").is_some() {
198                            Object::Stream(Stream::read(r, ctx)?)
199                        } else {
200                            r.jump(cloned.offset());
201
202                            Object::Dict(dict)
203                        }
204                    }
205                    _ => Self::String(String::read(r, ctx)?),
206                },
207                b'(' => Self::String(String::read(r, ctx)?),
208                b'.' | b'+' | b'-' | b'0'..=b'9' => Self::Number(Number::read(r, ctx)?),
209                b'[' => Self::Array(Array::read(r, ctx)?),
210                // See the comment in `skip`.
211                _ => {
212                    skip_name_like(r, false)?;
213                    Self::Null(Null)
214                }
215            };
216
217            Some(object)
218        })();
219        r.nesting_depth -= 1;
220        result
221    }
222}
223
/// Implemented by objects that can be parsed straight from a byte slice.
pub trait FromBytes<'a>
where
    Self: Sized,
{
    /// Attempt to parse `Self` from `b`, returning `None` on failure.
    fn from_bytes(b: &'a [u8]) -> Option<Self>;
}
229
230impl<'a, T: Readable<'a>> FromBytes<'a> for T {
231    fn from_bytes(b: &'a [u8]) -> Option<Self> {
232        Self::from_bytes_impl(b)
233    }
234}
235
/// An identifier for a PDF object.
///
/// Identifiers compare and order by object number first and by generation
/// number second (the derived ordering follows field order).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ObjectIdentifier {
    /// The object number.
    pub obj_number: i32,
    /// The generation number.
    pub gen_number: i32,
}

impl ObjectIdentifier {
    /// Create a new `ObjectIdentifier` from an object number and a generation number.
    ///
    /// This is a `const fn`, so identifiers can also be built in constant contexts.
    pub const fn new(obj_number: i32, gen_number: i32) -> Self {
        Self {
            obj_number,
            gen_number,
        }
    }
}
254
255impl Readable<'_> for ObjectIdentifier {
256    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
257        let obj_number = r.read_without_context::<i32>()?;
258        r.skip_white_spaces_and_comments();
259        let gen_number = r.read_without_context::<i32>()?;
260        r.skip_white_spaces_and_comments();
261        r.forward_tag(b"obj")?;
262
263        Some(Self {
264            obj_number,
265            gen_number,
266        })
267    }
268}
269
270impl Skippable for ObjectIdentifier {
271    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
272        r.skip_in_content_stream::<i32>()?;
273        r.skip_white_spaces_and_comments();
274        r.skip_in_content_stream::<i32>()?;
275        r.skip_white_spaces_and_comments();
276        r.forward_tag(b"obj")?;
277
278        Some(())
279    }
280}
281
282/// A convenience function that extracts a dict and a stream from an object.
283/// If the object is just a dictionary, it will return `None` for the stream.
284/// If the object is a stream, it will return its dictionary as well as the stream
285/// itself.
286pub fn dict_or_stream<'a>(obj: &Object<'a>) -> Option<(Dict<'a>, Option<Stream<'a>>)> {
287    if let Some(stream) = obj.clone().cast::<Stream<'a>>() {
288        Some((stream.dict().clone(), Some(stream)))
289    } else {
290        obj.clone().cast::<Dict<'a>>().map(|dict| (dict, None))
291    }
292}
293
/// Helper macros shared by the object submodules.
mod macros {
    /// Implements the object boilerplate for the type `$t`.
    ///
    /// The expansion contains:
    /// * `TryFrom<Object<'a>>` for `$t`, which succeeds only when the object is
    ///   the `Object::$s` variant and otherwise fails with `()`;
    /// * the `ObjectLike` marker impl for `$t`.
    ///
    /// `$t` may be followed by a lifetime in angle brackets, then a comma and
    /// the variant name `$s`.
    macro_rules! object {
        ($t:ident $(<$l:lifetime>),*, $s:ident) => {
            impl<'a> TryFrom<Object<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: Object<'a>) -> core::result::Result<Self, Self::Error> {
                    match value {
                        // Only the matching variant converts; its payload is returned as is.
                        Object::$s(b) => Ok(b),
                        _ => Err(()),
                    }
                }
            }

            impl<'a> crate::object::ObjectLike<'a> for $t$(<$l>),* {}
        };
    }

    // Re-export so the macro can be used from other modules of the crate.
    pub(crate) use object;
}
314
#[cfg(test)]
mod tests {
    use crate::object::{Array, Dict, Name, Null, Number, Object, Stream, String};
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// Parse `input` as a single object using a dummy reader context.
    fn parse(input: &[u8]) -> Option<Object<'_>> {
        let mut reader = Reader::new(input);
        let ctx = ReaderContext::dummy();
        reader.read_with_context::<Object<'_>>(&ctx)
    }

    #[test]
    fn null() {
        let parsed = parse(b"null").unwrap();
        assert!(matches!(parsed, Object::Null(_)));
    }

    #[test]
    fn bool() {
        let parsed = parse(b"true").unwrap();
        assert!(matches!(parsed, Object::Boolean(_)));
    }

    #[test]
    fn number() {
        let parsed = parse(b"34.5").unwrap();
        assert!(matches!(parsed, Object::Number(_)));
    }

    #[test]
    fn string_1() {
        let parsed = parse(b"(Hi)").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn string_2() {
        let parsed = parse(b"<34>").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn name() {
        let parsed = parse(b"/Name").unwrap();
        assert!(matches!(parsed, Object::Name(_)));
    }

    #[test]
    fn dict() {
        let parsed = parse(b"<</Entry 45>>").unwrap();
        assert!(matches!(parsed, Object::Dict(_)));
    }

    #[test]
    fn array() {
        let parsed = parse(b"[45]").unwrap();
        assert!(matches!(parsed, Object::Array(_)));
    }

    #[test]
    fn stream() {
        let parsed = parse(b"<< /Length 3 >> stream\nabc\nendstream").unwrap();
        assert!(matches!(parsed, Object::Stream(_)));
    }

    // Pins the in-memory size of the object types on 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn object_sizes() {
        assert_eq!(size_of::<Object<'_>>(), 40);
        assert_eq!(size_of::<Array<'_>>(), 32);
        assert_eq!(size_of::<Dict<'_>>(), 8);
        assert_eq!(size_of::<Name>(), 32);
        assert_eq!(size_of::<Null>(), 0);
        assert_eq!(size_of::<Number>(), 16);
        assert_eq!(size_of::<Stream<'_>>(), 8);
        assert_eq!(size_of::<String>(), 32);
    }
}