Skip to main content

hayro_syntax/object/
mod.rs

1//! Parsing and reading from PDF objects.
2
3pub use crate::object::array::Array;
4pub use crate::object::date::DateTime;
5pub use crate::object::dict::Dict;
6pub use crate::object::name::Name;
7use crate::object::name::skip_name_like;
8pub use crate::object::null::Null;
9pub use crate::object::number::Number;
10pub use crate::object::rect::Rect;
11pub use crate::object::r#ref::{MaybeRef, ObjRef};
12pub use crate::object::stream::Stream;
13pub use crate::object::string::String;
14use crate::reader::Reader;
15use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
16use core::fmt::Debug;
17
18mod bool;
19mod date;
20mod null;
21mod number;
22mod rect;
23mod r#ref;
24mod string;
25mod tuple;
26
27pub(crate) mod indirect;
28pub(crate) mod name;
29
30pub mod array;
31pub mod dict;
32pub mod stream;
33
34/// A trait for PDF objects.
35pub(crate) trait ObjectLike<'a>: TryFrom<Object<'a>> + Readable<'a> + Debug + Clone {}
36
37/// A trait for PDF objects that can be borrowed directly from an [`Object`].
38pub(crate) trait ObjectRefLike<'a>: Sized {
39    fn cast_ref<'b>(obj: &'b Object<'a>) -> Option<&'b Self>;
40}
41
42/// A primitive PDF object.
43#[derive(Debug, Clone, PartialEq)]
44pub enum Object<'a> {
45    /// A null object.
46    Null(Null),
47    /// A boolean object.
48    Boolean(bool),
49    /// A number object.
50    Number(Number),
51    /// A string object.
52    String(String<'a>),
53    /// A name object.
54    Name(Name<'a>),
55    /// A dict object.
56    Dict(Dict<'a>),
57    /// An array object.
58    Array(Array<'a>),
59    /// A stream object.
60    // Can only be an indirect object in theory and thus comes with some caveats,
61    // but we just treat it the same.
62    Stream(Stream<'a>),
63}
64
65impl<'a> Object<'a> {
66    /// Try casting the object to a specific subtype.
67    pub(crate) fn cast<T>(self) -> Option<T>
68    where
69        T: ObjectLike<'a>,
70    {
71        self.try_into().ok()
72    }
73
74    /// Try casting the object to a reference of the specific type.
75    pub(crate) fn cast_ref<T>(&self) -> Option<&T>
76    where
77        T: ObjectRefLike<'a>,
78    {
79        T::cast_ref(self)
80    }
81
82    /// Try casting the object to a dict.
83    #[inline(always)]
84    pub fn into_dict(self) -> Option<Dict<'a>> {
85        self.cast()
86    }
87
88    /// Try casting the object to a name.
89    #[inline(always)]
90    pub fn into_name(self) -> Option<Name<'a>> {
91        self.cast()
92    }
93
94    /// Try casting the object to the null object.
95    #[inline(always)]
96    pub fn into_null(self) -> Option<Null> {
97        self.cast()
98    }
99
100    /// Try casting the object to a bool.
101    #[inline(always)]
102    pub fn into_bool(self) -> Option<bool> {
103        self.cast()
104    }
105
106    /// Try casting the object to a string.
107    #[inline(always)]
108    pub fn into_string(self) -> Option<String<'a>> {
109        self.cast()
110    }
111
112    /// Try casting the object to a stream.
113    #[inline(always)]
114    pub fn into_stream(self) -> Option<Stream<'a>> {
115        self.cast()
116    }
117
118    /// Try casting the object to an array.
119    #[inline(always)]
120    pub fn into_array(self) -> Option<Array<'a>> {
121        self.cast()
122    }
123
124    /// Try casting the object to a u8.
125    #[inline(always)]
126    pub fn into_u8(self) -> Option<u8> {
127        self.cast()
128    }
129
130    /// Try casting the object to a u16.
131    #[inline(always)]
132    pub fn into_u16(self) -> Option<u16> {
133        self.cast()
134    }
135
136    /// Try casting the object to a f32.
137    #[inline(always)]
138    pub fn into_f32(self) -> Option<f32> {
139        self.cast()
140    }
141
142    /// Try casting the object to a i32.
143    #[inline(always)]
144    pub fn into_i32(self) -> Option<i32> {
145        self.cast()
146    }
147
148    /// Try casting the object to a number.
149    #[inline(always)]
150    pub fn into_number(self) -> Option<Number> {
151        self.cast()
152    }
153}
154
155impl<'a> ObjectLike<'a> for Object<'a> {}
156impl<'a> ObjectRefLike<'a> for Object<'a> {
157    fn cast_ref(obj: &Self) -> Option<&Self> {
158        Some(obj)
159    }
160}
161
162impl Skippable for Object<'_> {
163    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
164        match r.peek_byte()? {
165            b'n' => Null::skip(r, is_content_stream),
166            b't' | b'f' => bool::skip(r, is_content_stream),
167            b'/' => Name::skip(r, is_content_stream),
168            b'<' => match r.peek_bytes(2)? {
169                // A stream can never appear in a dict/array, so it should never be skipped.
170                b"<<" => Dict::skip(r, is_content_stream),
171                _ => String::skip(r, is_content_stream),
172            },
173            b'(' => String::skip(r, is_content_stream),
174            b'.' | b'+' | b'-' | b'0'..=b'9' => Number::skip(r, is_content_stream),
175            b'[' => Array::skip(r, is_content_stream),
176            // See test case operator-in-TJ-array-0: Be lenient and skip content operators in
177            // array
178            _ => skip_name_like(r, false),
179        }
180    }
181}
182
183impl<'a> Readable<'a> for Object<'a> {
184    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
185        let object = match r.peek_byte()? {
186            b'n' => Self::Null(Null::read(r, ctx)?),
187            b't' | b'f' => Self::Boolean(bool::read(r, ctx)?),
188            b'/' => Self::Name(Name::read(r, ctx)?),
189            b'<' => match r.peek_bytes(2)? {
190                b"<<" => {
191                    let mut cloned = r.clone();
192                    let dict = Dict::read(&mut cloned, ctx)?;
193                    cloned.skip_white_spaces_and_comments();
194
195                    if cloned.forward_tag(b"stream").is_some() {
196                        Object::Stream(Stream::read(r, ctx)?)
197                    } else {
198                        r.jump(cloned.offset());
199
200                        Object::Dict(dict)
201                    }
202                }
203                _ => Self::String(String::read(r, ctx)?),
204            },
205            b'(' => Self::String(String::read(r, ctx)?),
206            b'.' | b'+' | b'-' | b'0'..=b'9' => Self::Number(Number::read(r, ctx)?),
207            b'[' => Self::Array(Array::read(r, ctx)?),
208            // See the comment in `skip`.
209            _ => {
210                skip_name_like(r, false)?;
211                Self::Null(Null)
212            }
213        };
214
215        Some(object)
216    }
217}
218
/// Objects that can be parsed directly from a plain byte slice.
pub trait FromBytes<'a>
where
    Self: Sized,
{
    /// Attempt to parse `Self` from `b`, returning `None` on failure.
    fn from_bytes(b: &'a [u8]) -> Option<Self>;
}
224
225impl<'a, T: Readable<'a>> FromBytes<'a> for T {
226    fn from_bytes(b: &'a [u8]) -> Option<Self> {
227        Self::from_bytes_impl(b)
228    }
229}
230
/// Identifies a PDF object by its object number and generation number.
///
/// The derived ordering compares `obj_number` first and `gen_number` second.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ObjectIdentifier {
    /// Object number of the identified object.
    pub obj_number: i32,
    /// Generation number of the identified object.
    pub gen_number: i32,
}
239
240impl ObjectIdentifier {
241    /// Create a new `ObjectIdentifier`.
242    pub fn new(obj_number: i32, gen_number: i32) -> Self {
243        Self {
244            obj_number,
245            gen_number,
246        }
247    }
248}
249
250impl Readable<'_> for ObjectIdentifier {
251    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
252        let obj_number = r.read_without_context::<i32>()?;
253        r.skip_white_spaces_and_comments();
254        let gen_number = r.read_without_context::<i32>()?;
255        r.skip_white_spaces_and_comments();
256        r.forward_tag(b"obj")?;
257
258        Some(Self {
259            obj_number,
260            gen_number,
261        })
262    }
263}
264
265impl Skippable for ObjectIdentifier {
266    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
267        r.skip_in_content_stream::<i32>()?;
268        r.skip_white_spaces_and_comments();
269        r.skip_in_content_stream::<i32>()?;
270        r.skip_white_spaces_and_comments();
271        r.forward_tag(b"obj")?;
272
273        Some(())
274    }
275}
276
277/// A convenience function that extracts a dict and a stream from an object.
278/// If the object is just a dictionary, it will return `None` for the stream.
279/// If the object is a stream, it will return its dictionary as well as the stream
280/// itself.
281pub fn dict_or_stream<'a, 'b>(
282    obj: &'b Object<'a>,
283) -> Option<(&'b Dict<'a>, Option<&'b Stream<'a>>)> {
284    if let Some(stream) = obj.cast_ref::<Stream<'a>>() {
285        Some((stream.dict(), Some(stream)))
286    } else {
287        obj.cast_ref::<Dict<'a>>().map(|dict| (dict, None))
288    }
289}
290
mod macros {
    /// Connects a concrete type to one variant of `Object`.
    ///
    /// The macro takes a type name (optionally followed by `<'lifetime>`), a
    /// comma, and the name of the `Object` variant wrapping that type. For
    /// the given type it generates:
    ///
    /// - a `TryFrom<Object<'a>>` impl with `Error = ()` that returns the
    ///   payload of the named variant and fails for every other variant,
    /// - an empty `ObjectLike` impl,
    /// - an `ObjectRefLike` impl whose `cast_ref` borrows the payload of the
    ///   named variant and returns `None` for every other variant.
    ///
    /// The `TryFrom` impl refers to `Object` by its bare name, so `Object`
    /// has to be in scope wherever the macro is invoked.
    macro_rules! object {
        ($t:ident $(<$l:lifetime>),*, $s:ident) => {
            impl<'a> TryFrom<Object<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: Object<'a>) -> core::result::Result<Self, Self::Error> {
                    if let Object::$s(inner) = value {
                        Ok(inner)
                    } else {
                        Err(())
                    }
                }
            }

            impl<'a> crate::object::ObjectLike<'a> for $t$(<$l>),* {}

            impl<'a> crate::object::ObjectRefLike<'a> for $t$(<$l>),* {
                fn cast_ref<'b>(obj: &'b crate::object::Object<'a>) -> Option<&'b Self> {
                    if let crate::object::Object::$s(inner) = obj {
                        Some(inner)
                    } else {
                        None
                    }
                }
            }
        };
    }

    pub(crate) use object;
}
320
#[cfg(test)]
mod tests {
    use crate::object::{Array, Dict, Name, Null, Number, Object, Stream, String};
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// Reads a single [`Object`] from `data` using a dummy reader context.
    fn object_impl(data: &[u8]) -> Option<Object<'_>> {
        let ctx = ReaderContext::dummy();
        let mut reader = Reader::new(data);
        reader.read_with_context::<Object<'_>>(&ctx)
    }

    /// Asserts that `$input` can be read and that the resulting object
    /// matches the pattern `$variant`.
    macro_rules! assert_reads_as {
        ($input:expr, $variant:pat) => {
            assert!(matches!(object_impl($input).unwrap(), $variant))
        };
    }

    #[test]
    fn null() {
        assert_reads_as!(b"null", Object::Null(_));
    }

    #[test]
    fn bool() {
        assert_reads_as!(b"true", Object::Boolean(_));
    }

    #[test]
    fn number() {
        assert_reads_as!(b"34.5", Object::Number(_));
    }

    #[test]
    fn string_1() {
        assert_reads_as!(b"(Hi)", Object::String(_));
    }

    #[test]
    fn string_2() {
        assert_reads_as!(b"<34>", Object::String(_));
    }

    #[test]
    fn name() {
        assert_reads_as!(b"/Name", Object::Name(_));
    }

    #[test]
    fn dict() {
        assert_reads_as!(b"<</Entry 45>>", Object::Dict(_));
    }

    #[test]
    fn array() {
        assert_reads_as!(b"[45]", Object::Array(_));
    }

    #[test]
    fn stream() {
        assert_reads_as!(
            b"<< /Length 3 >> stream\nabc\nendstream",
            Object::Stream(_)
        );
    }

    // Guards against accidental growth of the object types; the expected
    // values only apply to 64-bit targets.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn object_sizes() {
        assert_eq!(size_of::<Object<'_>>(), 40);
        assert_eq!(size_of::<Array<'_>>(), 32);
        assert_eq!(size_of::<Dict<'_>>(), 8);
        assert_eq!(size_of::<Name<'_>>(), 32);
        assert_eq!(size_of::<Null>(), 0);
        assert_eq!(size_of::<Number>(), 16);
        assert_eq!(size_of::<Stream<'_>>(), 24);
        assert_eq!(size_of::<String<'_>>(), 32);
    }
}
395}