hayro_syntax/object/
mod.rs

1//! Parsing and reading from PDF objects.
2
3use crate::object::array::Array;
4use crate::object::dict::Dict;
5use crate::object::name::{Name, skip_name_like};
6use crate::object::null::Null;
7use crate::object::number::Number;
8use crate::object::stream::Stream;
9use crate::reader::{Readable, Reader, Skippable};
10use crate::xref::XRef;
11use std::fmt::Debug;
12
13pub mod array;
14pub mod bool;
15pub mod dict;
16pub(crate) mod indirect;
17pub mod name;
18pub mod null;
19pub mod number;
20pub mod rect;
21pub mod r#ref;
22pub mod stream;
23pub mod string;
24mod tuple;
25
26/// A trait for PDF objects.
27pub(crate) trait ObjectLike<'a>: TryFrom<Object<'a>> + Readable<'a> + Debug + Clone {}
28
/// A primitive PDF object.
///
/// Each variant wraps the concrete type used to represent that kind of object.
#[derive(Debug, Clone, PartialEq)]
pub enum Object<'a> {
    /// A null object.
    Null(Null),
    /// A boolean object.
    Boolean(bool),
    /// A number object.
    Number(Number),
    /// A string object.
    String(string::String<'a>),
    /// A name object.
    Name(Name<'a>),
    /// A dict object.
    Dict(Dict<'a>),
    /// An array object.
    Array(Array<'a>),
    /// A stream object.
    // In theory a stream can only occur as an indirect object, which comes
    // with some caveats, but it is treated like any other object here.
    Stream(Stream<'a>),
}
51
52impl<'a> Object<'a> {
53    /// Try casting the object to a specific subtype.
54    pub(crate) fn cast<T>(self) -> Option<T>
55    where
56        T: ObjectLike<'a>,
57    {
58        self.try_into().ok()
59    }
60
61    /// Try casting the object to a dict.
62    #[inline(always)]
63    pub fn into_dict(self) -> Option<Dict<'a>> {
64        self.cast()
65    }
66
67    /// Try casting the object to a name.
68    #[inline(always)]
69    pub fn into_name(self) -> Option<Name<'a>> {
70        self.cast()
71    }
72
73    /// Try casting the object to the null object.
74    #[inline(always)]
75    pub fn into_null(self) -> Option<Null> {
76        self.cast()
77    }
78
79    /// Try casting the object to a bool.
80    #[inline(always)]
81    pub fn into_bool(self) -> Option<bool> {
82        self.cast()
83    }
84
85    /// Try casting the object to a string.
86    #[inline(always)]
87    pub fn into_string(self) -> Option<string::String<'a>> {
88        self.cast()
89    }
90
91    /// Try casting the object to a stream.
92    #[inline(always)]
93    pub fn into_stream(self) -> Option<Stream<'a>> {
94        self.cast()
95    }
96
97    /// Try casting the object to an array.
98    #[inline(always)]
99    pub fn into_array(self) -> Option<Array<'a>> {
100        self.cast()
101    }
102
103    /// Try casting the object to a u8.
104    #[inline(always)]
105    pub fn into_u8(self) -> Option<u8> {
106        self.cast()
107    }
108
109    /// Try casting the object to a u16.
110    #[inline(always)]
111    pub fn into_u16(self) -> Option<u16> {
112        self.cast()
113    }
114
115    /// Try casting the object to a f32.
116    #[inline(always)]
117    pub fn into_f32(self) -> Option<f32> {
118        self.cast()
119    }
120
121    /// Try casting the object to a i32.
122    #[inline(always)]
123    pub fn into_i32(self) -> Option<i32> {
124        self.cast()
125    }
126
127    /// Try casting the object to a number.
128    #[inline(always)]
129    pub fn into_number(self) -> Option<Number> {
130        self.cast()
131    }
132}
133
// `Object` itself counts as an object-like type, so it can be used wherever
// an `ObjectLike` is expected (for example as the target of `Object::cast`).
// NOTE(review): the `TryFrom<Object>` requirement is presumably satisfied by
// the standard library's blanket conversion impl - confirm.
impl<'a> ObjectLike<'a> for Object<'a> {}
135
136impl Skippable for Object<'_> {
137    fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()> {
138        match r.peek_byte()? {
139            b'n' => Null::skip::<PLAIN>(r),
140            b't' | b'f' => bool::skip::<PLAIN>(r),
141            b'/' => Name::skip::<PLAIN>(r),
142            b'<' => match r.peek_bytes(2)? {
143                // A stream can never appear in a dict/array, so it should never be skipped.
144                b"<<" => Dict::skip::<PLAIN>(r),
145                _ => string::String::skip::<PLAIN>(r),
146            },
147            b'(' => string::String::skip::<PLAIN>(r),
148            b'.' | b'+' | b'-' | b'0'..=b'9' => Number::skip::<PLAIN>(r),
149            b'[' => Array::skip::<PLAIN>(r),
150            // See test case operator-in-TJ-array-0: Be lenient and skip content operators in
151            // array
152            _ => skip_name_like(r, false),
153        }
154    }
155}
156
157impl<'a> Readable<'a> for Object<'a> {
158    fn read<const PLAIN: bool>(r: &mut Reader<'a>, xref: &'a XRef) -> Option<Self> {
159        let object = match r.peek_byte()? {
160            b'n' => Self::Null(Null::read::<PLAIN>(r, xref)?),
161            b't' | b'f' => Self::Boolean(bool::read::<PLAIN>(r, xref)?),
162            b'/' => Self::Name(Name::read::<PLAIN>(r, xref)?),
163            b'<' => match r.peek_bytes(2)? {
164                b"<<" => {
165                    let mut cloned = r.clone();
166                    let dict = Dict::read::<PLAIN>(&mut cloned, xref)?;
167                    cloned.skip_white_spaces_and_comments();
168
169                    if cloned.forward_tag(b"stream").is_some() {
170                        Object::Stream(Stream::read::<PLAIN>(r, xref)?)
171                    } else {
172                        r.jump(cloned.offset());
173
174                        Object::Dict(dict)
175                    }
176                }
177                _ => Self::String(string::String::read::<PLAIN>(r, xref)?),
178            },
179            b'(' => Self::String(string::String::read::<PLAIN>(r, xref)?),
180            b'.' | b'+' | b'-' | b'0'..=b'9' => Self::Number(Number::read::<PLAIN>(r, xref)?),
181            b'[' => Self::Array(Array::read::<PLAIN>(r, xref)?),
182            // See the comment in `skip`.
183            _ => {
184                skip_name_like(r, false)?;
185                Self::Null(Null)
186            }
187        };
188
189        Some(object)
190    }
191}
192
/// An identifier for a PDF object, made up of an object number (`obj_num`)
/// and a generation number (`gen_num`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ObjectIdentifier {
    pub(crate) obj_num: i32,
    pub(crate) gen_num: i32,
}

impl ObjectIdentifier {
    /// Build an `ObjectIdentifier` from the given object and generation numbers.
    pub fn new(obj_num: i32, gen_num: i32) -> Self {
        ObjectIdentifier { obj_num, gen_num }
    }
}
206
207impl Readable<'_> for ObjectIdentifier {
208    fn read<const PLAIN: bool>(r: &mut Reader<'_>, _: &XRef) -> Option<Self> {
209        let obj_num = r.read_without_xref::<i32>()?;
210        r.skip_white_spaces_and_comments();
211        let gen_num = r.read_without_xref::<i32>()?;
212        r.skip_white_spaces_and_comments();
213        r.forward_tag(b"obj")?;
214
215        Some(ObjectIdentifier { obj_num, gen_num })
216    }
217}
218
219impl Skippable for ObjectIdentifier {
220    fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()> {
221        r.skip_plain::<i32>()?;
222        r.skip_white_spaces_and_comments();
223        r.skip_plain::<i32>()?;
224        r.skip_white_spaces_and_comments();
225        r.forward_tag(b"obj")?;
226
227        Some(())
228    }
229}
230
231/// A convenience function that extracts a dict and a stream from an object.
232/// If the object is just a dictionary, it will return `None` for the stream.
233/// If the object is a stream, it will return it's dictionary as well as the stream
234/// itself.
235pub fn dict_or_stream<'a>(obj: &Object<'a>) -> Option<(Dict<'a>, Option<Stream<'a>>)> {
236    if let Some(stream) = obj.clone().cast::<Stream>() {
237        Some((stream.dict().clone(), Some(stream)))
238    } else if let Some(dict) = obj.clone().cast::<Dict>() {
239        Some((dict, None))
240    } else {
241        None
242    }
243}
244
mod macros {
    /// Implements `TryFrom<Object>` and `ObjectLike` for a type.
    ///
    /// The first argument is the type (optionally followed by a lifetime
    /// parameter in angle brackets), the second is the `Object` variant that
    /// wraps it. The generated conversion succeeds only for that variant and
    /// fails with `()` otherwise.
    macro_rules! object {
        ($t:ident $(<$l:lifetime>),*, $s:ident) => {
            impl<'a> TryFrom<Object<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(obj: Object<'a>) -> std::result::Result<Self, Self::Error> {
                    if let Object::$s(inner) = obj {
                        Ok(inner)
                    } else {
                        Err(())
                    }
                }
            }

            impl<'a> crate::object::ObjectLike<'a> for $t$(<$l>),* {}
        };
    }

    pub(crate) use object;
}
265
#[cfg(test)]
mod tests {
    use crate::object::Object;
    use crate::reader::Reader;
    use crate::xref::XRef;

    /// Read a single object from `data`, using a dummy xref table.
    fn read_object(data: &[u8]) -> Option<Object<'_>> {
        let mut reader = Reader::new(data);
        reader.read_with_xref::<Object>(&XRef::dummy())
    }

    #[test]
    fn null() {
        let parsed = read_object(b"null").unwrap();
        assert!(matches!(parsed, Object::Null(_)));
    }

    #[test]
    fn bool() {
        let parsed = read_object(b"true").unwrap();
        assert!(matches!(parsed, Object::Boolean(_)));
    }

    #[test]
    fn number() {
        let parsed = read_object(b"34.5").unwrap();
        assert!(matches!(parsed, Object::Number(_)));
    }

    #[test]
    fn string_1() {
        let parsed = read_object(b"(Hi)").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn string_2() {
        let parsed = read_object(b"<34>").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn name() {
        let parsed = read_object(b"/Name").unwrap();
        assert!(matches!(parsed, Object::Name(_)));
    }

    #[test]
    fn dict() {
        let parsed = read_object(b"<</Entry 45>>").unwrap();
        assert!(matches!(parsed, Object::Dict(_)));
    }

    #[test]
    fn array() {
        let parsed = read_object(b"[45]").unwrap();
        assert!(matches!(parsed, Object::Array(_)));
    }

    #[test]
    fn stream() {
        let parsed = read_object(b"<< /Length 3 >> stream\nabc\nendstream").unwrap();
        assert!(matches!(parsed, Object::Stream(_)));
    }
}