hayro_syntax/object/
mod.rs

1//! Parsing and reading from PDF objects.
2
3pub use crate::object::array::Array;
4pub use crate::object::date::DateTime;
5pub use crate::object::dict::Dict;
6pub use crate::object::name::Name;
7use crate::object::name::skip_name_like;
8pub use crate::object::null::Null;
9pub use crate::object::number::Number;
10pub use crate::object::rect::Rect;
11pub use crate::object::r#ref::{MaybeRef, ObjRef};
12pub use crate::object::stream::Stream;
13pub use crate::object::string::String;
14use crate::reader::Reader;
15use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
16use std::fmt::Debug;
17
18mod bool;
19mod date;
20mod null;
21mod number;
22mod rect;
23mod r#ref;
24mod string;
25mod tuple;
26
27pub(crate) mod indirect;
28pub(crate) mod name;
29
30pub mod array;
31pub mod dict;
32pub mod stream;
33
34/// A trait for PDF objects.
35pub(crate) trait ObjectLike<'a>: TryFrom<Object<'a>> + Readable<'a> + Debug + Clone {}
36
37/// A primitive PDF object.
38#[derive(Debug, Clone, PartialEq)]
39pub enum Object<'a> {
40    /// A null object.
41    Null(Null),
42    /// A boolean object.
43    Boolean(bool),
44    /// A number object.
45    Number(Number),
46    /// A string object.
47    String(String<'a>),
48    /// A name object.
49    Name(Name<'a>),
50    /// A dict object.
51    Dict(Dict<'a>),
52    /// An array object.
53    Array(Array<'a>),
54    /// A stream object.
55    // Can only be an indirect object in theory and thus comes with some caveats,
56    // but we just treat it the same.
57    Stream(Stream<'a>),
58}
59
60impl<'a> Object<'a> {
61    /// Try casting the object to a specific subtype.
62    pub(crate) fn cast<T>(self) -> Option<T>
63    where
64        T: ObjectLike<'a>,
65    {
66        self.try_into().ok()
67    }
68
69    /// Try casting the object to a dict.
70    #[inline(always)]
71    pub fn into_dict(self) -> Option<Dict<'a>> {
72        self.cast()
73    }
74
75    /// Try casting the object to a name.
76    #[inline(always)]
77    pub fn into_name(self) -> Option<Name<'a>> {
78        self.cast()
79    }
80
81    /// Try casting the object to the null object.
82    #[inline(always)]
83    pub fn into_null(self) -> Option<Null> {
84        self.cast()
85    }
86
87    /// Try casting the object to a bool.
88    #[inline(always)]
89    pub fn into_bool(self) -> Option<bool> {
90        self.cast()
91    }
92
93    /// Try casting the object to a string.
94    #[inline(always)]
95    pub fn into_string(self) -> Option<String<'a>> {
96        self.cast()
97    }
98
99    /// Try casting the object to a stream.
100    #[inline(always)]
101    pub fn into_stream(self) -> Option<Stream<'a>> {
102        self.cast()
103    }
104
105    /// Try casting the object to an array.
106    #[inline(always)]
107    pub fn into_array(self) -> Option<Array<'a>> {
108        self.cast()
109    }
110
111    /// Try casting the object to a u8.
112    #[inline(always)]
113    pub fn into_u8(self) -> Option<u8> {
114        self.cast()
115    }
116
117    /// Try casting the object to a u16.
118    #[inline(always)]
119    pub fn into_u16(self) -> Option<u16> {
120        self.cast()
121    }
122
123    /// Try casting the object to a f32.
124    #[inline(always)]
125    pub fn into_f32(self) -> Option<f32> {
126        self.cast()
127    }
128
129    /// Try casting the object to a i32.
130    #[inline(always)]
131    pub fn into_i32(self) -> Option<i32> {
132        self.cast()
133    }
134
135    /// Try casting the object to a number.
136    #[inline(always)]
137    pub fn into_number(self) -> Option<Number> {
138        self.cast()
139    }
140}
141
142impl<'a> ObjectLike<'a> for Object<'a> {}
143
144impl Skippable for Object<'_> {
145    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
146        match r.peek_byte()? {
147            b'n' => Null::skip(r, is_content_stream),
148            b't' | b'f' => bool::skip(r, is_content_stream),
149            b'/' => Name::skip(r, is_content_stream),
150            b'<' => match r.peek_bytes(2)? {
151                // A stream can never appear in a dict/array, so it should never be skipped.
152                b"<<" => Dict::skip(r, is_content_stream),
153                _ => String::skip(r, is_content_stream),
154            },
155            b'(' => String::skip(r, is_content_stream),
156            b'.' | b'+' | b'-' | b'0'..=b'9' => Number::skip(r, is_content_stream),
157            b'[' => Array::skip(r, is_content_stream),
158            // See test case operator-in-TJ-array-0: Be lenient and skip content operators in
159            // array
160            _ => skip_name_like(r, false),
161        }
162    }
163}
164
165impl<'a> Readable<'a> for Object<'a> {
166    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
167        let object = match r.peek_byte()? {
168            b'n' => Self::Null(Null::read(r, ctx)?),
169            b't' | b'f' => Self::Boolean(bool::read(r, ctx)?),
170            b'/' => Self::Name(Name::read(r, ctx)?),
171            b'<' => match r.peek_bytes(2)? {
172                b"<<" => {
173                    let mut cloned = r.clone();
174                    let dict = Dict::read(&mut cloned, ctx)?;
175                    cloned.skip_white_spaces_and_comments();
176
177                    if cloned.forward_tag(b"stream").is_some() {
178                        Object::Stream(Stream::read(r, ctx)?)
179                    } else {
180                        r.jump(cloned.offset());
181
182                        Object::Dict(dict)
183                    }
184                }
185                _ => Self::String(String::read(r, ctx)?),
186            },
187            b'(' => Self::String(String::read(r, ctx)?),
188            b'.' | b'+' | b'-' | b'0'..=b'9' => Self::Number(Number::read(r, ctx)?),
189            b'[' => Self::Array(Array::read(r, ctx)?),
190            // See the comment in `skip`.
191            _ => {
192                skip_name_like(r, false)?;
193                Self::Null(Null)
194            }
195        };
196
197        Some(object)
198    }
199}
200
/// Types that can be parsed directly from a plain byte slice.
pub trait FromBytes<'a>
where
    Self: Sized,
{
    /// Attempt to parse `Self` from the bytes in `b`, returning `None` on failure.
    fn from_bytes(b: &'a [u8]) -> Option<Self>;
}
206
207impl<'a, T: Readable<'a>> FromBytes<'a> for T {
208    fn from_bytes(b: &'a [u8]) -> Option<Self> {
209        Self::from_bytes_impl(b)
210    }
211}
212
/// An identifier for a PDF object.
///
/// It consists of an object number and a generation number. The derived ordering
/// compares the object number first and the generation number second.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ObjectIdentifier {
    /// The object number.
    pub(crate) obj_num: i32,
    /// The generation number.
    pub(crate) gen_num: i32,
}

impl ObjectIdentifier {
    /// Create a new `ObjectIdentifier` from an object number and a generation number.
    ///
    /// This is a `const fn`, so identifiers can also be built in `const` and
    /// `static` contexts.
    pub const fn new(obj_num: i32, gen_num: i32) -> Self {
        Self { obj_num, gen_num }
    }
}
226
227impl Readable<'_> for ObjectIdentifier {
228    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
229        let obj_num = r.read_without_context::<i32>()?;
230        r.skip_white_spaces_and_comments();
231        let gen_num = r.read_without_context::<i32>()?;
232        r.skip_white_spaces_and_comments();
233        r.forward_tag(b"obj")?;
234
235        Some(Self { obj_num, gen_num })
236    }
237}
238
239impl Skippable for ObjectIdentifier {
240    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
241        r.skip_in_content_stream::<i32>()?;
242        r.skip_white_spaces_and_comments();
243        r.skip_in_content_stream::<i32>()?;
244        r.skip_white_spaces_and_comments();
245        r.forward_tag(b"obj")?;
246
247        Some(())
248    }
249}
250
251/// A convenience function that extracts a dict and a stream from an object.
252/// If the object is just a dictionary, it will return `None` for the stream.
253/// If the object is a stream, it will return its dictionary as well as the stream
254/// itself.
255pub fn dict_or_stream<'a>(obj: &Object<'a>) -> Option<(Dict<'a>, Option<Stream<'a>>)> {
256    if let Some(stream) = obj.clone().cast::<Stream<'a>>() {
257        Some((stream.dict().clone(), Some(stream)))
258    } else {
259        obj.clone().cast::<Dict<'a>>().map(|dict| (dict, None))
260    }
261}
262
mod macros {
    /// Generates the glue that ties a concrete type to one `Object` variant.
    ///
    /// `object!(Type<'a>, Variant)` (the lifetime part is optional) expands to:
    /// - a `TryFrom<Object<'a>>` impl for `Type` that succeeds only for
    ///   `Object::Variant` and fails with `()` for every other variant, and
    /// - an empty `ObjectLike` impl for `Type`.
    macro_rules! object {
        ($t:ident $(<$l:lifetime>),*, $s:ident) => {
            impl<'a> TryFrom<Object<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: Object<'a>) -> std::result::Result<Self, Self::Error> {
                    if let Object::$s(inner) = value {
                        Ok(inner)
                    } else {
                        Err(())
                    }
                }
            }

            impl<'a> crate::object::ObjectLike<'a> for $t$(<$l>),* {}
        };
    }

    pub(crate) use object;
}
283
#[cfg(test)]
mod tests {
    use crate::object::Object;
    use crate::reader::{Reader, ReaderContext, ReaderExt};

    /// Parse `data` as a single object, using a dummy reader context.
    fn object_impl(data: &[u8]) -> Option<Object<'_>> {
        let mut reader = Reader::new(data);
        reader.read_with_context::<Object<'_>>(&ReaderContext::dummy())
    }

    #[test]
    fn null() {
        let parsed = object_impl(b"null").unwrap();
        assert!(matches!(parsed, Object::Null(_)));
    }

    #[test]
    fn bool() {
        let parsed = object_impl(b"true").unwrap();
        assert!(matches!(parsed, Object::Boolean(_)));
    }

    #[test]
    fn number() {
        let parsed = object_impl(b"34.5").unwrap();
        assert!(matches!(parsed, Object::Number(_)));
    }

    // Strings come in two notations: parenthesized ...
    #[test]
    fn string_1() {
        let parsed = object_impl(b"(Hi)").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    // ... and enclosed in single angle brackets.
    #[test]
    fn string_2() {
        let parsed = object_impl(b"<34>").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn name() {
        let parsed = object_impl(b"/Name").unwrap();
        assert!(matches!(parsed, Object::Name(_)));
    }

    #[test]
    fn dict() {
        let parsed = object_impl(b"<</Entry 45>>").unwrap();
        assert!(matches!(parsed, Object::Dict(_)));
    }

    #[test]
    fn array() {
        let parsed = object_impl(b"[45]").unwrap();
        assert!(matches!(parsed, Object::Array(_)));
    }

    // A dict that is followed by the `stream` tag must be read as a stream.
    #[test]
    fn stream() {
        let parsed = object_impl(b"<< /Length 3 >> stream\nabc\nendstream").unwrap();
        assert!(matches!(parsed, Object::Stream(_)));
    }
}