hayro_syntax/object/
mod.rs

1//! Parsing and reading from PDF objects.
2
3use crate::object::name::skip_name_like;
4use crate::reader::{Readable, Reader, ReaderContext, Skippable};
5use std::fmt::Debug;
6
7pub use crate::object::array::Array;
8pub use crate::object::dict::Dict;
9pub use crate::object::name::Name;
10pub use crate::object::null::Null;
11pub use crate::object::number::Number;
12pub use crate::object::rect::Rect;
13pub use crate::object::r#ref::{MaybeRef, ObjRef};
14pub use crate::object::stream::Stream;
15pub use crate::object::string::String;
16
17mod bool;
18mod null;
19mod number;
20mod rect;
21mod r#ref;
22mod string;
23mod tuple;
24
25pub(crate) mod indirect;
26pub(crate) mod name;
27
28pub mod array;
29pub mod dict;
30pub mod stream;
31
/// A marker trait for types that can stand in for a PDF object.
///
/// The trait declares no methods of its own. It only bundles the bounds an
/// implementor has to satisfy: it must be convertible from an [`Object`] via
/// `TryFrom` (which is what `Object::cast` uses), it must be [`Readable`],
/// and it must implement `Debug` and `Clone`.
pub(crate) trait ObjectLike<'a>: TryFrom<Object<'a>> + Readable<'a> + Debug + Clone {}
34
/// A primitive PDF object.
///
/// Each variant wraps the parsed representation of one kind of object. The
/// `'a` lifetime is carried by the variants whose payload type is
/// parameterized over it (strings, names, dicts, arrays and streams).
#[derive(Debug, Clone, PartialEq)]
pub enum Object<'a> {
    /// A null object.
    Null(Null),
    /// A boolean object.
    Boolean(bool),
    /// A number object.
    Number(Number),
    /// A string object.
    String(string::String<'a>),
    /// A name object.
    Name(Name<'a>),
    /// A dict object.
    Dict(Dict<'a>),
    /// An array object.
    Array(Array<'a>),
    /// A stream object.
    // Can only be an indirect object in theory and thus comes with some caveats,
    // but we just treat it the same as every other variant.
    Stream(Stream<'a>),
}
57
58impl<'a> Object<'a> {
59    /// Try casting the object to a specific subtype.
60    pub(crate) fn cast<T>(self) -> Option<T>
61    where
62        T: ObjectLike<'a>,
63    {
64        self.try_into().ok()
65    }
66
67    /// Try casting the object to a dict.
68    #[inline(always)]
69    pub fn into_dict(self) -> Option<Dict<'a>> {
70        self.cast()
71    }
72
73    /// Try casting the object to a name.
74    #[inline(always)]
75    pub fn into_name(self) -> Option<Name<'a>> {
76        self.cast()
77    }
78
79    /// Try casting the object to the null object.
80    #[inline(always)]
81    pub fn into_null(self) -> Option<Null> {
82        self.cast()
83    }
84
85    /// Try casting the object to a bool.
86    #[inline(always)]
87    pub fn into_bool(self) -> Option<bool> {
88        self.cast()
89    }
90
91    /// Try casting the object to a string.
92    #[inline(always)]
93    pub fn into_string(self) -> Option<string::String<'a>> {
94        self.cast()
95    }
96
97    /// Try casting the object to a stream.
98    #[inline(always)]
99    pub fn into_stream(self) -> Option<Stream<'a>> {
100        self.cast()
101    }
102
103    /// Try casting the object to an array.
104    #[inline(always)]
105    pub fn into_array(self) -> Option<Array<'a>> {
106        self.cast()
107    }
108
109    /// Try casting the object to a u8.
110    #[inline(always)]
111    pub fn into_u8(self) -> Option<u8> {
112        self.cast()
113    }
114
115    /// Try casting the object to a u16.
116    #[inline(always)]
117    pub fn into_u16(self) -> Option<u16> {
118        self.cast()
119    }
120
121    /// Try casting the object to a f32.
122    #[inline(always)]
123    pub fn into_f32(self) -> Option<f32> {
124        self.cast()
125    }
126
127    /// Try casting the object to a i32.
128    #[inline(always)]
129    pub fn into_i32(self) -> Option<i32> {
130        self.cast()
131    }
132
133    /// Try casting the object to a number.
134    #[inline(always)]
135    pub fn into_number(self) -> Option<Number> {
136        self.cast()
137    }
138}
139
// `Object` is itself an `ObjectLike`, so it can be used wherever an
// `ObjectLike` is required (for example as the target type of `Object::cast`).
impl<'a> ObjectLike<'a> for Object<'a> {}
141
142impl Skippable for Object<'_> {
143    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
144        match r.peek_byte()? {
145            b'n' => Null::skip(r, is_content_stream),
146            b't' | b'f' => bool::skip(r, is_content_stream),
147            b'/' => Name::skip(r, is_content_stream),
148            b'<' => match r.peek_bytes(2)? {
149                // A stream can never appear in a dict/array, so it should never be skipped.
150                b"<<" => Dict::skip(r, is_content_stream),
151                _ => string::String::skip(r, is_content_stream),
152            },
153            b'(' => string::String::skip(r, is_content_stream),
154            b'.' | b'+' | b'-' | b'0'..=b'9' => Number::skip(r, is_content_stream),
155            b'[' => Array::skip(r, is_content_stream),
156            // See test case operator-in-TJ-array-0: Be lenient and skip content operators in
157            // array
158            _ => skip_name_like(r, false),
159        }
160    }
161}
162
163impl<'a> Readable<'a> for Object<'a> {
164    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
165        let object = match r.peek_byte()? {
166            b'n' => Self::Null(Null::read(r, ctx)?),
167            b't' | b'f' => Self::Boolean(bool::read(r, ctx)?),
168            b'/' => Self::Name(Name::read(r, ctx)?),
169            b'<' => match r.peek_bytes(2)? {
170                b"<<" => {
171                    let mut cloned = r.clone();
172                    let dict = Dict::read(&mut cloned, ctx)?;
173                    cloned.skip_white_spaces_and_comments();
174
175                    if cloned.forward_tag(b"stream").is_some() {
176                        Object::Stream(Stream::read(r, ctx)?)
177                    } else {
178                        r.jump(cloned.offset());
179
180                        Object::Dict(dict)
181                    }
182                }
183                _ => Self::String(string::String::read(r, ctx)?),
184            },
185            b'(' => Self::String(string::String::read(r, ctx)?),
186            b'.' | b'+' | b'-' | b'0'..=b'9' => Self::Number(Number::read(r, ctx)?),
187            b'[' => Self::Array(Array::read(r, ctx)?),
188            // See the comment in `skip`.
189            _ => {
190                skip_name_like(r, false)?;
191                Self::Null(Null)
192            }
193        };
194
195        Some(object)
196    }
197}
198
/// A trait for objects that can be parsed from a simple byte stream.
///
/// A blanket implementation in this module provides it for every type
/// that implements `Readable`.
pub trait FromBytes<'a>: Sized {
    /// Try to read the object from the given bytes.
    ///
    /// Returns `None` if the bytes cannot be parsed as `Self`.
    fn from_bytes(b: &'a [u8]) -> Option<Self>;
}
204
205impl<'a, T: Readable<'a>> FromBytes<'a> for T {
206    fn from_bytes(b: &'a [u8]) -> Option<Self> {
207        Self::from_bytes_impl(b)
208    }
209}
210
/// An identifier for a PDF object, consisting of an object number and a
/// generation number.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct ObjectIdentifier {
    // The object number part of the identifier.
    pub(crate) obj_num: i32,
    // The generation number part of the identifier.
    pub(crate) gen_num: i32,
}

impl ObjectIdentifier {
    /// Create a new `ObjectIdentifier` from an object number and a
    /// generation number.
    pub fn new(obj_num: i32, gen_num: i32) -> Self {
        ObjectIdentifier { obj_num, gen_num }
    }
}
224
225impl Readable<'_> for ObjectIdentifier {
226    fn read(r: &mut Reader<'_>, _: &ReaderContext) -> Option<Self> {
227        let obj_num = r.read_without_context::<i32>()?;
228        r.skip_white_spaces_and_comments();
229        let gen_num = r.read_without_context::<i32>()?;
230        r.skip_white_spaces_and_comments();
231        r.forward_tag(b"obj")?;
232
233        Some(ObjectIdentifier { obj_num, gen_num })
234    }
235}
236
237impl Skippable for ObjectIdentifier {
238    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
239        r.skip_in_content_stream::<i32>()?;
240        r.skip_white_spaces_and_comments();
241        r.skip_in_content_stream::<i32>()?;
242        r.skip_white_spaces_and_comments();
243        r.forward_tag(b"obj")?;
244
245        Some(())
246    }
247}
248
249/// A convenience function that extracts a dict and a stream from an object.
250/// If the object is just a dictionary, it will return `None` for the stream.
251/// If the object is a stream, it will return its dictionary as well as the stream
252/// itself.
253pub fn dict_or_stream<'a>(obj: &Object<'a>) -> Option<(Dict<'a>, Option<Stream<'a>>)> {
254    if let Some(stream) = obj.clone().cast::<Stream>() {
255        Some((stream.dict().clone(), Some(stream)))
256    } else {
257        obj.clone().cast::<Dict>().map(|dict| (dict, None))
258    }
259}
260
// Helper macros for wiring concrete object types up to `Object`.
mod macros {
    /// Implements `TryFrom<Object<'a>>` and `ObjectLike<'a>` for a type that is
    /// the payload of exactly one `Object` variant.
    ///
    /// The input has the shape `Type, Variant` or `Type<'a>, Variant`, where
    /// `Type` is the payload type and `Variant` is the name of the matching
    /// `Object` variant.
    macro_rules! object {
        // `$t` is the type name, `$l` its optional lifetime parameter and `$s`
        // the `Object` variant that carries a `$t`.
        ($t:ident $(<$l:lifetime>),*, $s:ident) => {
            // The impl header always introduces `'a`, so a lifetime passed as
            // `$l` has to be `'a` for the generated code to compile.
            // `Object` is named without a path here, so it has to be in scope
            // wherever the macro is invoked.
            impl<'a> TryFrom<Object<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: Object<'a>) -> std::result::Result<Self, Self::Error> {
                    match value {
                        // Only the matching variant converts; its payload is
                        // returned as-is.
                        Object::$s(b) => Ok(b),
                        _ => Err(()),
                    }
                }
            }

            impl<'a> crate::object::ObjectLike<'a> for $t$(<$l>),* {}
        };
    }

    // Make the macro importable by path from elsewhere in the crate.
    pub(crate) use object;
}
281
#[cfg(test)]
mod tests {
    use crate::object::Object;
    use crate::reader::{Reader, ReaderContext};

    /// Parse `data` as a single object using a dummy reader context.
    fn parse(data: &[u8]) -> Option<Object<'_>> {
        let mut reader = Reader::new(data);
        reader.read_with_context::<Object>(&ReaderContext::dummy())
    }

    #[test]
    fn null() {
        let parsed = parse(b"null").unwrap();
        assert!(matches!(parsed, Object::Null(_)));
    }

    #[test]
    fn bool() {
        let parsed = parse(b"true").unwrap();
        assert!(matches!(parsed, Object::Boolean(_)));
    }

    #[test]
    fn number() {
        let parsed = parse(b"34.5").unwrap();
        assert!(matches!(parsed, Object::Number(_)));
    }

    #[test]
    fn string_1() {
        let parsed = parse(b"(Hi)").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn string_2() {
        let parsed = parse(b"<34>").unwrap();
        assert!(matches!(parsed, Object::String(_)));
    }

    #[test]
    fn name() {
        let parsed = parse(b"/Name").unwrap();
        assert!(matches!(parsed, Object::Name(_)));
    }

    #[test]
    fn dict() {
        let parsed = parse(b"<</Entry 45>>").unwrap();
        assert!(matches!(parsed, Object::Dict(_)));
    }

    #[test]
    fn array() {
        let parsed = parse(b"[45]").unwrap();
        assert!(matches!(parsed, Object::Array(_)));
    }

    #[test]
    fn stream() {
        let parsed = parse(b"<< /Length 3 >> stream\nabc\nendstream").unwrap();
        assert!(matches!(parsed, Object::Stream(_)));
    }
}