hayro_syntax/
reader.rs

1//! Reading bytes and PDF objects from data.
2
3use crate::object::ObjectIdentifier;
4use crate::trivia::{Comment, is_eol_character, is_white_space_character};
5use crate::xref::XRef;
6use smallvec::{SmallVec, smallvec};
7
8pub use crate::byte_reader::Reader;
9
/// Extension trait for the `Reader` struct.
///
/// Adds typed reading (`Readable`) and skipping (`Skippable`) on top of the raw byte
/// reader, plus helpers for consuming white space, end-of-line characters and comments.
pub trait ReaderExt<'a> {
    /// Reads a `T` using `ctx`. Returns `None` if `T` could not be read; the implementation
    /// for `Reader` restores the reader's offset in that case.
    fn read<T: Readable<'a>>(&mut self, ctx: &ReaderContext<'a>) -> Option<T>;
    /// Reads a `T` using `ctx`; the implementation for `Reader` forwards directly to `read`.
    fn read_with_context<T: Readable<'a>>(&mut self, ctx: &ReaderContext<'a>) -> Option<T>;
    /// Reads a `T` without a caller-supplied context; the implementation for `Reader`
    /// builds a context around `XRef::dummy()`.
    fn read_without_context<T: Readable<'a>>(&mut self) -> Option<T>;
    /// Skips over a `T` and returns the bytes that were skipped, or `None` if skipping
    /// failed (the implementation for `Reader` restores the offset in that case).
    ///
    /// `is_content_stream` is handed through unchanged to `Skippable::skip`.
    fn skip<T: Skippable>(&mut self, is_content_stream: bool) -> Option<&'a [u8]>;
    /// Convenience wrapper around `skip` for data that is not inside a content stream.
    fn skip_not_in_content_stream<T: Skippable>(&mut self) -> Option<&'a [u8]>;
    /// Convenience wrapper around `skip` intended for data inside a content stream.
    fn skip_in_content_stream<T: Skippable>(&mut self) -> Option<&'a [u8]>;
    /// Advances past any run of white-space characters at the current position.
    fn skip_white_spaces(&mut self);
    /// Consumes a single white-space byte (a `\r\n` pair counts as one). Returns `None`
    /// if the next byte is not white space or no byte is left.
    fn read_white_space(&mut self) -> Option<()>;
    /// Advances past any run of end-of-line characters at the current position.
    fn skip_eol_characters(&mut self);
    /// Advances past any interleaved sequence of white space and `%` comments.
    fn skip_white_spaces_and_comments(&mut self);
}
23
24impl<'a> ReaderExt<'a> for Reader<'a> {
25    // Note: If `PLAIN` is true, it means that the data we are about to read _might_ contain
26    // an object reference instead of an actual object. if `PLAIN` is false, then an object
27    // reference cannot occur. The main reason we make this distinction is that when parsing
28    // a number, we cannot unambiguously distinguish whether it's a real number or the
29    // start of an object reference. In content streams, object references cannot appear,
30    // so in order to speed this up we set `PLAIN` to false, meaning that as soon as we
31    // encounter a number we know it's a number, and don't need to do a look-ahead to ensure
32    // that it's not an object reference.
33    #[inline]
34    fn read<T: Readable<'a>>(&mut self, ctx: &ReaderContext<'a>) -> Option<T> {
35        let old_offset = self.offset;
36
37        T::read(self, ctx).or_else(|| {
38            self.offset = old_offset;
39
40            None
41        })
42    }
43
44    #[inline]
45    fn read_with_context<T: Readable<'a>>(&mut self, ctx: &ReaderContext<'a>) -> Option<T> {
46        self.read::<T>(ctx)
47    }
48
49    #[inline]
50    fn read_without_context<T: Readable<'a>>(&mut self) -> Option<T> {
51        self.read::<T>(&ReaderContext::new(XRef::dummy(), true))
52    }
53
54    #[inline]
55    fn skip<T: Skippable>(&mut self, is_content_stream: bool) -> Option<&'a [u8]> {
56        let old_offset = self.offset;
57
58        T::skip(self, is_content_stream).or_else(|| {
59            self.offset = old_offset;
60            None
61        })?;
62
63        self.data.get(old_offset..self.offset)
64    }
65
66    #[inline]
67    fn skip_not_in_content_stream<T: Skippable>(&mut self) -> Option<&'a [u8]> {
68        self.skip::<T>(false)
69    }
70
71    #[inline]
72    fn skip_in_content_stream<T: Skippable>(&mut self) -> Option<&'a [u8]> {
73        self.skip::<T>(false)
74    }
75
76    #[inline]
77    fn skip_white_spaces(&mut self) {
78        while let Some(b) = self.peek_byte() {
79            if is_white_space_character(b) {
80                self.forward();
81            } else {
82                return;
83            }
84        }
85    }
86
87    #[inline]
88    fn read_white_space(&mut self) -> Option<()> {
89        if self.peek_byte()?.is_ascii_whitespace() {
90            let w = self.read_byte()?;
91
92            if w == b'\r' && self.peek_byte().is_some_and(|b| b == b'\n') {
93                self.read_byte()?;
94            }
95
96            return Some(());
97        }
98
99        None
100    }
101
102    #[inline]
103    fn skip_eol_characters(&mut self) {
104        while let Some(b) = self.peek_byte() {
105            if is_eol_character(b) {
106                self.forward();
107            } else {
108                return;
109            }
110        }
111    }
112
113    #[inline]
114    fn skip_white_spaces_and_comments(&mut self) {
115        while let Some(b) = self.peek_byte() {
116            if is_white_space_character(b) {
117                self.skip_white_spaces();
118            } else if b == b'%' {
119                Comment::skip(self, true);
120            } else {
121                return;
122            }
123        }
124    }
125}
126
/// Context handed to `Readable::read` implementations alongside the reader itself.
#[derive(Clone, Debug)]
pub struct ReaderContext<'a> {
    /// The `XRef` consulted while reading (presumably the document's cross-reference
    /// table, used to resolve object references — confirm against `crate::xref`).
    pub(crate) xref: &'a XRef,
    /// Set by the caller of `ReaderContext::new`; presumably `true` when the data being
    /// read belongs to a content stream, where object references cannot appear.
    pub(crate) in_content_stream: bool,
    /// Initialized to `false` by `ReaderContext::new`; presumably set while reading
    /// objects stored inside an object stream — confirm against callers.
    pub(crate) in_object_stream: bool,
    /// Initialized to `None` by `ReaderContext::new`; presumably the identifier of the
    /// object currently being read, when known.
    pub(crate) obj_number: Option<ObjectIdentifier>,
    /// Starts out empty; presumably the identifiers of the enclosing objects visited on
    /// the way to the current one — confirm against callers.
    pub(crate) parent_chain: SmallVec<[ObjectIdentifier; 4]>,
}
135
136impl<'a> ReaderContext<'a> {
137    pub(crate) fn new(xref: &'a XRef, in_content_stream: bool) -> Self {
138        Self {
139            xref,
140            in_content_stream,
141            obj_number: None,
142            in_object_stream: false,
143            parent_chain: smallvec![],
144        }
145    }
146
147    pub fn dummy() -> Self {
148        Self::new(XRef::dummy(), false)
149    }
150}
151
152pub trait Readable<'a>: Sized {
153    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self>;
154    fn from_bytes_impl(b: &'a [u8]) -> Option<Self> {
155        let mut r = Reader::new(b);
156        let xref = XRef::dummy();
157
158        Self::read(&mut r, &ReaderContext::new(xref, false))
159    }
160}
161
/// A type whose serialized form can be skipped over without producing a value.
pub trait Skippable {
    /// Advances `r` past one instance of this type, returning `None` if that is not
    /// possible.
    ///
    /// `is_content_stream` is supplied by the caller (see `ReaderExt::skip`). When called
    /// through `ReaderExt::skip`, the reader's offset is restored on `None`, so
    /// implementations do not have to rewind themselves in that case.
    fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()>;
}