// hayro_syntax/reader.rs

//! Reading bytes and PDF objects from data.
2
3use crate::trivia::{Comment, is_eol_character, is_white_space_character};
4use crate::xref::XRef;
5use std::ops::Range;
6
/// A reader for reading bytes and PDF objects.
///
/// The reader is a cursor over a borrowed byte slice. Cloning it is cheap (it copies one slice
/// reference and one integer) and yields an independent cursor over the same data.
#[derive(Clone, Debug)]
pub(crate) struct Reader<'a> {
    /// The complete underlying data; never re-sliced, so offsets are always absolute.
    data: &'a [u8],
    /// The current absolute position in `data`. It is not validated when set and may lie past
    /// the end of `data`.
    offset: usize,
}
13
14impl<'a> Reader<'a> {
15    #[inline]
16    pub(crate) fn new(data: &'a [u8]) -> Self {
17        Self { data, offset: 0 }
18    }
19    #[inline]
20    pub(crate) fn new_with(data: &'a [u8], offset: usize) -> Self {
21        Self { data, offset }
22    }
23
24    #[inline]
25    pub(crate) fn at_end(&self) -> bool {
26        self.offset >= self.data.len()
27    }
28
29    #[inline]
30    pub(crate) fn jump_to_end(&mut self) {
31        self.offset = self.data.len();
32    }
33
34    #[inline]
35    pub(crate) fn jump(&mut self, offset: usize) {
36        self.offset = offset;
37    }
38
39    #[inline]
40    pub(crate) fn tail(&mut self) -> Option<&'a [u8]> {
41        self.data.get(self.offset..)
42    }
43
44    #[inline]
45    pub(crate) fn len(&self) -> usize {
46        self.data.len()
47    }
48
49    #[inline]
50    pub(crate) fn range(&self, range: Range<usize>) -> Option<&'a [u8]> {
51        self.data.get(range)
52    }
53
54    #[inline]
55    pub(crate) fn offset(&self) -> usize {
56        self.offset
57    }
58
59    #[inline]
60    pub(crate) fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
61        let v = self.peek_bytes(len)?;
62        self.offset += len;
63
64        Some(v)
65    }
66
67    #[inline]
68    pub(crate) fn read_byte(&mut self) -> Option<u8> {
69        let v = self.peek_byte()?;
70        self.offset += 1;
71
72        Some(v)
73    }
74
75    // Note: If `PLAIN` is true, it means that the data we are about to read _might_ contain
76    // an object reference instead of an actual object. if `PLAIN` is false, then an object
77    // reference cannot occur. The main reason we make this distinction is that when parsing
78    // a number, we cannot unambiguously distinguish whether it's a real number or the
79    // start of an object reference. In content streams, object references cannot appear,
80    // so in order to speed this up we set `PLAIN` to false, meaning that as soon as we
81    // encounter a number we know it's a number, and don't need to do a look-ahead to ensure
82    // that it's not an object reference.
83    #[inline]
84    pub(crate) fn read<const PLAIN: bool, T: Readable<'a>>(&mut self, xref: &'a XRef) -> Option<T> {
85        let old_offset = self.offset;
86
87        T::read::<PLAIN>(self, &xref).or_else(|| {
88            self.offset = old_offset;
89
90            None
91        })
92    }
93
94    #[inline]
95    pub(crate) fn read_with_xref<T: Readable<'a>>(&mut self, xref: &'a XRef) -> Option<T> {
96        self.read::<false, T>(xref)
97    }
98
99    #[inline]
100    pub(crate) fn read_without_xref<T: Readable<'a>>(&mut self) -> Option<T> {
101        self.read::<true, T>(&XRef::dummy())
102    }
103
104    #[inline]
105    pub(crate) fn skip<const PLAIN: bool, T: Skippable>(&mut self) -> Option<&'a [u8]> {
106        let old_offset = self.offset;
107
108        T::skip::<PLAIN>(self).or_else(|| {
109            self.offset = old_offset;
110            None
111        })?;
112
113        self.data.get(old_offset..self.offset)
114    }
115
116    #[inline]
117    pub(crate) fn skip_non_plain<T: Skippable>(&mut self) -> Option<&'a [u8]> {
118        self.skip::<false, T>()
119    }
120
121    #[inline]
122    pub(crate) fn skip_plain<T: Skippable>(&mut self) -> Option<&'a [u8]> {
123        self.skip::<true, T>()
124    }
125
126    #[inline]
127    pub(crate) fn skip_bytes(&mut self, len: usize) -> Option<()> {
128        self.read_bytes(len).map(|_| {})
129    }
130
131    #[inline]
132    pub(crate) fn peek_bytes(&self, len: usize) -> Option<&'a [u8]> {
133        self.data.get(self.offset..self.offset + len)
134    }
135
136    #[inline]
137    pub(crate) fn peek_byte(&self) -> Option<u8> {
138        self.data.get(self.offset).copied()
139    }
140
141    #[inline]
142    pub(crate) fn eat(&mut self, f: impl Fn(u8) -> bool) -> Option<u8> {
143        let val = self.peek_byte()?;
144        if f(val) {
145            self.forward();
146            Some(val)
147        } else {
148            None
149        }
150    }
151
152    #[inline]
153    pub(crate) fn forward(&mut self) {
154        self.offset += 1;
155    }
156
157    #[inline]
158    pub(crate) fn forward_if(&mut self, f: impl Fn(u8) -> bool) -> Option<()> {
159        if f(self.peek_byte()?) {
160            self.forward();
161
162            Some(())
163        } else {
164            None
165        }
166    }
167
168    #[inline]
169    pub(crate) fn forward_while_1(&mut self, f: impl Fn(u8) -> bool) -> Option<()> {
170        self.eat(&f)?;
171        self.forward_while(f);
172        Some(())
173    }
174
175    #[inline]
176    pub(crate) fn forward_tag(&mut self, tag: &[u8]) -> Option<()> {
177        self.peek_tag(tag)?;
178        self.offset += tag.len();
179
180        Some(())
181    }
182
183    #[inline]
184    pub(crate) fn forward_while(&mut self, f: impl Fn(u8) -> bool) {
185        while let Some(b) = self.peek_byte() {
186            if f(b) {
187                self.forward();
188            } else {
189                break;
190            }
191        }
192    }
193
194    #[inline]
195    pub(crate) fn peek_tag(&self, tag: &[u8]) -> Option<()> {
196        let mut cloned = self.clone();
197
198        for b in tag.iter().copied() {
199            if cloned.peek_byte() == Some(b) {
200                cloned.forward();
201            } else {
202                return None;
203            }
204        }
205
206        Some(())
207    }
208
209    #[inline]
210    pub(crate) fn skip_white_spaces(&mut self) {
211        while let Some(b) = self.peek_byte() {
212            if is_white_space_character(b) {
213                self.forward();
214            } else {
215                return;
216            }
217        }
218    }
219
220    #[inline]
221    pub(crate) fn skip_eol_characters(&mut self) {
222        while let Some(b) = self.peek_byte() {
223            if is_eol_character(b) {
224                self.forward();
225            } else {
226                return;
227            }
228        }
229    }
230
231    #[inline]
232    pub(crate) fn skip_white_spaces_and_comments(&mut self) {
233        while let Some(b) = self.peek_byte() {
234            if is_white_space_character(b) {
235                self.skip_white_spaces()
236            } else if b == b'%' {
237                Comment::skip::<true>(self);
238            } else {
239                return;
240            }
241        }
242    }
243}
244
245pub(crate) trait Readable<'a>: Sized {
246    fn read<const PLAIN: bool>(r: &mut Reader<'a>, xref: &'a XRef) -> Option<Self>;
247    fn from_bytes(b: &'a [u8]) -> Option<Self> {
248        let mut r = Reader::new(b);
249        let xref = XRef::dummy();
250
251        Self::read::<false>(&mut r, &xref)
252    }
253}
254
255pub(crate) trait Skippable {
256    fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()>;
257}