Skip to main content

justpdf_core/tokenizer/
reader.rs

1/// A byte-level reader over a PDF byte slice, with position tracking.
2pub struct PdfReader<'a> {
3    data: &'a [u8],
4    pos: usize,
5}
6
7impl<'a> PdfReader<'a> {
8    pub fn new(data: &'a [u8]) -> Self {
9        Self { data, pos: 0 }
10    }
11
12    /// Create a reader starting at a specific offset.
13    pub fn new_at(data: &'a [u8], pos: usize) -> Self {
14        Self { data, pos }
15    }
16
17    /// Current byte offset in the data.
18    #[inline]
19    pub fn pos(&self) -> usize {
20        self.pos
21    }
22
23    /// Total length of the underlying data.
24    #[inline]
25    pub fn len(&self) -> usize {
26        self.data.len()
27    }
28
29    /// Whether the underlying data is empty.
30    #[inline]
31    pub fn is_empty(&self) -> bool {
32        self.data.is_empty()
33    }
34
35    /// Whether we've reached the end.
36    #[inline]
37    pub fn is_eof(&self) -> bool {
38        self.pos >= self.data.len()
39    }
40
41    /// Peek at the current byte without consuming.
42    #[inline]
43    pub fn peek(&self) -> Option<u8> {
44        self.data.get(self.pos).copied()
45    }
46
47    /// Peek at the byte at offset `pos + n`.
48    #[inline]
49    pub fn peek_at(&self, n: usize) -> Option<u8> {
50        self.data.get(self.pos + n).copied()
51    }
52
53    /// Consume and return the current byte.
54    #[inline]
55    pub fn next_byte(&mut self) -> Option<u8> {
56        let b = self.data.get(self.pos).copied();
57        if b.is_some() {
58            self.pos += 1;
59        }
60        b
61    }
62
63    /// Advance position by `n` bytes.
64    #[inline]
65    pub fn advance(&mut self, n: usize) {
66        self.pos = (self.pos + n).min(self.data.len());
67    }
68
69    /// Set position to an absolute offset.
70    #[inline]
71    pub fn seek(&mut self, pos: usize) {
72        self.pos = pos.min(self.data.len());
73    }
74
75    /// Return a slice from the underlying data.
76    pub fn slice(&self, start: usize, end: usize) -> &'a [u8] {
77        let end = end.min(self.data.len());
78        let start = start.min(end);
79        &self.data[start..end]
80    }
81
82    /// Return remaining bytes from current position.
83    pub fn remaining(&self) -> &'a [u8] {
84        if self.pos >= self.data.len() {
85            &[]
86        } else {
87            &self.data[self.pos..]
88        }
89    }
90
91    /// The full underlying data.
92    pub fn data(&self) -> &'a [u8] {
93        self.data
94    }
95
96    /// Skip PDF whitespace characters: \0, \t, \n, \x0C, \r, \x20.
97    pub fn skip_whitespace(&mut self) {
98        while let Some(b) = self.peek() {
99            if is_pdf_whitespace(b) {
100                self.pos += 1;
101            } else {
102                break;
103            }
104        }
105    }
106
107    /// Skip whitespace and comments (% to end of line).
108    pub fn skip_whitespace_and_comments(&mut self) {
109        loop {
110            self.skip_whitespace();
111            if self.peek() == Some(b'%') {
112                // Skip to end of line
113                while let Some(b) = self.next_byte() {
114                    if b == b'\n' || b == b'\r' {
115                        break;
116                    }
117                }
118            } else {
119                break;
120            }
121        }
122    }
123}
124
/// Check if a byte is PDF whitespace.
///
/// The whitespace set is NUL (`\0`), tab, line feed, form feed (`\x0C`),
/// carriage return and space.
#[inline]
pub fn is_pdf_whitespace(b: u8) -> bool {
    b"\0\t\n\x0C\r ".contains(&b)
}
130
/// Check if a byte is a PDF delimiter.
///
/// The delimiter set is `(`, `)`, `<`, `>`, `[`, `]`, `{`, `}`, `/` and `%`.
#[inline]
pub fn is_pdf_delimiter(b: u8) -> bool {
    b"()<>[]{}/%".contains(&b)
}
139
140/// Check if a byte is a regular character (not whitespace or delimiter).
141#[inline]
142pub fn is_pdf_regular(b: u8) -> bool {
143    !is_pdf_whitespace(b) && !is_pdf_delimiter(b)
144}