1use std::fmt;
8
/// Byte-oriented cursor over a borrowed source string.
///
/// The same text is held in two forms: as raw bytes for single-byte
/// inspection, and as `&str` so that sub-slices can be handed back to
/// callers as string slices borrowing from the original input.
pub(crate) struct Lexer<'a> {
    /// Byte view of `input`; the scanning routines index into this.
    src: &'a [u8],
    /// The original text, used when returning `&str` slices.
    input: &'a str,
    /// Current byte offset of the cursor into `src` / `input`.
    pos: usize,
}
17
/// Error describing a parse failure at a specific byte offset.
///
/// The message is a `&'static str`, so the error is `Copy` and never
/// allocates.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ParseError {
    /// Byte offset recorded when the error was created.
    pos: usize,
    /// Fixed, human-readable description of the failure.
    msg: &'static str,
}

impl ParseError {
    /// Builds an error for byte offset `pos` described by `msg`.
    pub(crate) fn new(pos: usize, msg: &'static str) -> Self {
        Self { pos, msg }
    }

    /// Returns the byte offset stored in this error.
    #[must_use]
    // NOTE(review): presumably kept for consumers outside this module even
    // when nothing in the crate calls it; confirm before removing the allow.
    #[allow(dead_code)]
    pub fn position(&self) -> usize {
        let Self { pos, .. } = *self;
        pos
    }

    /// Returns the static description stored in this error.
    #[must_use]
    // NOTE(review): same rationale as `position`; confirm.
    #[allow(dead_code)]
    pub fn message(&self) -> &'static str {
        let Self { msg, .. } = *self;
        msg
    }
}

impl fmt::Display for ParseError {
    /// Renders the error as `parse error at byte <pos>: <msg>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { pos, msg } = self;
        write!(f, "parse error at byte {}: {}", pos, msg)
    }
}

// No source or backtrace to expose; the default trait methods suffice.
impl std::error::Error for ParseError {}
69
70impl<'a> Lexer<'a> {
71 pub(crate) fn new(input: &'a str) -> Self {
73 Lexer {
74 src: input.as_bytes(),
75 input,
76 pos: 0,
77 }
78 }
79
80 #[inline]
86 #[must_use]
87 pub(crate) fn pos(&self) -> usize {
88 self.pos
89 }
90
91 #[inline]
93 #[must_use]
94 pub(crate) fn is_eof(&self) -> bool {
95 self.pos >= self.src.len()
96 }
97
98 #[inline]
100 #[must_use]
101 pub(crate) fn peek(&self) -> u8 {
102 if self.pos < self.src.len() {
103 self.src[self.pos]
104 } else {
105 0
106 }
107 }
108
109 #[inline]
111 #[must_use]
112 pub(crate) fn peek_at(&self, offset: usize) -> u8 {
113 let i = self.pos + offset;
114 if i < self.src.len() { self.src[i] } else { 0 }
115 }
116
117 #[inline]
119 #[must_use]
120 pub(crate) fn slice(&self, start: usize) -> &'a str {
121 &self.input[start..self.pos]
122 }
123
124 #[inline]
130 #[must_use]
131 pub(crate) fn slice_range(&self, start: usize, end: usize) -> &'a str {
132 debug_assert!(
133 start <= end && end <= self.src.len(),
134 "slice_range({start}, {end}): len={}",
135 self.src.len()
136 );
137 &self.input[start..end]
138 }
139
140 #[inline]
142 #[must_use]
143 pub(crate) fn remaining(&self) -> &'a str {
144 &self.input[self.pos..]
145 }
146
147 #[inline]
153 pub(crate) fn set_pos(&mut self, pos: usize) {
154 self.pos = pos;
155 }
156
157 #[inline]
159 pub(crate) fn bump(&mut self) {
160 self.pos += 1;
161 }
162
163 #[inline]
165 pub(crate) fn bump_n(&mut self, n: usize) {
166 self.pos += n;
167 }
168
169 #[inline]
171 pub(crate) fn eat(&mut self, b: u8) -> bool {
172 if self.peek() == b {
173 self.pos += 1;
174 true
175 } else {
176 false
177 }
178 }
179
180 pub(crate) fn eat_str(&mut self, s: &[u8]) -> bool {
182 if self.pos + s.len() <= self.src.len() && &self.src[self.pos..self.pos + s.len()] == s {
183 self.pos += s.len();
184 true
185 } else {
186 false
187 }
188 }
189
190 pub(crate) fn skip_blanks(&mut self) {
196 while self.pos < self.src.len() {
197 match self.src[self.pos] {
198 b' ' | b'\t' => self.pos += 1,
199 _ => break,
200 }
201 }
202 }
203
204 pub(crate) fn skip_comment(&mut self) {
206 if self.peek() == b'#' {
207 while self.pos < self.src.len() && self.src[self.pos] != b'\n' {
208 self.pos += 1;
209 }
210 }
211 }
212
213 #[must_use]
220 pub(crate) fn read_name(&mut self) -> &'a str {
221 let start = self.pos;
222 if self.pos < self.src.len()
223 && (self.src[self.pos].is_ascii_alphabetic() || self.src[self.pos] == b'_')
224 {
225 self.pos += 1;
226 while self.pos < self.src.len()
227 && (self.src[self.pos].is_ascii_alphanumeric() || self.src[self.pos] == b'_')
228 {
229 self.pos += 1;
230 }
231 }
232 self.slice(start)
233 }
234
235 #[must_use]
237 pub(crate) fn read_number(&mut self) -> &'a str {
238 let start = self.pos;
239 while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
240 self.pos += 1;
241 }
242 self.slice(start)
243 }
244
245 pub(crate) fn scan_squote(&mut self) -> Result<&'a str, ParseError> {
252 let start = self.pos;
253 while self.pos < self.src.len() {
254 if self.src[self.pos] == b'\'' {
255 let content = self.slice(start);
256 self.pos += 1;
257 return Ok(content);
258 }
259 self.pos += 1;
260 }
261 Err(self.err("unterminated single quote"))
262 }
263
264 #[must_use]
270 pub(crate) fn at_keyword(&self, kw: &[u8]) -> bool {
271 let end = self.pos + kw.len();
272 if end > self.src.len() {
273 return false;
274 }
275 if &self.src[self.pos..end] != kw {
276 return false;
277 }
278 if kw.len() == 1 && is_meta(kw[0]) {
280 return true;
281 }
282 end >= self.src.len() || is_meta(self.src[end])
284 }
285
286 #[must_use]
288 pub(crate) fn at_any_keyword(&self, keywords: &[&[u8]]) -> bool {
289 keywords.iter().any(|kw| self.at_keyword(kw))
290 }
291
292 pub(crate) fn err(&self, msg: &'static str) -> ParseError {
298 ParseError::new(self.pos, msg)
299 }
300}
301
/// Reports whether `b` is a metacharacter, i.e. a byte that terminates a
/// word: blank, tab, newline, one of `; & | ( ) < >`, or NUL.
///
/// NUL is included because `Lexer::peek` returns `0` at end of input, so
/// end of input is treated as a word boundary as well.
#[inline]
#[must_use]
pub(crate) const fn is_meta(b: u8) -> bool {
    match b {
        // Whitespace separators.
        b' ' | b'\t' | b'\n' => true,
        // Command separators and pipeline operators.
        b';' | b'&' | b'|' => true,
        // Grouping and redirection.
        b'(' | b')' | b'<' | b'>' => true,
        // End-of-input sentinel.
        b'\0' => true,
        _ => false,
    }
}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// `peek` yields the 0 sentinel on empty input and the real byte otherwise.
    #[test]
    fn peek_and_eof() {
        let empty = Lexer::new("");
        assert!(empty.is_eof());
        assert_eq!(empty.peek(), 0);

        let single = Lexer::new("a");
        assert!(!single.is_eof());
        assert_eq!(single.peek(), b'a');
    }

    /// `eat` only advances when the expected byte is present.
    #[test]
    fn eat_and_bump() {
        let mut cursor = Lexer::new("ab");
        assert!(cursor.eat(b'a'));
        assert!(!cursor.eat(b'a'));
        assert!(cursor.eat(b'b'));
        assert!(cursor.is_eof());
    }

    /// `eat_str` consumes whole byte sequences and stops right after them.
    #[test]
    fn eat_str() {
        let mut cursor = Lexer::new("then done");
        assert!(cursor.eat_str(b"then"));
        assert_eq!(cursor.peek(), b' ');
        cursor.bump();
        assert!(cursor.eat_str(b"done"));
        assert!(cursor.is_eof());
    }

    /// Spaces and tabs are skipped; the newline is left for the caller.
    #[test]
    fn skip_blanks_not_newlines() {
        let mut cursor = Lexer::new("  \t\nfoo");
        cursor.skip_blanks();
        assert_eq!(cursor.peek(), b'\n');
    }

    /// A name stops at the first byte that is not alphanumeric or `_`.
    #[test]
    fn read_name() {
        let mut cursor = Lexer::new("FOO_bar123 rest");
        assert_eq!(cursor.read_name(), "FOO_bar123");
        assert_eq!(cursor.peek(), b' ');
    }

    /// A leading underscore is a valid name start.
    #[test]
    fn read_name_underscore_start() {
        let mut cursor = Lexer::new("_private");
        assert_eq!(cursor.read_name(), "_private");
    }

    /// A leading digit yields an empty name and leaves the cursor in place.
    #[test]
    fn read_name_no_match() {
        let mut cursor = Lexer::new("123abc");
        assert_eq!(cursor.read_name(), "");
        assert_eq!(cursor.pos(), 0);
    }

    /// Only the leading digit run is returned.
    #[test]
    fn read_number() {
        let mut cursor = Lexer::new("42rest");
        assert_eq!(cursor.read_number(), "42");
    }

    /// The quoted content excludes the closing quote, which is consumed.
    #[test]
    fn scan_squote() {
        let mut cursor = Lexer::new("hello world'rest");
        let content = cursor.scan_squote().unwrap();
        assert_eq!(content, "hello world");
        assert_eq!(cursor.peek(), b'r');
    }

    /// A keyword matches in full; a strict prefix of the word does not.
    #[test]
    fn at_keyword() {
        let cursor = Lexer::new("then ");
        assert!(cursor.at_keyword(b"then"));
        assert!(!cursor.at_keyword(b"the"));
    }

    /// End of input counts as a keyword boundary.
    #[test]
    fn at_keyword_eof() {
        let cursor = Lexer::new("fi");
        assert!(cursor.at_keyword(b"fi"));
    }

    /// A keyword followed by a non-metacharacter is not a keyword.
    #[test]
    fn at_keyword_no_boundary() {
        let cursor = Lexer::new("done_stuff");
        assert!(!cursor.at_keyword(b"done"));
    }

    /// A comment is skipped up to, but not including, the newline.
    #[test]
    fn skip_comment() {
        let mut cursor = Lexer::new("# this is a comment\nnext");
        cursor.skip_comment();
        assert_eq!(cursor.peek(), b'\n');
    }

    /// The accessors return exactly what the constructor was given.
    #[test]
    fn parse_error_accessors() {
        let error = ParseError::new(42, "test error");
        assert_eq!(error.position(), 42);
        assert_eq!(error.message(), "test error");
    }
}