surql_parser/upstream/syn/lexer/
mod.rs1mod byte;
2mod char;
3pub mod compound;
4mod ident;
5pub mod keywords;
6mod reader;
7mod strings;
8mod unicode;
9use crate::upstream::syn::error::{SyntaxError, bail};
10use crate::upstream::syn::token::{Span, Token, TokenKind};
11pub use reader::{BytesReader, CharError};
/// A lexer which produces tokens over a byte-slice source.
///
/// The lexer hands out [`Token`]s whose [`Span`]s index back into the
/// original source; offsets are `u32`, so the source may be at most
/// `u32::MAX` bytes (enforced by an assert in [`Lexer::new`]).
pub struct Lexer<'a> {
    // Cursor over the raw source bytes.
    pub(super) reader: BytesReader<'a>,
    // Offset where the token currently being lexed started; spans are
    // produced as `last_offset..reader.offset()`.
    last_offset: u32,
    // Error produced while lexing the most recent `Invalid` token, if any.
    pub(super) error: Option<SyntaxError>,
}
33impl<'a> Lexer<'a> {
34 pub fn new(source: &'a [u8]) -> Lexer<'a> {
38 assert!(
39 source.len() <= u32::MAX as usize,
40 "source code exceeded maximum size"
41 );
42 let reader = BytesReader::new(source);
43 Lexer {
44 reader,
45 last_offset: 0,
46 error: None,
47 }
48 }
49 pub fn reset(&mut self) {
53 self.last_offset = 0;
54 self.error = None;
55 }
56 pub fn change_source<'b>(self, source: &'b [u8]) -> Lexer<'b> {
63 assert!(
64 source.len() <= u32::MAX as usize,
65 "source code exceeded maximum size"
66 );
67 let reader = BytesReader::<'b>::new(source);
68 Lexer {
69 reader,
70 last_offset: 0,
71 error: self.error,
72 }
73 }
74 pub fn next_token(&mut self) -> Token {
79 let Some(byte) = self.reader.next() else {
80 return self.eof_token();
81 };
82 if byte.is_ascii() {
83 self.lex_ascii(byte)
84 } else {
85 self.lex_char(byte)
86 }
87 }
88 fn eof_token(&mut self) -> Token {
93 Token {
94 kind: TokenKind::Eof,
95 span: Span {
96 offset: self.last_offset,
97 len: 0,
98 },
99 }
100 }
101 fn invalid_token(&mut self, error: SyntaxError) -> Token {
103 self.error = Some(error);
104 self.finish_token(TokenKind::Invalid)
105 }
106 pub fn current_span(&self) -> Span {
107 let new_offset = self.reader.offset();
108 let len = new_offset - self.last_offset;
109 Span {
110 offset: self.last_offset,
111 len,
112 }
113 }
114 pub fn span_since(&self, offset: u32) -> Span {
115 let new_offset = self.reader.offset();
116 let len = new_offset - offset;
117 Span { offset, len }
118 }
119 fn advance_span(&mut self) -> Span {
120 let span = self.current_span();
121 self.last_offset = self.reader.offset();
122 span
123 }
124 fn finish_token(&mut self, kind: TokenKind) -> Token {
128 Token {
129 kind,
130 span: self.advance_span(),
131 }
132 }
133 pub fn backup_after(&mut self, span: Span) {
139 let offset = span.offset + span.len;
140 self.reader.backup(offset);
141 self.last_offset = offset;
142 }
143 fn eat(&mut self, byte: u8) -> bool {
148 if self.reader.peek() == Some(byte) {
149 self.reader.next();
150 true
151 } else {
152 false
153 }
154 }
155 fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
160 let Some(x) = self.reader.peek() else {
161 return false;
162 };
163 if f(x) {
164 self.reader.next();
165 true
166 } else {
167 false
168 }
169 }
170 fn expect(&mut self, c: char) -> Result<(), SyntaxError> {
171 match self.reader.peek() {
172 Some(x) => {
173 let offset = self.reader.offset();
174 self.reader.next();
175 let char = self.reader.convert_to_char(x)?;
176 if char == c {
177 return Ok(());
178 }
179 let len = self.reader.offset() - offset;
180 bail!(
181 "Unexpected character `{char}` expected `{c}`", @ Span { offset, len
182 }
183 )
184 }
185 None => {
186 bail!(
187 "Unexpected end of file, expected character `{c}`", @ self
188 .current_span()
189 )
190 }
191 }
192 }
193 pub fn span_str(&self, span: Span) -> &'a str {
197 std::str::from_utf8(self.span_bytes(span)).expect("invalid span segment for source")
198 }
199 pub fn span_bytes(&self, span: Span) -> &'a [u8] {
203 self.reader.span(span)
204 }
205 pub fn assert_finished(&self) -> Result<(), SyntaxError> {
207 if !self.reader.is_empty() {
208 let offset = self.reader.offset();
209 let len = self.reader.remaining().len() as u32;
210 let span = Span { offset, len };
211 bail!("Trailing characters", @ span)
212 }
213 Ok(())
214 }
215}
216impl Iterator for Lexer<'_> {
217 type Item = Token;
218 fn next(&mut self) -> Option<Self::Item> {
219 let token = self.next_token();
220 if token.is_eof() {
221 return None;
222 }
223 Some(token)
224 }
225}