1use std::ops;
4
5use squawk_lexer::tokenize;
6
7use crate::SyntaxKind;
8
/// The result of lexing a source string: the text itself plus parallel
/// per-token vectors describing it.
pub struct LexedStr<'a> {
    /// The source text that was lexed.
    text: &'a str,
    /// Kind of each token; construction finishes by appending `SyntaxKind::EOF`.
    kind: Vec<SyntaxKind>,
    /// Byte offset into `text` at which each token starts (parallel to `kind`).
    start: Vec<u32>,
    /// Errors found while lexing, each tied to the index of a token.
    error: Vec<LexError>,
}
15
/// A lexing error attached to a single token.
struct LexError {
    /// Human-readable description of the problem.
    msg: String,
    /// Index of the token the error refers to.
    token: u32,
}
20
21impl<'a> LexedStr<'a> {
22 pub fn new(text: &'a str) -> LexedStr<'a> {
25 let mut conv = Converter::new(text);
26
27 for token in tokenize(&text[conv.offset..]) {
28 let token_text = &text[conv.offset..][..token.len as usize];
29
30 conv.extend_token(&token.kind, token_text);
31 }
32
33 conv.finalize_with_eof()
34 }
35
36 pub(crate) fn len(&self) -> usize {
59 self.kind.len() - 1
60 }
61
62 pub(crate) fn kind(&self, i: usize) -> SyntaxKind {
67 assert!(i < self.len());
68 self.kind[i]
69 }
70
71 pub(crate) fn text(&self, i: usize) -> &str {
72 self.range_text(i..i + 1)
73 }
74
75 pub(crate) fn range_text(&self, r: ops::Range<usize>) -> &str {
76 assert!(r.start < r.end && r.end <= self.len());
77 let lo = self.start[r.start] as usize;
78 let hi = self.start[r.end] as usize;
79 &self.text[lo..hi]
80 }
81
82 pub fn text_range(&self, i: usize) -> ops::Range<usize> {
84 assert!(i < self.len());
85 let lo = self.start[i] as usize;
86 let hi = self.start[i + 1] as usize;
87 lo..hi
88 }
89 pub fn text_start(&self, i: usize) -> usize {
90 assert!(i <= self.len());
91 self.start[i] as usize
92 }
93 pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ {
109 self.error
110 .iter()
111 .map(|it| (it.token as usize, it.msg.as_str()))
112 }
113
114 fn push(&mut self, kind: SyntaxKind, offset: usize) {
115 self.kind.push(kind);
116 self.start.push(offset as u32);
117 }
118}
119
/// Incrementally builds a `LexedStr` from lexer tokens.
struct Converter<'a> {
    /// The result being accumulated.
    res: LexedStr<'a>,
    /// Byte offset into the source text at which the next token starts.
    offset: usize,
}
124
125impl<'a> Converter<'a> {
126 fn new(text: &'a str) -> Self {
127 Self {
128 res: LexedStr {
129 text,
130 kind: Vec::new(),
131 start: Vec::new(),
132 error: Vec::new(),
133 },
134 offset: 0,
135 }
136 }
137
138 fn finalize_with_eof(mut self) -> LexedStr<'a> {
139 self.res.push(SyntaxKind::EOF, self.offset);
140 self.res
141 }
142
143 fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
144 self.res.push(kind, self.offset);
145 self.offset += len;
146
147 if let Some(err) = err {
148 let token = self.res.len() as u32;
149 let msg = err.to_owned();
150 self.res.error.push(LexError { msg, token });
151 }
152 }
153
154 fn extend_token(&mut self, kind: &squawk_lexer::TokenKind, token_text: &str) {
155 let mut err = "";
160
161 let syntax_kind = {
162 match kind {
163 squawk_lexer::TokenKind::LineComment => SyntaxKind::COMMENT,
164 squawk_lexer::TokenKind::BlockComment { terminated } => {
165 if !terminated {
166 err = "Missing trailing `*/` symbols to terminate the block comment";
167 }
168 SyntaxKind::COMMENT
169 }
170
171 squawk_lexer::TokenKind::Whitespace => SyntaxKind::WHITESPACE,
172 squawk_lexer::TokenKind::Ident => {
173 SyntaxKind::from_keyword(token_text).unwrap_or(SyntaxKind::IDENT)
174 }
175 squawk_lexer::TokenKind::Literal { kind, .. } => {
176 self.extend_literal(token_text, kind);
177 return;
178 }
179 squawk_lexer::TokenKind::Semi => SyntaxKind::SEMICOLON,
180 squawk_lexer::TokenKind::Comma => SyntaxKind::COMMA,
181 squawk_lexer::TokenKind::Dot => SyntaxKind::DOT,
182 squawk_lexer::TokenKind::OpenParen => SyntaxKind::L_PAREN,
183 squawk_lexer::TokenKind::CloseParen => SyntaxKind::R_PAREN,
184 squawk_lexer::TokenKind::OpenBracket => SyntaxKind::L_BRACK,
185 squawk_lexer::TokenKind::CloseBracket => SyntaxKind::R_BRACK,
186 squawk_lexer::TokenKind::OpenCurly => SyntaxKind::L_CURLY,
187 squawk_lexer::TokenKind::CloseCurly => SyntaxKind::R_CURLY,
188 squawk_lexer::TokenKind::At => SyntaxKind::AT,
189 squawk_lexer::TokenKind::Pound => SyntaxKind::POUND,
190 squawk_lexer::TokenKind::Tilde => SyntaxKind::TILDE,
191 squawk_lexer::TokenKind::Question => SyntaxKind::QUESTION,
192 squawk_lexer::TokenKind::Colon => SyntaxKind::COLON,
193 squawk_lexer::TokenKind::Eq => SyntaxKind::EQ,
194 squawk_lexer::TokenKind::Bang => SyntaxKind::BANG,
195 squawk_lexer::TokenKind::Lt => SyntaxKind::L_ANGLE,
196 squawk_lexer::TokenKind::Gt => SyntaxKind::R_ANGLE,
197 squawk_lexer::TokenKind::Minus => SyntaxKind::MINUS,
198 squawk_lexer::TokenKind::And => SyntaxKind::AMP,
199 squawk_lexer::TokenKind::Or => SyntaxKind::PIPE,
200 squawk_lexer::TokenKind::Plus => SyntaxKind::PLUS,
201 squawk_lexer::TokenKind::Star => SyntaxKind::STAR,
202 squawk_lexer::TokenKind::Slash => SyntaxKind::SLASH,
203 squawk_lexer::TokenKind::Caret => SyntaxKind::CARET,
204 squawk_lexer::TokenKind::Percent => SyntaxKind::PERCENT,
205 squawk_lexer::TokenKind::Unknown => SyntaxKind::ERROR,
206 squawk_lexer::TokenKind::UnknownPrefix => {
207 err = "unknown literal prefix";
208 SyntaxKind::IDENT
209 }
210 squawk_lexer::TokenKind::Eof => SyntaxKind::EOF,
211 squawk_lexer::TokenKind::Backtick => SyntaxKind::BACKTICK,
212 squawk_lexer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM,
213 squawk_lexer::TokenKind::QuotedIdent { terminated } => {
214 if !terminated {
215 err = "Missing trailing \" to terminate the quoted identifier"
216 }
217 SyntaxKind::IDENT
218 }
219 }
220 };
221
222 let err = if err.is_empty() { None } else { Some(err) };
223 self.push(syntax_kind, token_text.len(), err);
224 }
225
226 fn extend_literal(&mut self, token_text: &str, kind: &squawk_lexer::LiteralKind) {
227 let mut err: Option<String> = None;
228
229 let syntax_kind = match *kind {
230 squawk_lexer::LiteralKind::Int { empty_int, base: _ } => {
231 if empty_int {
232 err = Some("Missing digits after the integer base prefix".into());
233 }
234 SyntaxKind::INT_NUMBER
235 }
236 squawk_lexer::LiteralKind::Float {
237 empty_exponent,
238 base: _,
239 } => {
240 if empty_exponent {
241 err = Some("Missing digits after the exponent symbol".into());
242 }
243 SyntaxKind::FLOAT_NUMBER
244 }
245 squawk_lexer::LiteralKind::Str { terminated } => {
246 if !terminated {
247 err =
248 Some("Missing trailing `'` symbol to terminate the string literal".into());
249 }
250 SyntaxKind::STRING
251 }
252 squawk_lexer::LiteralKind::ByteStr { terminated } => {
253 if !terminated {
254 err = Some(
255 "Missing trailing `'` symbol to terminate the hex bit string literal"
256 .into(),
257 );
258 }
259 SyntaxKind::BYTE_STRING
261 }
262 squawk_lexer::LiteralKind::BitStr { terminated } => {
263 if !terminated {
264 err = Some(
265 "Missing trailing `'` symbol to terminate the bit string literal".into(),
266 );
267 }
268 SyntaxKind::BIT_STRING
270 }
271 squawk_lexer::LiteralKind::DollarQuotedString { terminated } => {
272 if !terminated {
273 err = Some("Unterminated dollar quoted string literal".into());
275 }
276 SyntaxKind::DOLLAR_QUOTED_STRING
277 }
278 squawk_lexer::LiteralKind::UnicodeEscStr { terminated } => {
279 if !terminated {
280 err = Some(
281 "Missing trailing `'` symbol to terminate the unicode escape string literal"
282 .into(),
283 );
284 }
285 SyntaxKind::UNICODE_ESC_STRING
287 }
288 squawk_lexer::LiteralKind::EscStr { terminated } => {
289 if !terminated {
290 err = Some(
291 "Missing trailing `'` symbol to terminate the escape string literal".into(),
292 );
293 }
294 SyntaxKind::ESC_STRING
296 }
297 };
298
299 self.push(syntax_kind, token_text.len(), err.as_deref());
300 }
301}
302
#[cfg(test)]
mod tests {
    use annotate_snippets::{AnnotationKind, Level, Renderer, Snippet, renderer::DecorStyle};
    use insta::assert_snapshot;

    use super::LexedStr;

    /// Lexes `text` and returns every lexer error rendered as an annotated
    /// snippet, each followed by a newline.
    fn lex(text: &str) -> String {
        let lexed = LexedStr::new(text);
        let renderer = Renderer::plain().decor_style(DecorStyle::Unicode);
        let mut res = String::new();

        for (token, msg) in lexed.errors() {
            // Annotate the byte range of the token the error is attached to.
            let group = Level::ERROR.primary_title(msg).element(
                Snippet::source(text)
                    .fold(true)
                    .annotation(AnnotationKind::Primary.span(lexed.text_range(token))),
            );
            res.push_str(&renderer.render(&[group]).to_string());
            res.push('\n');
        }

        res
    }

    // Integer with a base prefix but no digits.
    #[test]
    fn empty_int_error() {
        assert_snapshot!(lex("select 0x;"), @"
        error: Missing digits after the integer base prefix
        ╭▸
        1 │ select 0x;
        ╰╴ ━━
        ");
    }

    // Float whose exponent marker has no digits after it.
    #[test]
    fn empty_exponent_error() {
        assert_snapshot!(lex("select 1e;"), @"
        error: Missing digits after the exponent symbol
        ╭▸
        1 │ select 1e;
        ╰╴ ━━
        ");
    }

    // Plain string literal without a closing quote.
    #[test]
    fn unterminated_string_error() {
        assert_snapshot!(lex("select 'hello;"), @"
        error: Missing trailing `'` symbol to terminate the string literal
        ╭▸
        1 │ select 'hello;
        ╰╴ ━━━━━━━
        ");
    }

    // `X'...` literal without a closing quote.
    #[test]
    fn unterminated_hex_bit_string_error() {
        assert_snapshot!(lex("select X'1F;"), @"
        error: Missing trailing `'` symbol to terminate the hex bit string literal
        ╭▸
        1 │ select X'1F;
        ╰╴ ━━━━━
        ");
    }

    // `B'...` literal without a closing quote.
    #[test]
    fn unterminated_bit_string_error() {
        assert_snapshot!(lex("select B'101;"), @"
        error: Missing trailing `'` symbol to terminate the bit string literal
        ╭▸
        1 │ select B'101;
        ╰╴ ━━━━━━
        ");
    }

    // Dollar-quoted string without its closing tag.
    #[test]
    fn unterminated_dollar_quoted_string_error() {
        assert_snapshot!(lex("select $tag$hello;"), @"
        error: Unterminated dollar quoted string literal
        ╭▸
        1 │ select $tag$hello;
        ╰╴ ━━━━━━━━━━━
        ");
    }

    // `U&'...` literal without a closing quote.
    #[test]
    fn unterminated_unicode_escape_string_error() {
        assert_snapshot!(lex("select U&'hello;"), @"
        error: Missing trailing `'` symbol to terminate the unicode escape string literal
        ╭▸
        1 │ select U&'hello;
        ╰╴ ━━━━━━━━━
        ");
    }

    // `E'...` literal without a closing quote.
    #[test]
    fn unterminated_escape_string_error() {
        assert_snapshot!(lex("select E'hello;"), @"
        error: Missing trailing `'` symbol to terminate the escape string literal
        ╭▸
        1 │ select E'hello;
        ╰╴ ━━━━━━━━
        ");
    }
}