// plotnik_lib/parser/lexer.rs

//! Lexer for the query language.
//!
//! Produces span-based tokens without storing text — text is sliced from the
//! source only when needed.
//!
//! ## Error handling
//!
//! The lexer coalesces consecutive error characters into single `Garbage`
//! tokens rather than producing one error per character. This keeps the token
//! stream manageable for malformed input.

10use logos::Logos;
11use rowan::TextRange;
12use std::ops::Range;
13
14use super::cst::SyntaxKind;
15
16/// Zero-copy token: kind + span, text retrieved via [`token_text`] when needed.
17#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub struct Token {
19    pub kind: SyntaxKind,
20    pub span: TextRange,
21}
22
23impl Token {
24    #[inline]
25    pub fn new(kind: SyntaxKind, span: TextRange) -> Self {
26        Self { kind, span }
27    }
28}
29
30fn range_to_text_range(range: Range<usize>) -> TextRange {
31    TextRange::new((range.start as u32).into(), (range.end as u32).into())
32}
33
34/// Tokenizes source into a vector of span-based tokens.
35///
36/// Post-processes the Logos output:
37/// - Coalesces consecutive lexer errors into single `Garbage` tokens
38/// - Splits `StringLiteral` tokens into quote + content + quote
39pub fn lex(source: &str) -> Vec<Token> {
40    let mut tokens = Vec::new();
41    let mut lexer = SyntaxKind::lexer(source);
42    let mut error_start: Option<usize> = None;
43
44    loop {
45        match lexer.next() {
46            Some(Ok(kind)) => {
47                if let Some(start) = error_start.take() {
48                    let end = lexer.span().start;
49                    tokens.push(Token::new(
50                        SyntaxKind::Garbage,
51                        range_to_text_range(start..end),
52                    ));
53                }
54
55                let span = lexer.span();
56                if kind == SyntaxKind::StringLiteral {
57                    split_string_literal(source, span, &mut tokens);
58                } else {
59                    tokens.push(Token::new(kind, range_to_text_range(span)));
60                }
61            }
62            Some(Err(())) => {
63                if error_start.is_none() {
64                    error_start = Some(lexer.span().start);
65                }
66            }
67            None => {
68                if let Some(start) = error_start.take() {
69                    tokens.push(Token::new(
70                        SyntaxKind::Garbage,
71                        range_to_text_range(start..source.len()),
72                    ));
73                }
74                break;
75            }
76        }
77    }
78
79    tokens
80}
81
82/// Splits a string literal token into: quote + content + quote
83fn split_string_literal(source: &str, span: Range<usize>, tokens: &mut Vec<Token>) {
84    let text = &source[span.clone()];
85    let quote_char = text.chars().next().unwrap();
86    let quote_kind = if quote_char == '"' {
87        SyntaxKind::DoubleQuote
88    } else {
89        SyntaxKind::SingleQuote
90    };
91
92    let start = span.start;
93    let end = span.end;
94
95    tokens.push(Token::new(
96        quote_kind,
97        range_to_text_range(start..start + 1),
98    ));
99
100    if end - start > 2 {
101        tokens.push(Token::new(
102            SyntaxKind::StrVal,
103            range_to_text_range(start + 1..end - 1),
104        ));
105    }
106
107    tokens.push(Token::new(quote_kind, range_to_text_range(end - 1..end)));
108}
109
110/// Retrieves the text slice for a token. O(1) slice into source.
111#[inline]
112pub fn token_text<'q>(source: &'q str, token: &Token) -> &'q str {
113    &source[std::ops::Range::<usize>::from(token.span)]
114}