graphql_query/ast/lexer.rs

use logos::{internal::LexerInternal, Lexer, Logos};

#[derive(Clone)]
pub struct Extras<'a> {
    /// Arena in which decoded string values are allocated, so that string tokens
    /// can be returned as `&'a str` slices borrowed from it.
    pub arena: &'a bumpalo::Bump,
}

#[derive(Logos, Debug, PartialEq)]
#[logos(extras = Extras<'s>)]
pub enum Token<'a> {
    #[token("[")]
    BracketOpen,

    #[token("]")]
    BracketClose,

    #[token("{")]
    BraceOpen,

    #[token("}")]
    BraceClose,

    #[token("(")]
    ParenOpen,

    #[token(")")]
    ParenClose,

    #[token(":")]
    Colon,

    #[token("=")]
    Equal,

    #[token("!")]
    Exclam,

    #[token("...")]
    Ellipsis,

    #[regex(r"\$[_a-zA-Z][_0-9a-zA-Z]*", |lex| &lex.slice()[1..])]
    VariableName(&'a str),

    #[regex(r"@[_a-zA-Z][_0-9a-zA-Z]*", |lex| &lex.slice()[1..])]
    DirectiveName(&'a str),

    #[regex(r"[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice())]
    Name(&'a str),

    #[regex(r"-?([1-9][0-9]*|0)[.][0-9]+([eE][+-]?[0-9]+)?")]
    #[regex("-?([1-9][0-9]*|0)[eE][+-]?[0-9]+")]
    Float(&'a str),

    #[regex(r"-?([1-9][0-9]*|0)")]
    Integer(&'a str),

    // Matches the opening quotes of a string: `"`, `""`, or `"""`.
    // `parse_string` consumes and decodes the remainder.
    #[regex(r#"""?"?"#, parse_string)]
    String(&'a str),

    // Whitespace, commas, and comments are insignificant in GraphQL and are skipped.
    #[error]
    #[regex(r"([ ,\t\n\r\f]+|#[^\n\r]*)+", logos::skip)]
    Error,

    /// Token that indicates the end of the input
    End,
}
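
// A minimal usage sketch (mirroring the tests at the bottom of this file): the lexer
// is constructed with `lexer_with_extras`, borrowing an arena into which decoded
// string values are allocated.
//
//     let arena = bumpalo::Bump::new();
//     let mut lex = Token::lexer_with_extras("{ field }", Extras { arena: &arena });
//     assert_eq!(lex.next(), Some(Token::BraceOpen));
//     assert_eq!(lex.next(), Some(Token::Name("field")));
//     assert_eq!(lex.next(), Some(Token::BraceClose));
//     assert_eq!(lex.next(), None);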

/// Sub-lexer for the body of a block string, i.e. everything after the opening `"""`.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum BlockPart {
    #[regex(r#"[^"\\\r\n]+"#)]
    #[regex(r#""+"#)]
    Text,

    #[regex(r"(\r|\n|\r\n)[\t ]*")]
    Newline,

    #[regex(r#"\\""""#)]
    EscapedEndBlock,

    #[regex(r#"\\."#)]
    EscapedSequence,

    #[token(r#"""""#)]
    EndBlock,

    #[error]
    Error,
}

/// Sub-lexer for the body of a regular (non-block) string, i.e. everything after the opening `"`.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum StringPart {
    #[regex(r#"[^\n\r\\"]+"#)]
    Text,

    #[regex(r"\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]")]
    EscapedCodepoint,

    #[token(r#"\""#)]
    EscapedQuote,
    #[token(r#"\\"#)]
    EscapedBackslash,
    #[token(r#"\/"#)]
    EscapedSlash,
    #[token(r#"\b"#)]
    EscapedBackspace,
    #[token(r#"\f"#)]
    EscapedLinefeed,
    #[token(r#"\n"#)]
    EscapedNewline,
    #[token(r#"\r"#)]
    EscapedReturn,
    #[token(r#"\t"#)]
    EscapedTab,

    #[token("\"")]
    EndString,

    #[error]
    Error,
}

#[inline]
fn lex_string<'a>(lex: &mut Lexer<'a, Token<'a>>, mut output: String) -> Option<&'a str> {
    let mut sublex = StringPart::lexer(lex.remainder());
    while let Some(token) = sublex.next() {
        match token {
            StringPart::Error => break,
            StringPart::Text => output.push_str(sublex.slice()),
            StringPart::EscapedQuote => output.push('"'),
            StringPart::EscapedBackslash => output.push('\\'),
            StringPart::EscapedSlash => output.push('/'),
            // `\b` and `\f` decode to backspace (U+0008) and form feed (U+000C)
            StringPart::EscapedBackspace => output.push('\u{0008}'),
            StringPart::EscapedLinefeed => output.push('\u{000C}'),
            StringPart::EscapedNewline => output.push('\n'),
            StringPart::EscapedReturn => output.push('\r'),
            StringPart::EscapedTab => output.push('\t'),
            StringPart::EscapedCodepoint => {
                // Decode a `\uXXXX` escape by parsing the four hex digits into a char
                use lexical_core::*;
                const FORMAT: u128 = NumberFormatBuilder::hexadecimal();
                const OPTIONS: ParseIntegerOptions = ParseIntegerOptions::new();
                output.push(
                    parse_with_options::<_, FORMAT>(sublex.slice()[2..].as_bytes(), &OPTIONS)
                        .ok()
                        .and_then(std::char::from_u32)?,
                );
            }
            StringPart::EndString => {
                // Advance the outer lexer past the string body and the closing quote
                lex.bump_unchecked(sublex.span().end);
                return Some(lex.extras.arena.alloc(output));
            }
        }
    }
    None
}

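// A worked example of the block-string handling below (a sketch from tracing the code
// as written, not a restatement of the GraphQL spec text): lexing
// `"""` + "\n  hello\n    world\n" + `"""` drops the empty first line and strips the
// common two-space indent, producing the value "hello\n  world\n".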
#[inline]
fn lex_block_string<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
    let mut output = String::new();
    let mut sublex = BlockPart::lexer(lex.remainder());
    // Smallest non-zero indentation seen after a newline; used to strip the common indent
    let mut min_indent: usize = usize::MAX;
    while let Some(token) = sublex.next() {
        match token {
            BlockPart::EscapedSequence | BlockPart::Text => output.push_str(sublex.slice()),
            BlockPart::EscapedEndBlock => output.push_str("\"\"\""),
            BlockPart::Newline => {
                // Normalize `\r\n` and `\r` to `\n` and record the line's leading indentation
                let mut slice = &sublex.slice()[1..];
                if !slice.is_empty() && &slice[0..1] == "\n" {
                    slice = &slice[1..];
                };
                let indent = slice.len();
                if indent > 0 && indent < min_indent {
                    min_indent = indent;
                }
                output.push('\n');
                output.push_str(slice);
            }
            BlockPart::EndBlock => {
                // Advance the outer lexer past the block body and the closing `"""`
                lex.bump_unchecked(sublex.span().end);
                if min_indent == usize::MAX {
                    min_indent = 0;
                }
                // Re-emit the collected lines: trim the first line, strip the common
                // indent from the rest, and drop a trailing line that is only whitespace
                let mut lines = output.lines();
                let mut output = String::with_capacity(output.len());
                if let Some(first) = lines.next() {
                    let stripped = first.trim();
                    if !stripped.is_empty() {
                        output.push_str(stripped);
                        output.push('\n');
                    }
                }
                let mut last_line = 0;
                for line in lines {
                    last_line = output.len();
                    if line.len() > min_indent {
                        output.push_str(&line[min_indent..]);
                    }
                    output.push('\n');
                }
                if output[last_line..].trim().is_empty() {
                    output.truncate(last_line);
                }
                return Some(lex.extras.arena.alloc(output));
            }
            BlockPart::Error => break,
        }
    }
    None
}

#[inline]
fn parse_string<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
    match lex.slice() {
        r#""""# => Some(""),
        r#"""""# => lex_block_string(lex),
        "\"" => {
            // Fast path: scan ahead for the closing quote and return a borrowed slice
            // of the input directly. Only strings that contain escape sequences fall
            // back to the full StringPart sub-lexer and an owned, arena-allocated buffer.
            let remainder = lex.remainder();
            for (i, c) in remainder.char_indices() {
                match c {
                    '\n' | '\r' => return None,
                    '\\' => {
                        lex.bump_unchecked(i);
                        return lex_string(lex, remainder[0..i].to_string());
                    }
                    '"' => {
                        lex.bump_unchecked(i + 1);
                        return Some(&remainder[0..i]);
                    }
                    _ => {}
                }
            }
            None
        }
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::{Extras, Token};
    use bumpalo::Bump;
    use logos::Logos;

    #[test]
    fn empty() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("", Extras { arena: &arena });
        assert_eq!(lex.next(), None);
        let mut lex = Token::lexer_with_extras(",,       # comment\n", Extras { arena: &arena });
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn symbols() {
        let arena = Bump::new();
        let extras = Extras { arena: &arena };
        let mut lex = Token::lexer_with_extras("[]{}()=:...!", extras);
        assert_eq!(lex.next(), Some(Token::BracketOpen));
        assert_eq!(lex.next(), Some(Token::BracketClose));
        assert_eq!(lex.next(), Some(Token::BraceOpen));
        assert_eq!(lex.next(), Some(Token::BraceClose));
        assert_eq!(lex.next(), Some(Token::ParenOpen));
        assert_eq!(lex.next(), Some(Token::ParenClose));
        assert_eq!(lex.next(), Some(Token::Equal));
        assert_eq!(lex.next(), Some(Token::Colon));
        assert_eq!(lex.next(), Some(Token::Ellipsis));
        assert_eq!(lex.next(), Some(Token::Exclam));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn names() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("hello world", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Name("hello")));
        assert_eq!(lex.next(), Some(Token::Name("world")));
        assert_eq!(lex.next(), None);

        let mut lex = Token::lexer_with_extras("# comment\n hello", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Name("hello")));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn variables() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("$var", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::VariableName("var")));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn directives() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("@directive", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::DirectiveName("directive")));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn integers() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("1 -1 123 -123 0", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Integer("1")));
        assert_eq!(lex.next(), Some(Token::Integer("-1")));
        assert_eq!(lex.next(), Some(Token::Integer("123")));
        assert_eq!(lex.next(), Some(Token::Integer("-123")));
        assert_eq!(lex.next(), Some(Token::Integer("0")));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn floats() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras(
            "1.0 -10.10 -10.10E10 1.1e-1 1e1 0.0",
            Extras { arena: &arena },
        );
        assert_eq!(lex.next(), Some(Token::Float("1.0")));
        assert_eq!(lex.next(), Some(Token::Float("-10.10")));
        // TODO: is this acceptable? verify with gql.js
        assert_eq!(lex.next(), Some(Token::Float("-10.10E10")));
        assert_eq!(lex.next(), Some(Token::Float("1.1e-1")));
        assert_eq!(lex.next(), Some(Token::Float("1e1")));
        assert_eq!(lex.next(), Some(Token::Float("0.0")));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn strings() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("\"hello world\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::String("hello world")));
        assert_eq!(lex.next(), None);
        let mut lex = Token::lexer_with_extras("\"\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::String("")));
        assert_eq!(lex.next(), None);
        let mut lex =
            Token::lexer_with_extras("\"hello \\\" \\n world\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::String("hello \" \n world")));
        assert_eq!(lex.next(), None);
        let mut lex = Token::lexer_with_extras("\"\"\"hello block\"\"\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::String("hello block\n")));
        assert_eq!(lex.next(), None);
        let mut lex = Token::lexer_with_extras("\"\"\"\"\"\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::String("")));
        assert_eq!(lex.next(), None);
        let mut lex = Token::lexer_with_extras(
            "\"\"\"hello\n\r\t #test\n \\\"\"\" block\"\"\"",
            Extras { arena: &arena },
        );
        assert_eq!(
            lex.next(),
            Some(Token::String("hello\n\n #test\n\"\"\" block\n"))
        );
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn bad_strings() {
        let arena = Bump::new();
        let mut lex = Token::lexer_with_extras("\"\\ \"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Error));
        let mut lex = Token::lexer_with_extras("\"\n\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Error));
        let mut lex = Token::lexer_with_extras("\"\r\"", Extras { arena: &arena });
        assert_eq!(lex.next(), Some(Token::Error));
    }
}