promql_parser/parser/
lex.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use crate::parser::token::*;
16use cfgrammar::NewlineCache;
17use lrlex::{DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer};
18use lrpar::Lexeme;
19use std::fmt::Debug;
20
/// Characters accepted right after a backslash inside a quoted string.
/// `accept_escape` additionally accepts the quote symbol that opened the string.
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that open a string literal: ' or " or `
const STRING_SYMBOLS: &str = r#"'"`"#;

/// The lexeme type produced by this lexer, keyed by `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
25
26pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<'_, '_, DefaultLexerTypes<TokenId>>, String> {
27    let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
28    match lexemes.last() {
29        Some(Err(info)) => Err(info.into()),
30        Some(Ok(_)) => {
31            // TODO: use better error mechanism, instead of filtering the err.
32            let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
33            Ok(LRNonStreamingLexer::new(s, lexemes, NewlineCache::new()))
34        }
35        None => Err(format!("no expression found in input: '{s}'")),
36    }
37}
38
/// States of the lexer's state machine. `Lexer::shift` computes the next
/// state from the current one.
#[derive(Debug)]
enum State {
    /// Ready to scan the next token.
    Start,
    /// Scanning is finished; the iterator returns `None` in this state.
    End,
    /// A token of the given kind has been scanned; the iterator emits it.
    Lexeme(TokenId),
    /// Scanning an identifier; entered from `inside_braces`.
    Identifier,
    /// Scanning a word that is either a keyword or an identifier.
    KeywordOrIdentifier,
    /// Scanning something that starts like a number and may be a duration.
    NumberOrDuration,
    /// The next token is scanned by `inside_brackets`.
    InsideBrackets,
    /// The next token is scanned by `inside_braces`.
    InsideBraces,
    /// A `#` was consumed; the rest of the line is skipped.
    LineComment,
    /// A whitespace char was consumed; the whole run is skipped.
    Space,
    String(char), // char is the symbol, ' or " or `
    Escape(char), // Escape happens inside String. char is the symbol, ' or " or `
    /// Scanning failed; the payload is the error message.
    Err(String),
}
55
/// Cursor and nesting bookkeeping for one lexing run.
///
/// `idx` counts chars while `start` and `pos` count UTF-8 bytes, so the
/// two kinds of position differ as soon as the input holds a multi-byte char.
#[derive(Debug)]
struct Context {
    // TODO: use &str instead of Vec<char> for better performance.
    chars: Vec<char>,
    idx: usize,   // Current position in the Vec, increment by 1.
    start: usize, // Start position of one Token, increment by char.len_utf8.
    pos: usize,   // Current position in the input, increment by char.len_utf8.

    paren_depth: usize, // Nesting depth of ( ) exprs, 0 means no parens.
    brace_open: bool,   // Whether a { is opened.
    bracket_open: bool, // Whether a [ is opened.
    got_colon: bool,    // Whether we got a ':' after [ was opened.
    eof: bool,          // Whether we got end of file
}
70
71impl Context {
72    fn new(input: &str) -> Context {
73        Self {
74            chars: input.chars().collect(),
75            idx: 0,
76            start: 0,
77            pos: 0,
78
79            paren_depth: 0,
80            brace_open: false,
81            bracket_open: false,
82            got_colon: false,
83            eof: false,
84        }
85    }
86
87    /// pop the first char.
88    fn pop(&mut self) -> Option<char> {
89        let ch = self.peek()?;
90        self.pos += ch.len_utf8();
91        self.idx += 1;
92        Some(ch)
93    }
94
95    /// backup steps back one char. If cursor is at the beginning, it does nothing.
96    /// caller should pay attention if the backup is successful or not.
97    fn backup(&mut self) -> bool {
98        if let Some(ch) = self.chars.get(self.idx - 1) {
99            self.pos -= ch.len_utf8();
100            self.idx -= 1;
101            return true;
102        };
103        false
104    }
105
106    /// get the char at the pos to check, this won't consume it.
107    fn peek(&self) -> Option<char> {
108        self.chars.get(self.idx).copied()
109    }
110
111    /// string lexeme SHOULD trim the surrounding string symbols, ' or " or `
112    fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
113        let start = self.start;
114        let len = self.pos - self.start;
115        DefaultLexeme::new(token_id, start, len)
116    }
117
118    /// ignore the text between start and pos
119    fn ignore(&mut self) {
120        self.start = self.pos;
121    }
122
123    // TODO: refactor needed, details in Issues/15.
124    fn lexeme_string(&self) -> String {
125        let mut s = String::from("");
126        if self.idx == 0 {
127            return s;
128        }
129
130        let mut pos = self.pos;
131        let mut idx = self.idx;
132        while pos > self.start {
133            if let Some(&ch) = self.chars.get(idx - 1) {
134                pos -= ch.len_utf8();
135                idx -= 1;
136                s.push(ch);
137            };
138        }
139        s.chars().rev().collect()
140    }
141}
142
/// The lexer: a state machine (`state`) driven over the input held in `ctx`.
/// Lexemes are obtained through its `Iterator` implementation.
#[derive(Debug)]
struct Lexer {
    state: State,
    ctx: Context,
}
148
149/// block for context operations.
150impl Lexer {
151    fn new(input: &str) -> Self {
152        let ctx = Context::new(input);
153        let state = State::Start;
154        Self { state, ctx }
155    }
156
157    fn is_inside_braces(&self) -> bool {
158        self.ctx.brace_open
159    }
160
161    fn jump_outof_braces(&mut self) {
162        self.ctx.brace_open = false;
163    }
164
165    fn dive_into_braces(&mut self) {
166        self.ctx.brace_open = true;
167    }
168
169    fn is_inside_brackets(&self) -> bool {
170        self.ctx.bracket_open
171    }
172
173    fn jump_outof_brackets(&mut self) {
174        self.ctx.bracket_open = false;
175    }
176
177    fn dive_into_brackets(&mut self) {
178        self.ctx.bracket_open = true;
179    }
180
181    fn is_colon_scanned(&self) -> bool {
182        self.ctx.got_colon
183    }
184
185    fn set_colon_scanned(&mut self) {
186        self.ctx.got_colon = true;
187    }
188
189    fn reset_colon_scanned(&mut self) {
190        self.ctx.got_colon = false;
191    }
192
193    /// true only if paren depth less than MAX
194    fn inc_paren_depth(&mut self) -> bool {
195        if self.ctx.paren_depth < usize::MAX {
196            self.ctx.paren_depth += 1;
197            return true;
198        }
199        false
200    }
201
202    /// true only if paren depth larger than 1
203    fn dec_paren_depth(&mut self) -> bool {
204        if self.ctx.paren_depth >= 1 {
205            self.ctx.paren_depth -= 1;
206            return true;
207        }
208        false
209    }
210
211    fn is_paren_balanced(&self) -> bool {
212        self.ctx.paren_depth == 0
213    }
214
215    fn pop(&mut self) -> Option<char> {
216        self.ctx.pop()
217    }
218
219    fn backup(&mut self) -> bool {
220        self.ctx.backup()
221    }
222
223    fn peek(&self) -> Option<char> {
224        self.ctx.peek()
225    }
226
227    /// lexeme() consumes the Span, which means consecutive lexeme() call
228    /// will get wrong Span unless Lexer shifts its State.
229    fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
230        let lexeme = self.ctx.lexeme(token_id);
231        self.ctx.ignore();
232        lexeme
233    }
234
235    fn lexeme_string(&self) -> String {
236        self.ctx.lexeme_string()
237    }
238
239    fn ignore(&mut self) {
240        self.ctx.ignore();
241    }
242
243    fn is_eof(&self) -> bool {
244        self.ctx.eof
245    }
246
247    fn set_eof(&mut self) {
248        self.ctx.eof = true;
249    }
250}
251
252/// block for state operations.
253impl Lexer {
254    fn shift(&mut self) {
255        // NOTE: the design of the match arms's order is of no importance.
256        // If different orders result in different states, then it has to be fixed.
257        self.state = match self.state {
258            State::Start => self.start(),
259            State::End => State::Err("End state can not shift forward.".into()),
260            State::Lexeme(_) => State::Start,
261            State::String(ch) => self.accept_string(ch),
262            State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
263            State::Identifier => self.accept_identifier(),
264            State::NumberOrDuration => self.accept_number_or_duration(),
265            State::InsideBrackets => self.inside_brackets(),
266            State::InsideBraces => self.inside_braces(),
267            State::LineComment => self.ignore_comment_line(),
268            State::Escape(ch) => self.accept_escape(ch),
269            State::Space => self.ignore_space(),
270            State::Err(_) => State::End,
271        };
272    }
273
274    fn start(&mut self) -> State {
275        if self.is_inside_braces() {
276            return State::InsideBraces;
277        }
278
279        if self.is_inside_brackets() {
280            return State::InsideBrackets;
281        }
282
283        let c = match self.pop() {
284            None => {
285                if !self.is_paren_balanced() {
286                    return State::Err("unclosed left parenthesis".into());
287                }
288
289                if !self.is_eof() {
290                    self.set_eof();
291                    return State::Lexeme(T_EOF);
292                }
293
294                return State::End;
295            }
296            Some(ch) => ch,
297        };
298
299        // NOTE: the design of the match arms's order is of no importance.
300        // If different orders result in different states, then it has to be fixed.
301        match c {
302            '#' => State::LineComment,
303            '@' => State::Lexeme(T_AT),
304            ',' => State::Lexeme(T_COMMA),
305            '*' => State::Lexeme(T_MUL),
306            '/' => State::Lexeme(T_DIV),
307            '%' => State::Lexeme(T_MOD),
308            '+' => State::Lexeme(T_ADD),
309            '-' => State::Lexeme(T_SUB),
310            '^' => State::Lexeme(T_POW),
311            '=' => match self.peek() {
312                Some('=') => {
313                    self.pop();
314                    State::Lexeme(T_EQLC)
315                }
316                // =~ (label matcher) MUST be in brace
317                Some('~') => State::Err("unexpected character after '=': '~'".into()),
318                _ => State::Lexeme(T_EQL),
319            },
320            '!' => match self.pop() {
321                Some('=') => State::Lexeme(T_NEQ),
322                Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
323                None => State::Err("'!' can not be at the end".into()),
324            },
325            '<' => match self.peek() {
326                Some('=') => {
327                    self.pop();
328                    State::Lexeme(T_LTE)
329                }
330                _ => State::Lexeme(T_LSS),
331            },
332            '>' => match self.peek() {
333                Some('=') => {
334                    self.pop();
335                    State::Lexeme(T_GTE)
336                }
337                _ => State::Lexeme(T_GTR),
338            },
339            ch if ch.is_ascii_whitespace() => self.ignore_space(),
340            ch if ch.is_ascii_digit() => State::NumberOrDuration,
341            '.' => match self.peek() {
342                Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
343                Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
344                None => State::Err("unexpected character: '.'".into()),
345            },
346            ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
347            ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
348            '(' => {
349                if self.inc_paren_depth() {
350                    return State::Lexeme(T_LEFT_PAREN);
351                }
352                State::Err("too many left parentheses".into())
353            }
354            ')' => {
355                if self.is_paren_balanced() {
356                    return State::Err("unexpected right parenthesis ')'".into());
357                }
358                if self.dec_paren_depth() {
359                    return State::Lexeme(T_RIGHT_PAREN);
360                }
361                State::Err("unexpected right parenthesis ')'".into())
362            }
363            '{' => {
364                self.dive_into_braces();
365                State::Lexeme(T_LEFT_BRACE)
366            }
367            // the matched } has been consumed inside braces
368            '}' => State::Err("unexpected right brace '}'".into()),
369            '[' => {
370                self.reset_colon_scanned();
371                self.dive_into_brackets();
372                State::Lexeme(T_LEFT_BRACKET)
373            }
374            // the matched ] has been consumed inside brackets
375            ']' => State::Err("unexpected right bracket ']'".into()),
376            ch => State::Err(format!("unexpected character: {ch:?}")),
377        }
378    }
379
380    /// the first number has been consumed, so first backup.
381    fn accept_number_or_duration(&mut self) -> State {
382        self.backup();
383        if self.scan_number() {
384            return State::Lexeme(T_NUMBER);
385        }
386
387        // Next two chars must be a valid unit and a non-alphanumeric.
388        if self.accept_remaining_duration() {
389            return State::Lexeme(T_DURATION);
390        }
391
392        // the next char is invalid, so it should be captured in the err info.
393        self.pop();
394        State::Err(format!(
395            "bad number or duration syntax: {}",
396            self.lexeme_string()
397        ))
398    }
399
400    /// the first alphabetic character has been consumed, and no need to backup.
401    fn accept_keyword_or_identifier(&mut self) -> State {
402        while let Some(ch) = self.peek() {
403            if is_alpha_numeric(ch) || ch == ':' {
404                self.pop();
405            } else {
406                break;
407            }
408        }
409
410        let s = self.lexeme_string();
411        let s_lower = s.to_lowercase();
412        match get_keyword_token(&s_lower) {
413            Some(token_id) => {
414                // fill, fill_left, fill_right can be used as metric identifiers
415                // if not followed by a left parenthesis
416                if token_id == T_FILL || token_id == T_FILL_LEFT || token_id == T_FILL_RIGHT {
417                    // Look ahead to see if next non-whitespace char is '('
418                    let mut idx = self.ctx.idx;
419                    let mut found_lparen = false;
420                    while let Some(&ch) = self.ctx.chars.get(idx) {
421                        if ch.is_ascii_whitespace() {
422                            idx += 1;
423                        } else if ch == '(' {
424                            found_lparen = true;
425                            break;
426                        } else {
427                            break;
428                        }
429                    }
430                    if !found_lparen {
431                        // Not followed by (, treat as metric identifier
432                        return State::Lexeme(T_IDENTIFIER);
433                    }
434                }
435                State::Lexeme(token_id)
436            }
437            None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
438            _ => State::Lexeme(T_IDENTIFIER),
439        }
440    }
441
442    /// # has already been consumed.
443    fn ignore_comment_line(&mut self) -> State {
444        while let Some(ch) = self.pop() {
445            if ch == '\r' || ch == '\n' {
446                break;
447            }
448        }
449        self.ignore();
450        State::Start
451    }
452
453    /// accept consumes the next char if f(ch) returns true.
454    fn accept<F>(&mut self, f: F) -> bool
455    where
456        F: Fn(char) -> bool,
457    {
458        if let Some(ch) = self.peek() {
459            if f(ch) {
460                self.pop();
461                return true;
462            }
463        }
464        false
465    }
466
467    /// accept_run consumes a run of char from the valid set.
468    fn accept_run<F>(&mut self, f: F)
469    where
470        F: Fn(char) -> bool,
471    {
472        while let Some(ch) = self.peek() {
473            if f(ch) {
474                self.pop();
475            } else {
476                break;
477            }
478        }
479    }
480
481    /// consumes a run of space, and ignore them.
482    fn ignore_space(&mut self) -> State {
483        self.backup(); // backup to include the already spanned space
484        self.accept_run(|ch| ch.is_ascii_whitespace());
485        self.ignore();
486        State::Start
487    }
488
489    /// scan_number scans numbers of different formats. The scanned Item is
490    /// not necessarily a valid number. This case is caught by the parser.
491    fn scan_number(&mut self) -> bool {
492        let mut hex_digit = false;
493        if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
494            hex_digit = true;
495        }
496        let is_valid_digit = |ch: char| -> bool {
497            if hex_digit {
498                ch.is_ascii_hexdigit()
499            } else {
500                ch.is_ascii_digit()
501            }
502        };
503
504        self.accept_run(is_valid_digit);
505        if self.accept(|ch| ch == '.') {
506            self.accept_run(is_valid_digit);
507        }
508        if self.accept(|ch| ch == 'e' || ch == 'E') {
509            self.accept(|ch| ch == '+' || ch == '-');
510            self.accept_run(|ch| ch.is_ascii_digit());
511        }
512
513        // Next thing must not be alpha or '.'
514        // if alpha: it maybe a duration
515        // if '.': invalid number
516        !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
517    }
518
519    /// number part has already been scanned.
520    /// true only if the char after duration is not alphanumeric.
521    fn accept_remaining_duration(&mut self) -> bool {
522        // Next two char must be a valid duration.
523        if !self.accept(|ch| "smhdwy".contains(ch)) {
524            return false;
525        }
526        // Support for ms. Bad units like hs, ys will be caught when we actually
527        // parse the duration.
528        self.accept(|ch| ch == 's');
529
530        // Next char can be another number then a unit.
531        while self.accept(|ch| ch.is_ascii_digit()) {
532            self.accept_run(|ch| ch.is_ascii_digit());
533            // y is no longer in the list as it should always come first in durations.
534            if !self.accept(|ch| "smhdw".contains(ch)) {
535                return false;
536            }
537            // Support for ms. Bad units like hs, ys will be caught when we actually
538            // parse the duration.
539            self.accept(|ch| ch == 's');
540        }
541
542        !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
543    }
544
545    /// scans a string escape sequence. The initial escaping character (\)
546    /// has already been consumed.
547    // TODO: checking the validity of code point is NOT supported yet.
548    fn accept_escape(&mut self, symbol: char) -> State {
549        match self.pop() {
550            Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
551            Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
552            None => State::Err("escape sequence not terminated".into()),
553        }
554    }
555
556    /// scans a quoted string. The initial quote has already been consumed.
557    fn accept_string(&mut self, symbol: char) -> State {
558        while let Some(ch) = self.pop() {
559            if ch == '\\' {
560                return State::Escape(symbol);
561            }
562
563            if ch == symbol {
564                return State::Lexeme(T_STRING);
565            }
566        }
567
568        State::Err(format!("unterminated quoted string {symbol}"))
569    }
570
571    /// scans the inside of a vector selector. Keywords are ignored and
572    /// scanned as identifiers.
573    fn inside_braces(&mut self) -> State {
574        match self.pop() {
575            Some('#') => State::LineComment,
576            Some(',') => State::Lexeme(T_COMMA),
577            Some('o') | Some('O') => {
578                if let Some('r') | Some('R') = self.peek() {
579                    self.pop();
580                    if let Some(' ') = self.peek() {
581                        State::Lexeme(T_LOR)
582                    } else {
583                        State::Identifier
584                    }
585                } else {
586                    State::Identifier
587                }
588            }
589            Some(ch) if ch.is_ascii_whitespace() => State::Space,
590            Some(ch) if is_alpha(ch) => State::Identifier,
591            Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
592            Some('=') => match self.peek() {
593                Some('~') => {
594                    self.pop();
595                    State::Lexeme(T_EQL_REGEX)
596                }
597                _ => State::Lexeme(T_EQL),
598            },
599            Some('!') => match self.pop() {
600                Some('~') => State::Lexeme(T_NEQ_REGEX),
601                Some('=') => State::Lexeme(T_NEQ),
602                Some(ch) => State::Err(format!(
603                    "unexpected character after '!' inside braces: '{ch}'"
604                )),
605                None => State::Err("'!' can not be at the end".into()),
606            },
607            Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
608            Some('}') => {
609                self.jump_outof_braces();
610                State::Lexeme(T_RIGHT_BRACE)
611            }
612            Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
613            None => State::Err("unexpected end of input inside braces".into()),
614        }
615    }
616
617    // this won't affect the cursor.
618    fn last_char_matches<F>(&mut self, f: F) -> bool
619    where
620        F: Fn(char) -> bool,
621    {
622        // if cursor is at the beginning, then do nothing.
623        if !self.backup() {
624            return false;
625        }
626        let matched = matches!(self.peek(), Some(ch) if f(ch));
627        self.pop();
628        matched
629    }
630
631    // this won't affect the cursor.
632    fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
633        // note: colon has already been consumed, so first backup
634        self.backup();
635        let matched = self.last_char_matches(|ch| ch == '[');
636        self.pop();
637        matched
638    }
639
640    // left brackets has already be consumed.
641    fn inside_brackets(&mut self) -> State {
642        match self.pop() {
643            Some(ch) if ch.is_ascii_whitespace() => State::Space,
644            Some(':') => {
645                if self.is_colon_scanned() {
646                    return State::Err("unexpected second colon(:) in brackets".into());
647                }
648
649                if self.is_colon_the_first_char_in_brackets() {
650                    return State::Err("expect duration before first colon(:) in brackets".into());
651                }
652
653                self.set_colon_scanned();
654                State::Lexeme(T_COLON)
655            }
656            Some(ch) if ch.is_ascii_digit() => self.accept_number_or_duration(),
657            Some(']') => {
658                self.jump_outof_brackets();
659                self.reset_colon_scanned();
660                State::Lexeme(T_RIGHT_BRACKET)
661            }
662            Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
663            Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
664            None => State::Err("unexpected end of input inside brackets".into()),
665        }
666    }
667
668    // scans an alphanumeric identifier. The next character
669    // is known to be a letter.
670    fn accept_identifier(&mut self) -> State {
671        self.accept_run(is_alpha_numeric);
672        State::Lexeme(T_IDENTIFIER)
673    }
674}
675
676// TODO: reference iterator
677impl Iterator for Lexer {
678    type Item = Result<LexemeType, String>;
679
680    fn next(&mut self) -> Option<Self::Item> {
681        self.shift();
682        match &self.state {
683            State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
684            State::Err(info) => Some(Err(info.clone())),
685            State::End => None,
686            _ => self.next(),
687        }
688    }
689}
690
691fn is_alpha_numeric(ch: char) -> bool {
692    is_alpha(ch) || ch.is_ascii_digit()
693}
694
/// Whether `ch` is an underscore or an ASCII letter.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
698
699pub(crate) fn is_label(s: &str) -> bool {
700    if s.is_empty() {
701        return false;
702    }
703    let mut chars = s.chars();
704    match chars.next() {
705        None => false,
706        Some(ch) if !is_alpha(ch) => false,
707        Some(_) => {
708            for ch in chars {
709                if !is_alpha_numeric(ch) {
710                    return false;
711                }
712            }
713            true
714        }
715    }
716}
717
718#[cfg(test)]
719mod tests {
720    use super::*;
721
722    type LexemeTuple = (TokenId, usize, usize);
723    /// - MatchTuple.0 is input
724    /// - MatchTuple.1 is the expected generated Lexemes
725    /// - MatchTuple.2 is the Err info if the input is invalid PromQL query
726    type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);
727
728    type Case = (
729        &'static str,
730        Vec<Result<LexemeType, String>>,
731        Vec<Result<LexemeType, String>>,
732    );
733
734    fn assert_matches(v: Vec<MatchTuple>) {
735        let cases: Vec<Case> = v
736            .into_iter()
737            .map(|(input, lexemes, err)| {
738                let mut expected: Vec<Result<LexemeType, String>> = lexemes
739                    .into_iter()
740                    .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
741                    .collect();
742
743                if let Some(s) = err {
744                    expected.push(Err(s.to_string()));
745                }
746
747                let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
748                    // in lex test cases, we don't compare the EOF token
749                    .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
750                    .collect();
751                (input, expected, actual)
752            })
753            .collect();
754
755        for (input, expected, actual) in cases.iter() {
756            assert_eq!(expected, actual, "\n<input>: {input}");
757        }
758    }
759
760    #[test]
761    fn test_common() {
762        let cases = vec![
763            (",", vec![(T_COMMA, 0, 1)], None),
764            (
765                "()",
766                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
767                None,
768            ),
769            (
770                "{}",
771                vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
772                None,
773            ),
774            (
775                "[5m]",
776                vec![
777                    (T_LEFT_BRACKET, 0, 1),
778                    (T_DURATION, 1, 2),
779                    (T_RIGHT_BRACKET, 3, 1),
780                ],
781                None,
782            ),
783            (
784                "[ 5m]",
785                vec![
786                    (T_LEFT_BRACKET, 0, 1),
787                    (T_DURATION, 2, 2),
788                    (T_RIGHT_BRACKET, 4, 1),
789                ],
790                None,
791            ),
792            (
793                "[  5m]",
794                vec![
795                    (T_LEFT_BRACKET, 0, 1),
796                    (T_DURATION, 3, 2),
797                    (T_RIGHT_BRACKET, 5, 1),
798                ],
799                None,
800            ),
801            (
802                "[  5m ]",
803                vec![
804                    (T_LEFT_BRACKET, 0, 1),
805                    (T_DURATION, 3, 2),
806                    (T_RIGHT_BRACKET, 6, 1),
807                ],
808                None,
809            ),
810            ("\r\n\r", vec![], None),
811        ];
812
813        assert_matches(cases);
814    }
815
816    #[test]
817    fn test_numbers() {
818        let cases = vec![
819            ("1", vec![(T_NUMBER, 0, 1)], None),
820            ("4.23", vec![(T_NUMBER, 0, 4)], None),
821            (".3", vec![(T_NUMBER, 0, 2)], None),
822            ("5.", vec![(T_NUMBER, 0, 2)], None),
823            ("NaN", vec![(T_NUMBER, 0, 3)], None),
824            ("nAN", vec![(T_NUMBER, 0, 3)], None),
825            ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
826            ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
827            ("iNf", vec![(T_NUMBER, 0, 3)], None),
828            ("Inf", vec![(T_NUMBER, 0, 3)], None),
829            ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
830            (
831                "+Inf 123",
832                vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
833                None,
834            ),
835            (
836                "-Inf 123",
837                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
838                None,
839            ),
840            ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
841            ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
842            (
843                "-Inf 123",
844                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
845                None,
846            ),
847            ("0x123", vec![(T_NUMBER, 0, 5)], None),
848        ];
849        assert_matches(cases);
850    }
851
852    #[test]
853    fn test_strings() {
854        let cases = vec![
855            ("\"test\\tsequence\"", vec![(T_STRING, 0, 16)], None),
856            ("\"test\\\\.expression\"", vec![(T_STRING, 0, 19)], None),
857            (
858                "\"test\\.expression\"",
859                vec![],
860                Some("unknown escape sequence '.'"),
861            ),
862            (
863                "`test\\.expression`",
864                vec![],
865                Some("unknown escape sequence '.'"),
866            ),
867            (".٩", vec![], Some("unexpected character after '.': '٩'")),
868            // TODO: accept_escape SHOULD support invalid escape character
869            // "\xff"
870            // `\xff`
871        ];
872        assert_matches(cases);
873    }
874
875    #[test]
876    fn test_durations() {
877        let cases = vec![
878            ("5s", vec![(T_DURATION, 0, 2)], None),
879            ("123m", vec![(T_DURATION, 0, 4)], None),
880            ("1h", vec![(T_DURATION, 0, 2)], None),
881            ("3w", vec![(T_DURATION, 0, 2)], None),
882            ("1y", vec![(T_DURATION, 0, 2)], None),
883        ];
884        assert_matches(cases);
885    }
886
887    #[test]
888    fn test_identifiers() {
889        let cases = vec![
890            ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
891            ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
892            (
893                "abc d",
894                vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)],
895                None,
896            ),
897            (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
898            ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
899        ];
900        assert_matches(cases);
901    }
902
903    #[test]
904    fn test_comments() {
905        let cases = vec![
906            ("# some comment", vec![], None),
907            ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
908        ];
909        assert_matches(cases);
910    }
911
912    #[test]
913    fn test_operators() {
914        let cases = vec![
915            ("=", vec![(T_EQL, 0, 1)], None),
916            (
917                "{=}",
918                vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)],
919                None,
920            ),
921            ("==", vec![(T_EQLC, 0, 2)], None),
922            ("!=", vec![(T_NEQ, 0, 2)], None),
923            ("<", vec![(T_LSS, 0, 1)], None),
924            (">", vec![(T_GTR, 0, 1)], None),
925            (">=", vec![(T_GTE, 0, 2)], None),
926            ("<=", vec![(T_LTE, 0, 2)], None),
927            ("+", vec![(T_ADD, 0, 1)], None),
928            ("-", vec![(T_SUB, 0, 1)], None),
929            ("*", vec![(T_MUL, 0, 1)], None),
930            ("/", vec![(T_DIV, 0, 1)], None),
931            ("^", vec![(T_POW, 0, 1)], None),
932            ("%", vec![(T_MOD, 0, 1)], None),
933            ("AND", vec![(T_LAND, 0, 3)], None),
934            ("or", vec![(T_LOR, 0, 2)], None),
935            ("unless", vec![(T_LUNLESS, 0, 6)], None),
936            ("@", vec![(T_AT, 0, 1)], None),
937        ];
938        assert_matches(cases);
939    }
940
941    #[test]
942    fn test_aggregators() {
943        let cases = vec![
944            ("sum", vec![(T_SUM, 0, 3)], None),
945            ("AVG", vec![(T_AVG, 0, 3)], None),
946            ("Max", vec![(T_MAX, 0, 3)], None),
947            ("min", vec![(T_MIN, 0, 3)], None),
948            ("count", vec![(T_COUNT, 0, 5)], None),
949            ("stdvar", vec![(T_STDVAR, 0, 6)], None),
950            ("stddev", vec![(T_STDDEV, 0, 6)], None),
951        ];
952        assert_matches(cases);
953    }
954
955    #[test]
956    fn test_keywords() {
957        let cases = vec![
958            ("offset", vec![(T_OFFSET, 0, 6)], None),
959            ("by", vec![(T_BY, 0, 2)], None),
960            ("without", vec![(T_WITHOUT, 0, 7)], None),
961            ("on", vec![(T_ON, 0, 2)], None),
962            ("ignoring", vec![(T_IGNORING, 0, 8)], None),
963            ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
964            ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
965            ("bool", vec![(T_BOOL, 0, 4)], None),
966            ("atan2", vec![(T_ATAN2, 0, 5)], None),
967            // fill as metric identifier (not followed by ()
968            ("fill", vec![(T_IDENTIFIER, 0, 4)], None),
969            ("fill_left", vec![(T_IDENTIFIER, 0, 9)], None),
970            ("fill_right", vec![(T_IDENTIFIER, 0, 10)], None),
971            // fill as modifier (followed by ()
972            (
973                "fill(1)",
974                vec![
975                    (T_FILL, 0, 4),
976                    (T_LEFT_PAREN, 4, 1),
977                    (T_NUMBER, 5, 1),
978                    (T_RIGHT_PAREN, 6, 1),
979                ],
980                None,
981            ),
982            (
983                "fill_left(1)",
984                vec![
985                    (T_FILL_LEFT, 0, 9),
986                    (T_LEFT_PAREN, 9, 1),
987                    (T_NUMBER, 10, 1),
988                    (T_RIGHT_PAREN, 11, 1),
989                ],
990                None,
991            ),
992            (
993                "fill_right(2)",
994                vec![
995                    (T_FILL_RIGHT, 0, 10),
996                    (T_LEFT_PAREN, 10, 1),
997                    (T_NUMBER, 11, 1),
998                    (T_RIGHT_PAREN, 12, 1),
999                ],
1000                None,
1001            ),
1002            // fill with whitespace before (
1003            (
1004                "fill (1)",
1005                vec![
1006                    (T_FILL, 0, 4),
1007                    (T_LEFT_PAREN, 5, 1),
1008                    (T_NUMBER, 6, 1),
1009                    (T_RIGHT_PAREN, 7, 1),
1010                ],
1011                None,
1012            ),
1013            (
1014                "fill_left (1)",
1015                vec![
1016                    (T_FILL_LEFT, 0, 9),
1017                    (T_LEFT_PAREN, 10, 1),
1018                    (T_NUMBER, 11, 1),
1019                    (T_RIGHT_PAREN, 12, 1),
1020                ],
1021                None,
1022            ),
1023            (
1024                "fill_right (2)",
1025                vec![
1026                    (T_FILL_RIGHT, 0, 10),
1027                    (T_LEFT_PAREN, 11, 1),
1028                    (T_NUMBER, 12, 1),
1029                    (T_RIGHT_PAREN, 13, 1),
1030                ],
1031                None,
1032            ),
1033        ];
1034        assert_matches(cases);
1035    }
1036
1037    #[test]
1038    fn test_preprocessors() {
1039        let cases = vec![
1040            ("start", vec![(T_START, 0, 5)], None),
1041            ("end", vec![(T_END, 0, 3)], None),
1042        ];
1043        assert_matches(cases);
1044    }
1045
1046    #[test]
1047    fn test_selectors() {
1048        let cases = vec![
1049            ("北京", vec![], Some("unexpected character: '北'")),
1050            ("北京='a'", vec![], Some("unexpected character: '北'")),
1051            ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
1052            (
1053                "{foo='bar'}",
1054                vec![
1055                    (T_LEFT_BRACE, 0, 1),
1056                    (T_IDENTIFIER, 1, 3),
1057                    (T_EQL, 4, 1),
1058                    (T_STRING, 5, 5),
1059                    (T_RIGHT_BRACE, 10, 1),
1060                ],
1061                None,
1062            ),
1063            (
1064                r#"{foo="bar"}"#,
1065                vec![
1066                    (T_LEFT_BRACE, 0, 1),
1067                    (T_IDENTIFIER, 1, 3),
1068                    (T_EQL, 4, 1),
1069                    (T_STRING, 5, 5),
1070                    (T_RIGHT_BRACE, 10, 1),
1071                ],
1072                None,
1073            ),
1074            (
1075                r#"{foo="bar\"bar"}"#,
1076                vec![
1077                    (T_LEFT_BRACE, 0, 1),
1078                    (T_IDENTIFIER, 1, 3),
1079                    (T_EQL, 4, 1),
1080                    (T_STRING, 5, 10),
1081                    (T_RIGHT_BRACE, 15, 1),
1082                ],
1083                None,
1084            ),
1085            (
1086                r#"{NaN	!= "bar" }"#,
1087                vec![
1088                    (T_LEFT_BRACE, 0, 1),
1089                    (T_IDENTIFIER, 1, 3),
1090                    (T_NEQ, 5, 2),
1091                    (T_STRING, 8, 5),
1092                    (T_RIGHT_BRACE, 14, 1),
1093                ],
1094                None,
1095            ),
1096            (
1097                r#"{alert=~"bar" }"#,
1098                vec![
1099                    (T_LEFT_BRACE, 0, 1),
1100                    (T_IDENTIFIER, 1, 5),
1101                    (T_EQL_REGEX, 6, 2),
1102                    (T_STRING, 8, 5),
1103                    (T_RIGHT_BRACE, 14, 1),
1104                ],
1105                None,
1106            ),
1107            (
1108                r#"{on!~"bar"}"#,
1109                vec![
1110                    (T_LEFT_BRACE, 0, 1),
1111                    (T_IDENTIFIER, 1, 2),
1112                    (T_NEQ_REGEX, 3, 2),
1113                    (T_STRING, 5, 5),
1114                    (T_RIGHT_BRACE, 10, 1),
1115                ],
1116                None,
1117            ),
1118            (
1119                r#"{alert!#"bar"}"#,
1120                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
1121                Some("unexpected character after '!' inside braces: '#'"),
1122            ),
1123            (
1124                r#"{foo:a="bar"}"#,
1125                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
1126                Some("unexpected character inside braces: ':'"),
1127            ),
1128        ];
1129        assert_matches(cases);
1130    }
1131
1132    #[test]
1133    fn test_common_errors() {
1134        let cases = vec![
1135            ("=~", vec![], Some("unexpected character after '=': '~'")),
1136            ("!~", vec![], Some("unexpected character after '!': '~'")),
1137            ("!(", vec![], Some("unexpected character after '!': '('")),
1138            ("1a", vec![], Some("bad number or duration syntax: 1a")),
1139        ];
1140        assert_matches(cases);
1141    }
1142
1143    #[test]
1144    fn test_mismatched_parentheses() {
1145        let cases = vec![
1146            (
1147                "(",
1148                vec![(T_LEFT_PAREN, 0, 1)],
1149                Some("unclosed left parenthesis"),
1150            ),
1151            (")", vec![], Some("unexpected right parenthesis ')'")),
1152            (
1153                "())",
1154                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
1155                Some("unexpected right parenthesis ')'"),
1156            ),
1157            (
1158                "(()",
1159                vec![
1160                    (T_LEFT_PAREN, 0, 1),
1161                    (T_LEFT_PAREN, 1, 1),
1162                    (T_RIGHT_PAREN, 2, 1),
1163                ],
1164                Some("unclosed left parenthesis"),
1165            ),
1166            (
1167                "{",
1168                vec![(T_LEFT_BRACE, 0, 1)],
1169                Some("unexpected end of input inside braces"),
1170            ),
1171            ("}", vec![], Some("unexpected right brace '}'")),
1172            (
1173                "{{",
1174                vec![(T_LEFT_BRACE, 0, 1)],
1175                Some("unexpected left brace '{' inside braces"),
1176            ),
1177            (
1178                "{{}}",
1179                vec![(T_LEFT_BRACE, 0, 1)],
1180                Some("unexpected left brace '{' inside braces"),
1181            ),
1182            (
1183                "[",
1184                vec![(T_LEFT_BRACKET, 0, 1)],
1185                Some("unexpected end of input inside brackets"),
1186            ),
1187            (
1188                "[[",
1189                vec![(T_LEFT_BRACKET, 0, 1)],
1190                Some("unexpected left brace '[' inside brackets"),
1191            ),
1192            (
1193                "[]]",
1194                vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
1195                Some("unexpected right bracket ']'"),
1196            ),
1197            (
1198                "[[]]",
1199                vec![(T_LEFT_BRACKET, 0, 1)],
1200                Some("unexpected left brace '[' inside brackets"),
1201            ),
1202            ("]", vec![], Some("unexpected right bracket ']'")),
1203        ];
1204        assert_matches(cases);
1205    }
1206
1207    #[test]
1208    fn test_subqueries() {
1209        let cases = vec![
1210            (
1211                r#"test_name{on!~"bar"}[4m:4s]"#,
1212                vec![
1213                    (T_IDENTIFIER, 0, 9),
1214                    (T_LEFT_BRACE, 9, 1),
1215                    (T_IDENTIFIER, 10, 2),
1216                    (T_NEQ_REGEX, 12, 2),
1217                    (T_STRING, 14, 5),
1218                    (T_RIGHT_BRACE, 19, 1),
1219                    (T_LEFT_BRACKET, 20, 1),
1220                    (T_DURATION, 21, 2),
1221                    (T_COLON, 23, 1),
1222                    (T_DURATION, 24, 2),
1223                    (T_RIGHT_BRACKET, 26, 1),
1224                ],
1225                None,
1226            ),
1227            (
1228                r#"test:name{on!~"bar"}[4m:4s]"#,
1229                vec![
1230                    (T_METRIC_IDENTIFIER, 0, 9),
1231                    (T_LEFT_BRACE, 9, 1),
1232                    (T_IDENTIFIER, 10, 2),
1233                    (T_NEQ_REGEX, 12, 2),
1234                    (T_STRING, 14, 5),
1235                    (T_RIGHT_BRACE, 19, 1),
1236                    (T_LEFT_BRACKET, 20, 1),
1237                    (T_DURATION, 21, 2),
1238                    (T_COLON, 23, 1),
1239                    (T_DURATION, 24, 2),
1240                    (T_RIGHT_BRACKET, 26, 1),
1241                ],
1242                None,
1243            ),
1244            (
1245                r#"test:name{on!~"b:ar"}[4m:4s]"#,
1246                vec![
1247                    (T_METRIC_IDENTIFIER, 0, 9),
1248                    (T_LEFT_BRACE, 9, 1),
1249                    (T_IDENTIFIER, 10, 2),
1250                    (T_NEQ_REGEX, 12, 2),
1251                    (T_STRING, 14, 6),
1252                    (T_RIGHT_BRACE, 20, 1),
1253                    (T_LEFT_BRACKET, 21, 1),
1254                    (T_DURATION, 22, 2),
1255                    (T_COLON, 24, 1),
1256                    (T_DURATION, 25, 2),
1257                    (T_RIGHT_BRACKET, 27, 1),
1258                ],
1259                None,
1260            ),
1261            (
1262                r#"test:name{on!~"b:ar"}[4m:]"#,
1263                vec![
1264                    (T_METRIC_IDENTIFIER, 0, 9),
1265                    (T_LEFT_BRACE, 9, 1),
1266                    (T_IDENTIFIER, 10, 2),
1267                    (T_NEQ_REGEX, 12, 2),
1268                    (T_STRING, 14, 6),
1269                    (T_RIGHT_BRACE, 20, 1),
1270                    (T_LEFT_BRACKET, 21, 1),
1271                    (T_DURATION, 22, 2),
1272                    (T_COLON, 24, 1),
1273                    (T_RIGHT_BRACKET, 25, 1),
1274                ],
1275                None,
1276            ),
1277            (
1278                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
1279                vec![
1280                    (T_IDENTIFIER, 0, 13),
1281                    (T_LEFT_PAREN, 13, 1),
1282                    (T_IDENTIFIER, 14, 4),
1283                    (T_LEFT_PAREN, 18, 1),
1284                    (T_IDENTIFIER, 19, 3),
1285                    (T_LEFT_BRACE, 22, 1),
1286                    (T_IDENTIFIER, 23, 3),
1287                    (T_EQL, 26, 1),
1288                    (T_STRING, 27, 5),
1289                    (T_RIGHT_BRACE, 32, 1),
1290                    (T_LEFT_BRACKET, 33, 1),
1291                    (T_DURATION, 34, 2),
1292                    (T_RIGHT_BRACKET, 36, 1),
1293                    (T_RIGHT_PAREN, 37, 1),
1294                    (T_LEFT_BRACKET, 38, 1),
1295                    (T_DURATION, 39, 2),
1296                    (T_COLON, 41, 1),
1297                    (T_RIGHT_BRACKET, 42, 1),
1298                    (T_RIGHT_PAREN, 43, 1),
1299                    (T_LEFT_BRACKET, 44, 1),
1300                    (T_DURATION, 45, 2),
1301                    (T_COLON, 47, 1),
1302                    (T_DURATION, 48, 2),
1303                    (T_RIGHT_BRACKET, 50, 1),
1304                ],
1305                None,
1306            ),
1307            (
1308                r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
1309                vec![
1310                    (T_METRIC_IDENTIFIER, 0, 9),
1311                    (T_LEFT_BRACE, 9, 1),
1312                    (T_IDENTIFIER, 10, 2),
1313                    (T_NEQ_REGEX, 12, 2),
1314                    (T_STRING, 14, 6),
1315                    (T_RIGHT_BRACE, 20, 1),
1316                    (T_LEFT_BRACKET, 21, 1),
1317                    (T_DURATION, 22, 2),
1318                    (T_COLON, 24, 1),
1319                    (T_DURATION, 25, 2),
1320                    (T_RIGHT_BRACKET, 27, 1),
1321                    (T_OFFSET, 29, 6),
1322                    (T_DURATION, 36, 3),
1323                ],
1324                None,
1325            ),
1326            (
1327                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
1328                vec![
1329                    (T_IDENTIFIER, 0, 13),
1330                    (T_LEFT_PAREN, 13, 1),
1331                    (T_IDENTIFIER, 14, 4),
1332                    (T_LEFT_PAREN, 18, 1),
1333                    (T_IDENTIFIER, 19, 3),
1334                    (T_LEFT_BRACE, 22, 1),
1335                    (T_IDENTIFIER, 23, 3),
1336                    (T_EQL, 26, 1),
1337                    (T_STRING, 27, 5),
1338                    (T_RIGHT_BRACE, 32, 1),
1339                    (T_LEFT_BRACKET, 33, 1),
1340                    (T_DURATION, 34, 2),
1341                    (T_RIGHT_BRACKET, 36, 1),
1342                    (T_RIGHT_PAREN, 37, 1),
1343                    (T_LEFT_BRACKET, 38, 1),
1344                    (T_DURATION, 39, 2),
1345                    (T_COLON, 41, 1),
1346                    (T_RIGHT_BRACKET, 42, 1),
1347                    (T_OFFSET, 44, 6),
1348                    (T_DURATION, 51, 2),
1349                    (T_RIGHT_PAREN, 53, 1),
1350                    (T_LEFT_BRACKET, 54, 1),
1351                    (T_DURATION, 55, 2),
1352                    (T_COLON, 57, 1),
1353                    (T_DURATION, 58, 2),
1354                    (T_RIGHT_BRACKET, 60, 1),
1355                ],
1356                None,
1357            ),
1358            (
1359                r#"test:name[ 5m]"#,
1360                vec![
1361                    (T_METRIC_IDENTIFIER, 0, 9),
1362                    (T_LEFT_BRACKET, 9, 1),
1363                    (T_DURATION, 11, 2),
1364                    (T_RIGHT_BRACKET, 13, 1),
1365                ],
1366                None,
1367            ),
1368            (
1369                r#"test:name{o:n!~"bar"}[4m:4s]"#,
1370                vec![
1371                    (T_METRIC_IDENTIFIER, 0, 9),
1372                    (T_LEFT_BRACE, 9, 1),
1373                    (T_IDENTIFIER, 10, 1),
1374                ],
1375                Some("unexpected character inside braces: ':'"),
1376            ),
1377            (
1378                r#"test:name{on!~"bar"}[4m:4s:4h]"#,
1379                vec![
1380                    (T_METRIC_IDENTIFIER, 0, 9),
1381                    (T_LEFT_BRACE, 9, 1),
1382                    (T_IDENTIFIER, 10, 2),
1383                    (T_NEQ_REGEX, 12, 2),
1384                    (T_STRING, 14, 5),
1385                    (T_RIGHT_BRACE, 19, 1),
1386                    (T_LEFT_BRACKET, 20, 1),
1387                    (T_DURATION, 21, 2),
1388                    (T_COLON, 23, 1),
1389                    (T_DURATION, 24, 2),
1390                ],
1391                Some("unexpected second colon(:) in brackets"),
1392            ),
1393            (
1394                r#"test:name{on!~"bar"}[4m:4s:]"#,
1395                vec![
1396                    (T_METRIC_IDENTIFIER, 0, 9),
1397                    (T_LEFT_BRACE, 9, 1),
1398                    (T_IDENTIFIER, 10, 2),
1399                    (T_NEQ_REGEX, 12, 2),
1400                    (T_STRING, 14, 5),
1401                    (T_RIGHT_BRACE, 19, 1),
1402                    (T_LEFT_BRACKET, 20, 1),
1403                    (T_DURATION, 21, 2),
1404                    (T_COLON, 23, 1),
1405                    (T_DURATION, 24, 2),
1406                ],
1407                Some("unexpected second colon(:) in brackets"),
1408            ),
1409            (
1410                r#"test:name{on!~"bar"}[4m::]"#,
1411                vec![
1412                    (T_METRIC_IDENTIFIER, 0, 9),
1413                    (T_LEFT_BRACE, 9, 1),
1414                    (T_IDENTIFIER, 10, 2),
1415                    (T_NEQ_REGEX, 12, 2),
1416                    (T_STRING, 14, 5),
1417                    (T_RIGHT_BRACE, 19, 1),
1418                    (T_LEFT_BRACKET, 20, 1),
1419                    (T_DURATION, 21, 2),
1420                    (T_COLON, 23, 1),
1421                ],
1422                Some("unexpected second colon(:) in brackets"),
1423            ),
1424            (
1425                r#"test:name{on!~"bar"}[:4s]"#,
1426                vec![
1427                    (T_METRIC_IDENTIFIER, 0, 9),
1428                    (T_LEFT_BRACE, 9, 1),
1429                    (T_IDENTIFIER, 10, 2),
1430                    (T_NEQ_REGEX, 12, 2),
1431                    (T_STRING, 14, 5),
1432                    (T_RIGHT_BRACE, 19, 1),
1433                    (T_LEFT_BRACKET, 20, 1),
1434                ],
1435                Some("expect duration before first colon(:) in brackets"),
1436            ),
1437        ];
1438        assert_matches(cases);
1439    }
1440
1441    #[test]
1442    fn test_is_alpha() {
1443        assert!(is_alpha('_'));
1444        assert!(is_alpha('a'));
1445        assert!(is_alpha('z'));
1446        assert!(is_alpha('A'));
1447        assert!(is_alpha('Z'));
1448        assert!(!is_alpha('-'));
1449        assert!(!is_alpha('@'));
1450        assert!(!is_alpha('0'));
1451        assert!(!is_alpha('9'));
1452    }
1453
1454    #[test]
1455    fn test_is_alpha_numeric() {
1456        assert!(is_alpha_numeric('_'));
1457        assert!(is_alpha_numeric('a'));
1458        assert!(is_alpha_numeric('z'));
1459        assert!(is_alpha_numeric('A'));
1460        assert!(is_alpha_numeric('Z'));
1461        assert!(is_alpha_numeric('0'));
1462        assert!(is_alpha_numeric('9'));
1463        assert!(!is_alpha_numeric('-'));
1464        assert!(!is_alpha_numeric('@'));
1465    }
1466
1467    #[test]
1468    fn test_is_label() {
1469        assert!(is_label("_"));
1470        assert!(is_label("_up"));
1471        assert!(is_label("up"));
1472        assert!(is_label("up_"));
1473        assert!(is_label("up_system_1"));
1474
1475        assert!(!is_label(""));
1476        assert!(!is_label("0"));
1477        assert!(!is_label("0up"));
1478        assert!(!is_label("0_up"));
1479    }
1480}
promql_parser/parser/lex.rs

promql_parser/parser/
lex.rs