simple_test/parser/
lex.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use crate::parser::token::*;
16use lrlex::{DefaultLexeme, LRNonStreamingLexer};
17use lrpar::Lexeme;
18use std::fmt::Debug;
19
/// Characters accepted right after a backslash inside a quoted string.
/// `accept_escape` additionally accepts the quote symbol that opened the string.
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that open (and must close) a string literal: `'`, `"` and `` ` ``.
const STRING_SYMBOLS: &str = r#"'"`"#;

/// Lexeme type produced by this lexer: a `DefaultLexeme` keyed by `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
24
25pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<LexemeType, TokenId>, String> {
26    let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
27    match lexemes.last() {
28        Some(Err(info)) => Err(info.into()),
29        Some(Ok(_)) => {
30            // TODO: use better error mechanism, instead of filtering the err.
31            let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
32            Ok(LRNonStreamingLexer::new(s, lexemes, Vec::new()))
33        }
34        None => Err(format!("no expression found in input: '{s}'")),
35    }
36}
37
/// States of the lexer's state machine; `Lexer::shift` moves from one to the next.
#[derive(Debug)]
enum State {
    /// Ready to dispatch on the next input char.
    Start,
    /// Terminal state: the iterator yields `None` once it is reached.
    End,
    /// A token has been recognized; the next shift goes back to `Start`.
    Lexeme(TokenId),
    /// Scanning an identifier inside braces.
    Identifier,
    /// Scanning a word that is either a keyword or a (metric) identifier.
    KeywordOrIdentifier,
    /// Scanning something that starts like a number and may be a duration.
    NumberOrDuration,
    /// Scanning after a `[` has been opened.
    InsideBrackets,
    /// Scanning after a `{` has been opened.
    InsideBraces,
    /// Skipping a `#` comment.
    LineComment,
    /// Skipping a run of whitespace.
    Space,
    String(char), // char is the symbol, ' or " or `
    Escape(char), // Escape happens inside String. char is the symbol, ' or " or `
    /// Scanning failed with the given message; the next shift goes to `End`.
    Err(String),
}
54
/// Cursor and nesting bookkeeping shared by all lexer states.
///
/// `idx` counts chars while `start` and `pos` count UTF-8 bytes, so the span
/// `start..pos` is the text of the token currently being scanned.
#[derive(Debug)]
struct Context {
    // TODO: use &str instead of Vec<char> for better performance.
    chars: Vec<char>,
    idx: usize,   // Current position in the Vec, increment by 1.
    start: usize, // Start position of one Token, increment by char.len_utf8.
    pos: usize,   // Current position in the input, increment by char.len_utf8.

    paren_depth: usize, // Nesting depth of ( ) exprs, 0 means no parens.
    brace_open: bool,   // Whether a { is opened.
    bracket_open: bool, // Whether a [ is opened.
    got_colon: bool,    // Whether we got a ':' after [ was opened.
    eof: bool,          // Whether we got end of file
}
69
70impl Context {
71    fn new(input: &str) -> Context {
72        Self {
73            chars: input.chars().collect(),
74            idx: 0,
75            start: 0,
76            pos: 0,
77
78            paren_depth: 0,
79            brace_open: false,
80            bracket_open: false,
81            got_colon: false,
82            eof: false,
83        }
84    }
85
86    /// pop the first char.
87    fn pop(&mut self) -> Option<char> {
88        let ch = self.peek()?;
89        self.pos += ch.len_utf8();
90        self.idx += 1;
91        Some(ch)
92    }
93
94    /// backup steps back one char. If cursor is at the beginning, it does nothing.
95    /// caller should pay attention if the backup is successful or not.
96    fn backup(&mut self) -> bool {
97        if let Some(ch) = self.chars.get(self.idx - 1) {
98            self.pos -= ch.len_utf8();
99            self.idx -= 1;
100            return true;
101        };
102        false
103    }
104
105    /// get the char at the pos to check, this won't consume it.
106    fn peek(&self) -> Option<char> {
107        self.chars.get(self.idx).copied()
108    }
109
110    /// string lexeme SHOULD trim the surrounding string symbols, ' or " or `
111    fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
112        let mut start = self.start;
113        let mut len = self.pos - self.start;
114        if token_id == T_STRING {
115            start += 1;
116            len -= 2;
117        }
118        DefaultLexeme::new(token_id, start, len)
119    }
120
121    /// ignore the text between start and pos
122    fn ignore(&mut self) {
123        self.start = self.pos;
124    }
125
126    // TODO: refactor needed, details in Issues/15.
127    fn lexeme_string(&self) -> String {
128        let mut s = String::from("");
129        if self.idx == 0 {
130            return s;
131        }
132
133        let mut pos = self.pos;
134        let mut idx = self.idx;
135        while pos > self.start {
136            if let Some(&ch) = self.chars.get(idx - 1) {
137                pos -= ch.len_utf8();
138                idx -= 1;
139                s.push(ch);
140            };
141        }
142        s.chars().rev().collect()
143    }
144}
145
146#[derive(Debug)]
147struct Lexer {
148    state: State,
149    ctx: Context,
150}
151
152/// block for context operations.
153impl Lexer {
154    fn new(input: &str) -> Self {
155        let ctx = Context::new(input);
156        let state = State::Start;
157        Self { state, ctx }
158    }
159
160    fn is_inside_braces(&self) -> bool {
161        self.ctx.brace_open
162    }
163
164    fn jump_outof_braces(&mut self) {
165        self.ctx.brace_open = false;
166    }
167
168    fn dive_into_braces(&mut self) {
169        self.ctx.brace_open = true;
170    }
171
172    fn is_inside_brackets(&self) -> bool {
173        self.ctx.bracket_open
174    }
175
176    fn jump_outof_brackets(&mut self) {
177        self.ctx.bracket_open = false;
178    }
179
180    fn dive_into_brackets(&mut self) {
181        self.ctx.bracket_open = true;
182    }
183
184    fn is_colon_scanned(&self) -> bool {
185        self.ctx.got_colon
186    }
187
188    fn set_colon_scanned(&mut self) {
189        self.ctx.got_colon = true;
190    }
191
192    fn reset_colon_scanned(&mut self) {
193        self.ctx.got_colon = false;
194    }
195
196    /// true only if paren depth less than MAX
197    fn inc_paren_depth(&mut self) -> bool {
198        if self.ctx.paren_depth < usize::MAX {
199            self.ctx.paren_depth += 1;
200            return true;
201        }
202        false
203    }
204
205    /// true only if paren depth larger than 1
206    fn dec_paren_depth(&mut self) -> bool {
207        if self.ctx.paren_depth >= 1 {
208            self.ctx.paren_depth -= 1;
209            return true;
210        }
211        false
212    }
213
214    fn is_paren_balanced(&self) -> bool {
215        self.ctx.paren_depth == 0
216    }
217
218    fn pop(&mut self) -> Option<char> {
219        self.ctx.pop()
220    }
221
222    fn backup(&mut self) -> bool {
223        self.ctx.backup()
224    }
225
226    fn peek(&self) -> Option<char> {
227        self.ctx.peek()
228    }
229
230    /// lexeme() consumes the Span, which means consecutive lexeme() call
231    /// will get wrong Span unless Lexer shifts its State.
232    fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
233        let lexeme = self.ctx.lexeme(token_id);
234        self.ctx.ignore();
235        lexeme
236    }
237
238    fn lexeme_string(&self) -> String {
239        self.ctx.lexeme_string()
240    }
241
242    fn ignore(&mut self) {
243        self.ctx.ignore();
244    }
245
246    fn is_eof(&self) -> bool {
247        self.ctx.eof
248    }
249
250    fn set_eof(&mut self) {
251        self.ctx.eof = true;
252    }
253}
254
255/// block for state operations.
256impl Lexer {
257    fn shift(&mut self) {
258        // NOTE: the design of the match arms's order is of no importance.
259        // If different orders result in different states, then it has to be fixed.
260        self.state = match self.state {
261            State::Start => self.start(),
262            State::End => State::Err("End state can not shift forward.".into()),
263            State::Lexeme(_) => State::Start,
264            State::String(ch) => self.accept_string(ch),
265            State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
266            State::Identifier => self.accept_identifier(),
267            State::NumberOrDuration => self.accept_number_or_duration(),
268            State::InsideBrackets => self.inside_brackets(),
269            State::InsideBraces => self.inside_braces(),
270            State::LineComment => self.ignore_comment_line(),
271            State::Escape(ch) => self.accept_escape(ch),
272            State::Space => self.ignore_space(),
273            State::Err(_) => State::End,
274        };
275    }
276
277    fn start(&mut self) -> State {
278        if self.is_inside_braces() {
279            return State::InsideBraces;
280        }
281
282        if self.is_inside_brackets() {
283            return State::InsideBrackets;
284        }
285
286        let c = match self.pop() {
287            None => {
288                if !self.is_paren_balanced() {
289                    return State::Err("unclosed left parenthesis".into());
290                }
291
292                if !self.is_eof() {
293                    self.set_eof();
294                    return State::Lexeme(T_EOF);
295                }
296
297                return State::End;
298            }
299            Some(ch) => ch,
300        };
301
302        // NOTE: the design of the match arms's order is of no importance.
303        // If different orders result in different states, then it has to be fixed.
304        match c {
305            '#' => State::LineComment,
306            '@' => State::Lexeme(T_AT),
307            ',' => State::Lexeme(T_COMMA),
308            '*' => State::Lexeme(T_MUL),
309            '/' => State::Lexeme(T_DIV),
310            '%' => State::Lexeme(T_MOD),
311            '+' => State::Lexeme(T_ADD),
312            '-' => State::Lexeme(T_SUB),
313            '^' => State::Lexeme(T_POW),
314            '=' => match self.peek() {
315                Some('=') => {
316                    self.pop();
317                    State::Lexeme(T_EQLC)
318                }
319                // =~ (label matcher) MUST be in brace
320                Some('~') => State::Err("unexpected character after '=': '~'".into()),
321                _ => State::Lexeme(T_EQL),
322            },
323            '!' => match self.pop() {
324                Some('=') => State::Lexeme(T_NEQ),
325                Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
326                None => State::Err("'!' can not be at the end".into()),
327            },
328            '<' => match self.peek() {
329                Some('=') => {
330                    self.pop();
331                    State::Lexeme(T_LTE)
332                }
333                _ => State::Lexeme(T_LSS),
334            },
335            '>' => match self.peek() {
336                Some('=') => {
337                    self.pop();
338                    State::Lexeme(T_GTE)
339                }
340                _ => State::Lexeme(T_GTR),
341            },
342            ch if ch.is_ascii_whitespace() => self.ignore_space(),
343            ch if ch.is_ascii_digit() => State::NumberOrDuration,
344            '.' => match self.peek() {
345                Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
346                Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
347                None => State::Err("unexpected character: '.'".into()),
348            },
349            ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
350            ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
351            '(' => {
352                if self.inc_paren_depth() {
353                    return State::Lexeme(T_LEFT_PAREN);
354                }
355                State::Err("too many left parentheses".into())
356            }
357            ')' => {
358                if self.is_paren_balanced() {
359                    return State::Err("unexpected right parenthesis ')'".into());
360                }
361                if self.dec_paren_depth() {
362                    return State::Lexeme(T_RIGHT_PAREN);
363                }
364                State::Err("unexpected right parenthesis ')'".into())
365            }
366            '{' => {
367                self.dive_into_braces();
368                State::Lexeme(T_LEFT_BRACE)
369            }
370            // the matched } has been consumed inside braces
371            '}' => State::Err("unexpected right brace '}'".into()),
372            '[' => {
373                self.reset_colon_scanned();
374                self.dive_into_brackets();
375                State::Lexeme(T_LEFT_BRACKET)
376            }
377            // the matched ] has been consumed inside brackets
378            ']' => State::Err("unexpected right bracket ']'".into()),
379            ch => State::Err(format!("unexpected character: {ch:?}")),
380        }
381    }
382
383    /// the first number has been consumed, so first backup.
384    fn accept_duration(&mut self) -> State {
385        self.backup();
386        self.scan_number();
387        if !self.accept_remaining_duration() {
388            self.pop(); // this is to include the bad syntax
389            return State::Err(format!("bad duration syntax: {}", self.lexeme_string()));
390        }
391        State::Lexeme(T_DURATION)
392    }
393
394    /// the first number has been consumed, so first backup.
395    fn accept_number_or_duration(&mut self) -> State {
396        self.backup();
397        if self.scan_number() {
398            return State::Lexeme(T_NUMBER);
399        }
400
401        // Next two chars must be a valid unit and a non-alphanumeric.
402        if self.accept_remaining_duration() {
403            return State::Lexeme(T_DURATION);
404        }
405
406        // the next char is invalid, so it should be captured in the err info.
407        self.pop();
408        State::Err(format!(
409            "bad number or duration syntax: {}",
410            self.lexeme_string()
411        ))
412    }
413
414    /// the first alphabetic character has been consumed, and no need to backup.
415    fn accept_keyword_or_identifier(&mut self) -> State {
416        while let Some(ch) = self.peek() {
417            if is_alpha_numeric(ch) || ch == ':' {
418                self.pop();
419            } else {
420                break;
421            }
422        }
423
424        let s = self.lexeme_string();
425        match get_keyword_token(&s.to_lowercase()) {
426            Some(token_id) => State::Lexeme(token_id),
427            None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
428            _ => State::Lexeme(T_IDENTIFIER),
429        }
430    }
431
432    /// # has already been consumed.
433    fn ignore_comment_line(&mut self) -> State {
434        while let Some(ch) = self.pop() {
435            if ch == '\r' || ch == '\n' {
436                break;
437            }
438        }
439        self.ignore();
440        State::Start
441    }
442
443    /// accept consumes the next char if f(ch) returns true.
444    fn accept<F>(&mut self, f: F) -> bool
445    where
446        F: Fn(char) -> bool,
447    {
448        if let Some(ch) = self.peek() {
449            if f(ch) {
450                self.pop();
451                return true;
452            }
453        }
454        false
455    }
456
457    /// accept_run consumes a run of char from the valid set.
458    fn accept_run<F>(&mut self, f: F)
459    where
460        F: Fn(char) -> bool,
461    {
462        while let Some(ch) = self.peek() {
463            if f(ch) {
464                self.pop();
465            } else {
466                break;
467            }
468        }
469    }
470
471    /// consumes a run of space, and ignore them.
472    fn ignore_space(&mut self) -> State {
473        self.backup(); // backup to include the already spanned space
474        self.accept_run(|ch| ch.is_ascii_whitespace());
475        self.ignore();
476        State::Start
477    }
478
479    /// scan_number scans numbers of different formats. The scanned Item is
480    /// not necessarily a valid number. This case is caught by the parser.
481    fn scan_number(&mut self) -> bool {
482        let mut hex_digit = false;
483        if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
484            hex_digit = true;
485        }
486        let is_valid_digit = |ch: char| -> bool {
487            if hex_digit {
488                ch.is_ascii_hexdigit()
489            } else {
490                ch.is_ascii_digit()
491            }
492        };
493
494        self.accept_run(is_valid_digit);
495        if self.accept(|ch| ch == '.') {
496            self.accept_run(is_valid_digit);
497        }
498        if self.accept(|ch| ch == 'e' || ch == 'E') {
499            self.accept(|ch| ch == '+' || ch == '-');
500            self.accept_run(|ch| ch.is_ascii_digit());
501        }
502
503        // Next thing must not be alpha or '.'
504        // if alpha: it maybe a duration
505        // if '.': invalid number
506        !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
507    }
508
509    /// number part has already been scanned.
510    /// true only if the char after duration is not alphanumeric.
511    fn accept_remaining_duration(&mut self) -> bool {
512        // Next two char must be a valid duration.
513        if !self.accept(|ch| "smhdwy".contains(ch)) {
514            return false;
515        }
516        // Support for ms. Bad units like hs, ys will be caught when we actually
517        // parse the duration.
518        self.accept(|ch| ch == 's');
519
520        // Next char can be another number then a unit.
521        while self.accept(|ch| ch.is_ascii_digit()) {
522            self.accept_run(|ch| ch.is_ascii_digit());
523            // y is no longer in the list as it should always come first in durations.
524            if !self.accept(|ch| "smhdw".contains(ch)) {
525                return false;
526            }
527            // Support for ms. Bad units like hs, ys will be caught when we actually
528            // parse the duration.
529            self.accept(|ch| ch == 's');
530        }
531
532        !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
533    }
534
535    /// scans a string escape sequence. The initial escaping character (\)
536    /// has already been consumed.
537    // TODO: checking the validity of code point is NOT supported yet.
538    fn accept_escape(&mut self, symbol: char) -> State {
539        match self.pop() {
540            Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
541            Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
542            None => State::Err("escape sequence not terminated".into()),
543        }
544    }
545
546    /// scans a quoted string. The initial quote has already been consumed.
547    fn accept_string(&mut self, symbol: char) -> State {
548        while let Some(ch) = self.pop() {
549            if ch == '\\' {
550                return State::Escape(symbol);
551            }
552
553            if ch == symbol {
554                return State::Lexeme(T_STRING);
555            }
556        }
557
558        State::Err(format!("unterminated quoted string {symbol}"))
559    }
560
561    /// scans the inside of a vector selector. Keywords are ignored and
562    /// scanned as identifiers.
563    fn inside_braces(&mut self) -> State {
564        match self.pop() {
565            Some('#') => State::LineComment,
566            Some(',') => State::Lexeme(T_COMMA),
567            Some(ch) if ch.is_ascii_whitespace() => State::Space,
568            Some(ch) if is_alpha(ch) => State::Identifier,
569            Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
570            Some('=') => match self.peek() {
571                Some('~') => {
572                    self.pop();
573                    State::Lexeme(T_EQL_REGEX)
574                }
575                _ => State::Lexeme(T_EQL),
576            },
577            Some('!') => match self.pop() {
578                Some('~') => State::Lexeme(T_NEQ_REGEX),
579                Some('=') => State::Lexeme(T_NEQ),
580                Some(ch) => State::Err(format!(
581                    "unexpected character after '!' inside braces: '{ch}'"
582                )),
583                None => State::Err("'!' can not be at the end".into()),
584            },
585            Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
586            Some('}') => {
587                self.jump_outof_braces();
588                State::Lexeme(T_RIGHT_BRACE)
589            }
590            Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
591            None => State::Err("unexpected end of input inside braces".into()),
592        }
593    }
594
595    // this won't affect the cursor.
596    fn last_char_matches<F>(&mut self, f: F) -> bool
597    where
598        F: Fn(char) -> bool,
599    {
600        // if cursor is at the beginning, then do nothing.
601        if !self.backup() {
602            return false;
603        }
604        let matched = matches!(self.peek(), Some(ch) if f(ch));
605        self.pop();
606        matched
607    }
608
609    // this won't affect the cursor.
610    fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
611        // note: colon has already been consumed, so first backup
612        self.backup();
613        let matched = self.last_char_matches(|ch| ch == '[');
614        self.pop();
615        matched
616    }
617
618    // left brackets has already be consumed.
619    fn inside_brackets(&mut self) -> State {
620        match self.pop() {
621            Some(ch) if ch.is_ascii_whitespace() => State::Space,
622            Some(':') => {
623                if self.is_colon_scanned() {
624                    return State::Err("unexpected second colon(:) in brackets".into());
625                }
626
627                if self.is_colon_the_first_char_in_brackets() {
628                    return State::Err("expect duration before first colon(:) in brackets".into());
629                }
630
631                self.set_colon_scanned();
632                State::Lexeme(T_COLON)
633            }
634            Some(ch) if ch.is_ascii_digit() => self.accept_duration(),
635            Some(']') => {
636                self.jump_outof_brackets();
637                self.reset_colon_scanned();
638                State::Lexeme(T_RIGHT_BRACKET)
639            }
640            Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
641            Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
642            None => State::Err("unexpected end of input inside brackets".into()),
643        }
644    }
645
646    // scans an alphanumeric identifier. The next character
647    // is known to be a letter.
648    fn accept_identifier(&mut self) -> State {
649        self.accept_run(is_alpha_numeric);
650        State::Lexeme(T_IDENTIFIER)
651    }
652}
653
654// TODO: reference iterator
655impl Iterator for Lexer {
656    type Item = Result<LexemeType, String>;
657
658    fn next(&mut self) -> Option<Self::Item> {
659        self.shift();
660        match &self.state {
661            State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
662            State::Err(info) => Some(Err(info.clone())),
663            State::End => None,
664            _ => self.next(),
665        }
666    }
667}
668
669fn is_alpha_numeric(ch: char) -> bool {
670    is_alpha(ch) || ch.is_ascii_digit()
671}
672
/// Whether `ch` may start an identifier: an ASCII letter or `_`.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
676
677pub(crate) fn is_label(s: &str) -> bool {
678    if s.is_empty() {
679        return false;
680    }
681    let mut chars = s.chars();
682    match chars.next() {
683        None => false,
684        Some(ch) if !is_alpha(ch) => false,
685        Some(_) => {
686            for ch in chars {
687                if !is_alpha_numeric(ch) {
688                    return false;
689                }
690            }
691            true
692        }
693    }
694}
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699
    /// Expected lexeme as `(token id, start, len)`, in the argument order of
    /// `LexemeType::new`.
    type LexemeTuple = (TokenId, usize, usize);
    /// - MatchTuple.0 is input
    /// - MatchTuple.1 is the expected generated Lexemes
    /// - MatchTuple.2 is the Err info if the input is invalid PromQL query
    type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

    /// One evaluated case: `(input, expected items, actual items)`.
    type Case = (
        &'static str,
        Vec<Result<LexemeType, String>>,
        Vec<Result<LexemeType, String>>,
    );
711
712    fn assert_matches(v: Vec<MatchTuple>) {
713        let cases: Vec<Case> = v
714            .into_iter()
715            .map(|(input, lexemes, err)| {
716                let mut expected: Vec<Result<LexemeType, String>> = lexemes
717                    .into_iter()
718                    .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
719                    .collect();
720
721                if let Some(s) = err {
722                    expected.push(Err(s.to_string()));
723                }
724
725                let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
726                    // in lex test cases, we don't compare the EOF token
727                    .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
728                    .collect();
729                (input, expected, actual)
730            })
731            .collect();
732
733        for (input, expected, actual) in cases.iter() {
734            assert_eq!(expected, actual, "\n<input>: {}", input);
735        }
736    }
737
    /// Punctuation, paired delimiters, durations in brackets with varying
    /// whitespace, and whitespace-only input.
    #[test]
    fn test_common() {
        let cases = vec![
            (",", vec![(T_COMMA, 0, 1)], None),
            (
                "()",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                None,
            ),
            (
                "{}",
                vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
                None,
            ),
            (
                "[5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 1, 2),
                    (T_RIGHT_BRACKET, 3, 1),
                ],
                None,
            ),
            (
                "[ 5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 2, 2),
                    (T_RIGHT_BRACKET, 4, 1),
                ],
                None,
            ),
            (
                "[  5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 5, 1),
                ],
                None,
            ),
            (
                "[  5m ]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 6, 1),
                ],
                None,
            ),
            // whitespace only: nothing but the (filtered) EOF token is produced
            ("\r\n\r", vec![], None),
        ];

        assert_matches(cases);
    }
793
    /// Decimal and hexadecimal numbers, plus the case-insensitive `NaN` and
    /// `Inf` words that lex as `T_NUMBER`.
    #[test]
    fn test_numbers() {
        let cases = vec![
            ("1", vec![(T_NUMBER, 0, 1)], None),
            ("4.23", vec![(T_NUMBER, 0, 4)], None),
            (".3", vec![(T_NUMBER, 0, 2)], None),
            ("5.", vec![(T_NUMBER, 0, 2)], None),
            ("NaN", vec![(T_NUMBER, 0, 3)], None),
            ("nAN", vec![(T_NUMBER, 0, 3)], None),
            ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
            ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
            ("iNf", vec![(T_NUMBER, 0, 3)], None),
            ("Inf", vec![(T_NUMBER, 0, 3)], None),
            ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
            (
                "+Inf 123",
                vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
            ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
            // NOTE(review): same input and expectation as the "-Inf 123" case above.
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("0x123", vec![(T_NUMBER, 0, 5)], None),
        ];
        assert_matches(cases);
    }
829
    /// Quoted strings with valid and invalid escape sequences. String spans
    /// exclude the surrounding quotes.
    #[test]
    fn test_strings() {
        let cases = vec![
            ("\"test\\tsequence\"", vec![(T_STRING, 1, 14)], None),
            ("\"test\\\\.expression\"", vec![(T_STRING, 1, 17)], None),
            (
                "\"test\\.expression\"",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (
                "`test\\.expression`",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            // the char after '.' is not an ASCII digit, so no number is started
            (".٩", vec![], Some("unexpected character after '.': '٩'")),
            // TODO: accept_escape SHOULD support invalid escape character
            // "\xff"
            // `\xff`
        ];
        assert_matches(cases);
    }
852
    /// Single-unit durations outside brackets.
    #[test]
    fn test_durations() {
        let cases = vec![
            ("5s", vec![(T_DURATION, 0, 2)], None),
            ("123m", vec![(T_DURATION, 0, 4)], None),
            ("1h", vec![(T_DURATION, 0, 2)], None),
            ("3w", vec![(T_DURATION, 0, 2)], None),
            ("1y", vec![(T_DURATION, 0, 2)], None),
        ];
        assert_matches(cases);
    }
864
    /// Plain identifiers, metric identifiers (containing `:`), and a word
    /// starting with a digit, which is rejected.
    #[test]
    fn test_identifiers() {
        let cases = vec![
            ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
            ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
            (
                "abc d",
                vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)],
                None,
            ),
            (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
            ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
        ];
        assert_matches(cases);
    }
880
    /// `#` comments are skipped up to the end of the line.
    #[test]
    fn test_comments() {
        let cases = vec![
            ("# some comment", vec![], None),
            ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
        ];
        assert_matches(cases);
    }
889
    /// Comparison, arithmetic and set operators, plus `@`.
    #[test]
    fn test_operators() {
        let cases = vec![
            ("=", vec![(T_EQL, 0, 1)], None),
            (
                "{=}",
                vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)],
                None,
            ),
            ("==", vec![(T_EQLC, 0, 2)], None),
            ("!=", vec![(T_NEQ, 0, 2)], None),
            ("<", vec![(T_LSS, 0, 1)], None),
            (">", vec![(T_GTR, 0, 1)], None),
            (">=", vec![(T_GTE, 0, 2)], None),
            ("<=", vec![(T_LTE, 0, 2)], None),
            ("+", vec![(T_ADD, 0, 1)], None),
            ("-", vec![(T_SUB, 0, 1)], None),
            ("*", vec![(T_MUL, 0, 1)], None),
            ("/", vec![(T_DIV, 0, 1)], None),
            ("^", vec![(T_POW, 0, 1)], None),
            ("%", vec![(T_MOD, 0, 1)], None),
            ("AND", vec![(T_LAND, 0, 3)], None),
            ("or", vec![(T_LOR, 0, 2)], None),
            ("unless", vec![(T_LUNLESS, 0, 6)], None),
            ("@", vec![(T_AT, 0, 1)], None),
        ];
        assert_matches(cases);
    }
918
919    #[test]
920    fn test_aggregators() {
921        let cases = vec![
922            ("sum", vec![(T_SUM, 0, 3)], None),
923            ("AVG", vec![(T_AVG, 0, 3)], None),
924            ("Max", vec![(T_MAX, 0, 3)], None),
925            ("min", vec![(T_MIN, 0, 3)], None),
926            ("count", vec![(T_COUNT, 0, 5)], None),
927            ("stdvar", vec![(T_STDVAR, 0, 6)], None),
928            ("stddev", vec![(T_STDDEV, 0, 6)], None),
929        ];
930        assert_matches(cases);
931    }
932
933    #[test]
934    fn test_keywords() {
935        let cases = vec![
936            ("offset", vec![(T_OFFSET, 0, 6)], None),
937            ("by", vec![(T_BY, 0, 2)], None),
938            ("without", vec![(T_WITHOUT, 0, 7)], None),
939            ("on", vec![(T_ON, 0, 2)], None),
940            ("ignoring", vec![(T_IGNORING, 0, 8)], None),
941            ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
942            ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
943            ("bool", vec![(T_BOOL, 0, 4)], None),
944            ("atan2", vec![(T_ATAN2, 0, 5)], None),
945        ];
946        assert_matches(cases);
947    }
948
949    #[test]
950    fn test_preprocessors() {
951        let cases = vec![
952            ("start", vec![(T_START, 0, 5)], None),
953            ("end", vec![(T_END, 0, 3)], None),
954        ];
955        assert_matches(cases);
956    }
957
958    #[test]
959    fn test_selectors() {
960        let cases = vec![
961            ("北京", vec![], Some("unexpected character: '北'")),
962            ("北京='a'", vec![], Some("unexpected character: '北'")),
963            ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
964            (
965                "{foo='bar'}",
966                vec![
967                    (T_LEFT_BRACE, 0, 1),
968                    (T_IDENTIFIER, 1, 3),
969                    (T_EQL, 4, 1),
970                    (T_STRING, 6, 3),
971                    (T_RIGHT_BRACE, 10, 1),
972                ],
973                None,
974            ),
975            (
976                r#"{foo="bar"}"#,
977                vec![
978                    (T_LEFT_BRACE, 0, 1),
979                    (T_IDENTIFIER, 1, 3),
980                    (T_EQL, 4, 1),
981                    (T_STRING, 6, 3),
982                    (T_RIGHT_BRACE, 10, 1),
983                ],
984                None,
985            ),
986            (
987                r#"{foo="bar\"bar"}"#,
988                vec![
989                    (T_LEFT_BRACE, 0, 1),
990                    (T_IDENTIFIER, 1, 3),
991                    (T_EQL, 4, 1),
992                    (T_STRING, 6, 8),
993                    (T_RIGHT_BRACE, 15, 1),
994                ],
995                None,
996            ),
997            (
998                r#"{NaN	!= "bar" }"#,
999                vec![
1000                    (T_LEFT_BRACE, 0, 1),
1001                    (T_IDENTIFIER, 1, 3),
1002                    (T_NEQ, 5, 2),
1003                    (T_STRING, 9, 3),
1004                    (T_RIGHT_BRACE, 14, 1),
1005                ],
1006                None,
1007            ),
1008            (
1009                r#"{alert=~"bar" }"#,
1010                vec![
1011                    (T_LEFT_BRACE, 0, 1),
1012                    (T_IDENTIFIER, 1, 5),
1013                    (T_EQL_REGEX, 6, 2),
1014                    (T_STRING, 9, 3),
1015                    (T_RIGHT_BRACE, 14, 1),
1016                ],
1017                None,
1018            ),
1019            (
1020                r#"{on!~"bar"}"#,
1021                vec![
1022                    (T_LEFT_BRACE, 0, 1),
1023                    (T_IDENTIFIER, 1, 2),
1024                    (T_NEQ_REGEX, 3, 2),
1025                    (T_STRING, 6, 3),
1026                    (T_RIGHT_BRACE, 10, 1),
1027                ],
1028                None,
1029            ),
1030            (
1031                r#"{alert!#"bar"}"#,
1032                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
1033                Some("unexpected character after '!' inside braces: '#'"),
1034            ),
1035            (
1036                r#"{foo:a="bar"}"#,
1037                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
1038                Some("unexpected character inside braces: ':'"),
1039            ),
1040        ];
1041        assert_matches(cases);
1042    }
1043
1044    #[test]
1045    fn test_common_errors() {
1046        let cases = vec![
1047            ("=~", vec![], Some("unexpected character after '=': '~'")),
1048            ("!~", vec![], Some("unexpected character after '!': '~'")),
1049            ("!(", vec![], Some("unexpected character after '!': '('")),
1050            ("1a", vec![], Some("bad number or duration syntax: 1a")),
1051        ];
1052        assert_matches(cases);
1053    }
1054
1055    #[test]
1056    fn test_mismatched_parentheses() {
1057        let cases = vec![
1058            (
1059                "(",
1060                vec![(T_LEFT_PAREN, 0, 1)],
1061                Some("unclosed left parenthesis"),
1062            ),
1063            (")", vec![], Some("unexpected right parenthesis ')'")),
1064            (
1065                "())",
1066                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
1067                Some("unexpected right parenthesis ')'"),
1068            ),
1069            (
1070                "(()",
1071                vec![
1072                    (T_LEFT_PAREN, 0, 1),
1073                    (T_LEFT_PAREN, 1, 1),
1074                    (T_RIGHT_PAREN, 2, 1),
1075                ],
1076                Some("unclosed left parenthesis"),
1077            ),
1078            (
1079                "{",
1080                vec![(T_LEFT_BRACE, 0, 1)],
1081                Some("unexpected end of input inside braces"),
1082            ),
1083            ("}", vec![], Some("unexpected right brace '}'")),
1084            (
1085                "{{",
1086                vec![(T_LEFT_BRACE, 0, 1)],
1087                Some("unexpected left brace '{' inside braces"),
1088            ),
1089            (
1090                "{{}}",
1091                vec![(T_LEFT_BRACE, 0, 1)],
1092                Some("unexpected left brace '{' inside braces"),
1093            ),
1094            (
1095                "[",
1096                vec![(T_LEFT_BRACKET, 0, 1)],
1097                Some("unexpected end of input inside brackets"),
1098            ),
1099            (
1100                "[[",
1101                vec![(T_LEFT_BRACKET, 0, 1)],
1102                Some("unexpected left brace '[' inside brackets"),
1103            ),
1104            (
1105                "[]]",
1106                vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
1107                Some("unexpected right bracket ']'"),
1108            ),
1109            (
1110                "[[]]",
1111                vec![(T_LEFT_BRACKET, 0, 1)],
1112                Some("unexpected left brace '[' inside brackets"),
1113            ),
1114            ("]", vec![], Some("unexpected right bracket ']'")),
1115        ];
1116        assert_matches(cases);
1117    }
1118
1119    #[test]
1120    fn test_subqueries() {
1121        let cases = vec![
1122            (
1123                r#"test_name{on!~"bar"}[4m:4s]"#,
1124                vec![
1125                    (T_IDENTIFIER, 0, 9),
1126                    (T_LEFT_BRACE, 9, 1),
1127                    (T_IDENTIFIER, 10, 2),
1128                    (T_NEQ_REGEX, 12, 2),
1129                    (T_STRING, 15, 3),
1130                    (T_RIGHT_BRACE, 19, 1),
1131                    (T_LEFT_BRACKET, 20, 1),
1132                    (T_DURATION, 21, 2),
1133                    (T_COLON, 23, 1),
1134                    (T_DURATION, 24, 2),
1135                    (T_RIGHT_BRACKET, 26, 1),
1136                ],
1137                None,
1138            ),
1139            (
1140                r#"test:name{on!~"bar"}[4m:4s]"#,
1141                vec![
1142                    (T_METRIC_IDENTIFIER, 0, 9),
1143                    (T_LEFT_BRACE, 9, 1),
1144                    (T_IDENTIFIER, 10, 2),
1145                    (T_NEQ_REGEX, 12, 2),
1146                    (T_STRING, 15, 3),
1147                    (T_RIGHT_BRACE, 19, 1),
1148                    (T_LEFT_BRACKET, 20, 1),
1149                    (T_DURATION, 21, 2),
1150                    (T_COLON, 23, 1),
1151                    (T_DURATION, 24, 2),
1152                    (T_RIGHT_BRACKET, 26, 1),
1153                ],
1154                None,
1155            ),
1156            (
1157                r#"test:name{on!~"b:ar"}[4m:4s]"#,
1158                vec![
1159                    (T_METRIC_IDENTIFIER, 0, 9),
1160                    (T_LEFT_BRACE, 9, 1),
1161                    (T_IDENTIFIER, 10, 2),
1162                    (T_NEQ_REGEX, 12, 2),
1163                    (T_STRING, 15, 4),
1164                    (T_RIGHT_BRACE, 20, 1),
1165                    (T_LEFT_BRACKET, 21, 1),
1166                    (T_DURATION, 22, 2),
1167                    (T_COLON, 24, 1),
1168                    (T_DURATION, 25, 2),
1169                    (T_RIGHT_BRACKET, 27, 1),
1170                ],
1171                None,
1172            ),
1173            (
1174                r#"test:name{on!~"b:ar"}[4m:]"#,
1175                vec![
1176                    (T_METRIC_IDENTIFIER, 0, 9),
1177                    (T_LEFT_BRACE, 9, 1),
1178                    (T_IDENTIFIER, 10, 2),
1179                    (T_NEQ_REGEX, 12, 2),
1180                    (T_STRING, 15, 4),
1181                    (T_RIGHT_BRACE, 20, 1),
1182                    (T_LEFT_BRACKET, 21, 1),
1183                    (T_DURATION, 22, 2),
1184                    (T_COLON, 24, 1),
1185                    (T_RIGHT_BRACKET, 25, 1),
1186                ],
1187                None,
1188            ),
1189            (
1190                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
1191                vec![
1192                    (T_IDENTIFIER, 0, 13),
1193                    (T_LEFT_PAREN, 13, 1),
1194                    (T_IDENTIFIER, 14, 4),
1195                    (T_LEFT_PAREN, 18, 1),
1196                    (T_IDENTIFIER, 19, 3),
1197                    (T_LEFT_BRACE, 22, 1),
1198                    (T_IDENTIFIER, 23, 3),
1199                    (T_EQL, 26, 1),
1200                    (T_STRING, 28, 3),
1201                    (T_RIGHT_BRACE, 32, 1),
1202                    (T_LEFT_BRACKET, 33, 1),
1203                    (T_DURATION, 34, 2),
1204                    (T_RIGHT_BRACKET, 36, 1),
1205                    (T_RIGHT_PAREN, 37, 1),
1206                    (T_LEFT_BRACKET, 38, 1),
1207                    (T_DURATION, 39, 2),
1208                    (T_COLON, 41, 1),
1209                    (T_RIGHT_BRACKET, 42, 1),
1210                    (T_RIGHT_PAREN, 43, 1),
1211                    (T_LEFT_BRACKET, 44, 1),
1212                    (T_DURATION, 45, 2),
1213                    (T_COLON, 47, 1),
1214                    (T_DURATION, 48, 2),
1215                    (T_RIGHT_BRACKET, 50, 1),
1216                ],
1217                None,
1218            ),
1219            (
1220                r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
1221                vec![
1222                    (T_METRIC_IDENTIFIER, 0, 9),
1223                    (T_LEFT_BRACE, 9, 1),
1224                    (T_IDENTIFIER, 10, 2),
1225                    (T_NEQ_REGEX, 12, 2),
1226                    (T_STRING, 15, 4),
1227                    (T_RIGHT_BRACE, 20, 1),
1228                    (T_LEFT_BRACKET, 21, 1),
1229                    (T_DURATION, 22, 2),
1230                    (T_COLON, 24, 1),
1231                    (T_DURATION, 25, 2),
1232                    (T_RIGHT_BRACKET, 27, 1),
1233                    (T_OFFSET, 29, 6),
1234                    (T_DURATION, 36, 3),
1235                ],
1236                None,
1237            ),
1238            (
1239                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
1240                vec![
1241                    (T_IDENTIFIER, 0, 13),
1242                    (T_LEFT_PAREN, 13, 1),
1243                    (T_IDENTIFIER, 14, 4),
1244                    (T_LEFT_PAREN, 18, 1),
1245                    (T_IDENTIFIER, 19, 3),
1246                    (T_LEFT_BRACE, 22, 1),
1247                    (T_IDENTIFIER, 23, 3),
1248                    (T_EQL, 26, 1),
1249                    (T_STRING, 28, 3),
1250                    (T_RIGHT_BRACE, 32, 1),
1251                    (T_LEFT_BRACKET, 33, 1),
1252                    (T_DURATION, 34, 2),
1253                    (T_RIGHT_BRACKET, 36, 1),
1254                    (T_RIGHT_PAREN, 37, 1),
1255                    (T_LEFT_BRACKET, 38, 1),
1256                    (T_DURATION, 39, 2),
1257                    (T_COLON, 41, 1),
1258                    (T_RIGHT_BRACKET, 42, 1),
1259                    (T_OFFSET, 44, 6),
1260                    (T_DURATION, 51, 2),
1261                    (T_RIGHT_PAREN, 53, 1),
1262                    (T_LEFT_BRACKET, 54, 1),
1263                    (T_DURATION, 55, 2),
1264                    (T_COLON, 57, 1),
1265                    (T_DURATION, 58, 2),
1266                    (T_RIGHT_BRACKET, 60, 1),
1267                ],
1268                None,
1269            ),
1270            (
1271                r#"test:name[ 5m]"#,
1272                vec![
1273                    (T_METRIC_IDENTIFIER, 0, 9),
1274                    (T_LEFT_BRACKET, 9, 1),
1275                    (T_DURATION, 11, 2),
1276                    (T_RIGHT_BRACKET, 13, 1),
1277                ],
1278                None,
1279            ),
1280            (
1281                r#"test:name{o:n!~"bar"}[4m:4s]"#,
1282                vec![
1283                    (T_METRIC_IDENTIFIER, 0, 9),
1284                    (T_LEFT_BRACE, 9, 1),
1285                    (T_IDENTIFIER, 10, 1),
1286                ],
1287                Some("unexpected character inside braces: ':'"),
1288            ),
1289            (
1290                r#"test:name{on!~"bar"}[4m:4s:4h]"#,
1291                vec![
1292                    (T_METRIC_IDENTIFIER, 0, 9),
1293                    (T_LEFT_BRACE, 9, 1),
1294                    (T_IDENTIFIER, 10, 2),
1295                    (T_NEQ_REGEX, 12, 2),
1296                    (T_STRING, 15, 3),
1297                    (T_RIGHT_BRACE, 19, 1),
1298                    (T_LEFT_BRACKET, 20, 1),
1299                    (T_DURATION, 21, 2),
1300                    (T_COLON, 23, 1),
1301                    (T_DURATION, 24, 2),
1302                ],
1303                Some("unexpected second colon(:) in brackets"),
1304            ),
1305            (
1306                r#"test:name{on!~"bar"}[4m:4s:]"#,
1307                vec![
1308                    (T_METRIC_IDENTIFIER, 0, 9),
1309                    (T_LEFT_BRACE, 9, 1),
1310                    (T_IDENTIFIER, 10, 2),
1311                    (T_NEQ_REGEX, 12, 2),
1312                    (T_STRING, 15, 3),
1313                    (T_RIGHT_BRACE, 19, 1),
1314                    (T_LEFT_BRACKET, 20, 1),
1315                    (T_DURATION, 21, 2),
1316                    (T_COLON, 23, 1),
1317                    (T_DURATION, 24, 2),
1318                ],
1319                Some("unexpected second colon(:) in brackets"),
1320            ),
1321            (
1322                r#"test:name{on!~"bar"}[4m::]"#,
1323                vec![
1324                    (T_METRIC_IDENTIFIER, 0, 9),
1325                    (T_LEFT_BRACE, 9, 1),
1326                    (T_IDENTIFIER, 10, 2),
1327                    (T_NEQ_REGEX, 12, 2),
1328                    (T_STRING, 15, 3),
1329                    (T_RIGHT_BRACE, 19, 1),
1330                    (T_LEFT_BRACKET, 20, 1),
1331                    (T_DURATION, 21, 2),
1332                    (T_COLON, 23, 1),
1333                ],
1334                Some("unexpected second colon(:) in brackets"),
1335            ),
1336            (
1337                r#"test:name{on!~"bar"}[:4s]"#,
1338                vec![
1339                    (T_METRIC_IDENTIFIER, 0, 9),
1340                    (T_LEFT_BRACE, 9, 1),
1341                    (T_IDENTIFIER, 10, 2),
1342                    (T_NEQ_REGEX, 12, 2),
1343                    (T_STRING, 15, 3),
1344                    (T_RIGHT_BRACE, 19, 1),
1345                    (T_LEFT_BRACKET, 20, 1),
1346                ],
1347                Some("expect duration before first colon(:) in brackets"),
1348            ),
1349        ];
1350        assert_matches(cases);
1351    }
1352
1353    #[test]
1354    fn test_is_alpha() {
1355        assert!(is_alpha('_'));
1356        assert!(is_alpha('a'));
1357        assert!(is_alpha('z'));
1358        assert!(is_alpha('A'));
1359        assert!(is_alpha('Z'));
1360        assert!(!is_alpha('-'));
1361        assert!(!is_alpha('@'));
1362        assert!(!is_alpha('0'));
1363        assert!(!is_alpha('9'));
1364    }
1365
1366    #[test]
1367    fn test_is_alpha_numeric() {
1368        assert!(is_alpha_numeric('_'));
1369        assert!(is_alpha_numeric('a'));
1370        assert!(is_alpha_numeric('z'));
1371        assert!(is_alpha_numeric('A'));
1372        assert!(is_alpha_numeric('Z'));
1373        assert!(is_alpha_numeric('0'));
1374        assert!(is_alpha_numeric('9'));
1375        assert!(!is_alpha_numeric('-'));
1376        assert!(!is_alpha_numeric('@'));
1377    }
1378
1379    #[test]
1380    fn test_is_label() {
1381        assert!(is_label("_"));
1382        assert!(is_label("_up"));
1383        assert!(is_label("up"));
1384        assert!(is_label("up_"));
1385        assert!(is_label("up_system_1"));
1386
1387        assert!(!is_label(""));
1388        assert!(!is_label("0"));
1389        assert!(!is_label("0up"));
1390        assert!(!is_label("0_up"));
1391    }
1392}