1use crate::parser::token::*;
16use cfgrammar::NewlineCache;
17use lrlex::{DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer};
18use lrpar::Lexeme;
19use std::fmt::Debug;
20
/// Characters accepted directly after a backslash inside a quoted string
/// (the string's own quote character is accepted as well, see
/// `Lexer::accept_escape`).
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that delimit a string literal: single quote, double quote and
/// backtick.
const STRING_SYMBOLS: &str = r#"'"`"#;

/// Lexeme type produced by this lexer: a `DefaultLexeme` carrying a `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
25
26pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<DefaultLexerTypes<TokenId>>, String> {
27 let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
28 match lexemes.last() {
29 Some(Err(info)) => Err(info.into()),
30 Some(Ok(_)) => {
31 let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
33 Ok(LRNonStreamingLexer::new(s, lexemes, NewlineCache::new()))
34 }
35 None => Err(format!("no expression found in input: '{s}'")),
36 }
37}
38
/// States of the lexer's state machine; `Lexer::shift` maps each state to
/// the next one.
#[derive(Debug)]
enum State {
    /// Ready to scan the next token.
    Start,
    /// Scanning has finished; shifting from here produces an error state.
    End,
    /// A complete token with the given id was recognized.
    Lexeme(TokenId),
    /// Scanning a plain identifier (entered from inside braces).
    Identifier,
    /// Scanning a word that may turn out to be a keyword, a metric
    /// identifier (contains ':') or a plain identifier.
    KeywordOrIdentifier,
    /// Scanning a numeric literal or a duration.
    NumberOrDuration,
    /// Scanning the content between '[' and ']'.
    InsideBrackets,
    /// Scanning the content between '{' and '}'.
    InsideBraces,
    /// Skipping a '#' comment up to the end of the line.
    LineComment,
    /// Skipping a run of whitespace.
    Space,
    /// Inside a string opened by the given quote character.
    String(char),
    /// Directly after a backslash inside a string opened by the given quote
    /// character.
    Escape(char),
    /// Scanning failed with the given message.
    Err(String),
}
55
/// Cursor and bookkeeping shared by all lexer states.
#[derive(Debug)]
struct Context {
    /// The whole input, one entry per `char`.
    chars: Vec<char>,
    /// Index into `chars` of the next char to read.
    idx: usize,
    /// Byte offset at which the lexeme currently being scanned begins.
    start: usize,
    /// Byte offset of the cursor (sum of `len_utf8` of all consumed chars).
    pos: usize,
    /// Number of currently unclosed '('.
    paren_depth: usize,
    /// Whether a '{' has been seen without its matching '}'.
    brace_open: bool,
    /// Whether a '[' has been seen without its matching ']'.
    bracket_open: bool,
    /// Whether a ':' has already been scanned in the current brackets.
    got_colon: bool,
    /// Whether the EOF token has already been emitted.
    eof: bool,
}
70
71impl Context {
72 fn new(input: &str) -> Context {
73 Self {
74 chars: input.chars().collect(),
75 idx: 0,
76 start: 0,
77 pos: 0,
78
79 paren_depth: 0,
80 brace_open: false,
81 bracket_open: false,
82 got_colon: false,
83 eof: false,
84 }
85 }
86
87 fn pop(&mut self) -> Option<char> {
89 let ch = self.peek()?;
90 self.pos += ch.len_utf8();
91 self.idx += 1;
92 Some(ch)
93 }
94
95 fn backup(&mut self) -> bool {
98 if let Some(ch) = self.chars.get(self.idx - 1) {
99 self.pos -= ch.len_utf8();
100 self.idx -= 1;
101 return true;
102 };
103 false
104 }
105
106 fn peek(&self) -> Option<char> {
108 self.chars.get(self.idx).copied()
109 }
110
111 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
113 let mut start = self.start;
114 let mut len = self.pos - self.start;
115 if token_id == T_STRING {
116 start += 1;
117 len -= 2;
118 }
119 DefaultLexeme::new(token_id, start, len)
120 }
121
122 fn ignore(&mut self) {
124 self.start = self.pos;
125 }
126
127 fn lexeme_string(&self) -> String {
129 let mut s = String::from("");
130 if self.idx == 0 {
131 return s;
132 }
133
134 let mut pos = self.pos;
135 let mut idx = self.idx;
136 while pos > self.start {
137 if let Some(&ch) = self.chars.get(idx - 1) {
138 pos -= ch.len_utf8();
139 idx -= 1;
140 s.push(ch);
141 };
142 }
143 s.chars().rev().collect()
144 }
145}
146
/// Lexer state machine: the current `State` plus the scanning `Context`.
/// Tokens are pulled from it through its `Iterator` implementation.
#[derive(Debug)]
struct Lexer {
    /// State the machine is currently in.
    state: State,
    /// Input cursor and nesting bookkeeping.
    ctx: Context,
}
152
153impl Lexer {
155 fn new(input: &str) -> Self {
156 let ctx = Context::new(input);
157 let state = State::Start;
158 Self { state, ctx }
159 }
160
161 fn is_inside_braces(&self) -> bool {
162 self.ctx.brace_open
163 }
164
165 fn jump_outof_braces(&mut self) {
166 self.ctx.brace_open = false;
167 }
168
169 fn dive_into_braces(&mut self) {
170 self.ctx.brace_open = true;
171 }
172
173 fn is_inside_brackets(&self) -> bool {
174 self.ctx.bracket_open
175 }
176
177 fn jump_outof_brackets(&mut self) {
178 self.ctx.bracket_open = false;
179 }
180
181 fn dive_into_brackets(&mut self) {
182 self.ctx.bracket_open = true;
183 }
184
185 fn is_colon_scanned(&self) -> bool {
186 self.ctx.got_colon
187 }
188
189 fn set_colon_scanned(&mut self) {
190 self.ctx.got_colon = true;
191 }
192
193 fn reset_colon_scanned(&mut self) {
194 self.ctx.got_colon = false;
195 }
196
197 fn inc_paren_depth(&mut self) -> bool {
199 if self.ctx.paren_depth < usize::MAX {
200 self.ctx.paren_depth += 1;
201 return true;
202 }
203 false
204 }
205
206 fn dec_paren_depth(&mut self) -> bool {
208 if self.ctx.paren_depth >= 1 {
209 self.ctx.paren_depth -= 1;
210 return true;
211 }
212 false
213 }
214
215 fn is_paren_balanced(&self) -> bool {
216 self.ctx.paren_depth == 0
217 }
218
219 fn pop(&mut self) -> Option<char> {
220 self.ctx.pop()
221 }
222
223 fn backup(&mut self) -> bool {
224 self.ctx.backup()
225 }
226
227 fn peek(&self) -> Option<char> {
228 self.ctx.peek()
229 }
230
231 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
234 let lexeme = self.ctx.lexeme(token_id);
235 self.ctx.ignore();
236 lexeme
237 }
238
239 fn lexeme_string(&self) -> String {
240 self.ctx.lexeme_string()
241 }
242
243 fn ignore(&mut self) {
244 self.ctx.ignore();
245 }
246
247 fn is_eof(&self) -> bool {
248 self.ctx.eof
249 }
250
251 fn set_eof(&mut self) {
252 self.ctx.eof = true;
253 }
254}
255
256impl Lexer {
258 fn shift(&mut self) {
259 self.state = match self.state {
262 State::Start => self.start(),
263 State::End => State::Err("End state can not shift forward.".into()),
264 State::Lexeme(_) => State::Start,
265 State::String(ch) => self.accept_string(ch),
266 State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
267 State::Identifier => self.accept_identifier(),
268 State::NumberOrDuration => self.accept_number_or_duration(),
269 State::InsideBrackets => self.inside_brackets(),
270 State::InsideBraces => self.inside_braces(),
271 State::LineComment => self.ignore_comment_line(),
272 State::Escape(ch) => self.accept_escape(ch),
273 State::Space => self.ignore_space(),
274 State::Err(_) => State::End,
275 };
276 }
277
278 fn start(&mut self) -> State {
279 if self.is_inside_braces() {
280 return State::InsideBraces;
281 }
282
283 if self.is_inside_brackets() {
284 return State::InsideBrackets;
285 }
286
287 let c = match self.pop() {
288 None => {
289 if !self.is_paren_balanced() {
290 return State::Err("unclosed left parenthesis".into());
291 }
292
293 if !self.is_eof() {
294 self.set_eof();
295 return State::Lexeme(T_EOF);
296 }
297
298 return State::End;
299 }
300 Some(ch) => ch,
301 };
302
303 match c {
306 '#' => State::LineComment,
307 '@' => State::Lexeme(T_AT),
308 ',' => State::Lexeme(T_COMMA),
309 '*' => State::Lexeme(T_MUL),
310 '/' => State::Lexeme(T_DIV),
311 '%' => State::Lexeme(T_MOD),
312 '+' => State::Lexeme(T_ADD),
313 '-' => State::Lexeme(T_SUB),
314 '^' => State::Lexeme(T_POW),
315 '=' => match self.peek() {
316 Some('=') => {
317 self.pop();
318 State::Lexeme(T_EQLC)
319 }
320 Some('~') => State::Err("unexpected character after '=': '~'".into()),
322 _ => State::Lexeme(T_EQL),
323 },
324 '!' => match self.pop() {
325 Some('=') => State::Lexeme(T_NEQ),
326 Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
327 None => State::Err("'!' can not be at the end".into()),
328 },
329 '<' => match self.peek() {
330 Some('=') => {
331 self.pop();
332 State::Lexeme(T_LTE)
333 }
334 _ => State::Lexeme(T_LSS),
335 },
336 '>' => match self.peek() {
337 Some('=') => {
338 self.pop();
339 State::Lexeme(T_GTE)
340 }
341 _ => State::Lexeme(T_GTR),
342 },
343 ch if ch.is_ascii_whitespace() => self.ignore_space(),
344 ch if ch.is_ascii_digit() => State::NumberOrDuration,
345 '.' => match self.peek() {
346 Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
347 Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
348 None => State::Err("unexpected character: '.'".into()),
349 },
350 ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
351 ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
352 '(' => {
353 if self.inc_paren_depth() {
354 return State::Lexeme(T_LEFT_PAREN);
355 }
356 State::Err("too many left parentheses".into())
357 }
358 ')' => {
359 if self.is_paren_balanced() {
360 return State::Err("unexpected right parenthesis ')'".into());
361 }
362 if self.dec_paren_depth() {
363 return State::Lexeme(T_RIGHT_PAREN);
364 }
365 State::Err("unexpected right parenthesis ')'".into())
366 }
367 '{' => {
368 self.dive_into_braces();
369 State::Lexeme(T_LEFT_BRACE)
370 }
371 '}' => State::Err("unexpected right brace '}'".into()),
373 '[' => {
374 self.reset_colon_scanned();
375 self.dive_into_brackets();
376 State::Lexeme(T_LEFT_BRACKET)
377 }
378 ']' => State::Err("unexpected right bracket ']'".into()),
380 ch => State::Err(format!("unexpected character: {ch:?}")),
381 }
382 }
383
384 fn accept_number_or_duration(&mut self) -> State {
386 self.backup();
387 if self.scan_number() {
388 return State::Lexeme(T_NUMBER);
389 }
390
391 if self.accept_remaining_duration() {
393 return State::Lexeme(T_DURATION);
394 }
395
396 self.pop();
398 State::Err(format!(
399 "bad number or duration syntax: {}",
400 self.lexeme_string()
401 ))
402 }
403
404 fn accept_keyword_or_identifier(&mut self) -> State {
406 while let Some(ch) = self.peek() {
407 if is_alpha_numeric(ch) || ch == ':' {
408 self.pop();
409 } else {
410 break;
411 }
412 }
413
414 let s = self.lexeme_string();
415 match get_keyword_token(&s.to_lowercase()) {
416 Some(token_id) => State::Lexeme(token_id),
417 None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
418 _ => State::Lexeme(T_IDENTIFIER),
419 }
420 }
421
422 fn ignore_comment_line(&mut self) -> State {
424 while let Some(ch) = self.pop() {
425 if ch == '\r' || ch == '\n' {
426 break;
427 }
428 }
429 self.ignore();
430 State::Start
431 }
432
433 fn accept<F>(&mut self, f: F) -> bool
435 where
436 F: Fn(char) -> bool,
437 {
438 if let Some(ch) = self.peek() {
439 if f(ch) {
440 self.pop();
441 return true;
442 }
443 }
444 false
445 }
446
447 fn accept_run<F>(&mut self, f: F)
449 where
450 F: Fn(char) -> bool,
451 {
452 while let Some(ch) = self.peek() {
453 if f(ch) {
454 self.pop();
455 } else {
456 break;
457 }
458 }
459 }
460
461 fn ignore_space(&mut self) -> State {
463 self.backup(); self.accept_run(|ch| ch.is_ascii_whitespace());
465 self.ignore();
466 State::Start
467 }
468
469 fn scan_number(&mut self) -> bool {
472 let mut hex_digit = false;
473 if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
474 hex_digit = true;
475 }
476 let is_valid_digit = |ch: char| -> bool {
477 if hex_digit {
478 ch.is_ascii_hexdigit()
479 } else {
480 ch.is_ascii_digit()
481 }
482 };
483
484 self.accept_run(is_valid_digit);
485 if self.accept(|ch| ch == '.') {
486 self.accept_run(is_valid_digit);
487 }
488 if self.accept(|ch| ch == 'e' || ch == 'E') {
489 self.accept(|ch| ch == '+' || ch == '-');
490 self.accept_run(|ch| ch.is_ascii_digit());
491 }
492
493 !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
497 }
498
499 fn accept_remaining_duration(&mut self) -> bool {
502 if !self.accept(|ch| "smhdwy".contains(ch)) {
504 return false;
505 }
506 self.accept(|ch| ch == 's');
509
510 while self.accept(|ch| ch.is_ascii_digit()) {
512 self.accept_run(|ch| ch.is_ascii_digit());
513 if !self.accept(|ch| "smhdw".contains(ch)) {
515 return false;
516 }
517 self.accept(|ch| ch == 's');
520 }
521
522 !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
523 }
524
525 fn accept_escape(&mut self, symbol: char) -> State {
529 match self.pop() {
530 Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
531 Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
532 None => State::Err("escape sequence not terminated".into()),
533 }
534 }
535
536 fn accept_string(&mut self, symbol: char) -> State {
538 while let Some(ch) = self.pop() {
539 if ch == '\\' {
540 return State::Escape(symbol);
541 }
542
543 if ch == symbol {
544 return State::Lexeme(T_STRING);
545 }
546 }
547
548 State::Err(format!("unterminated quoted string {symbol}"))
549 }
550
551 fn inside_braces(&mut self) -> State {
554 match self.pop() {
555 Some('#') => State::LineComment,
556 Some(',') => State::Lexeme(T_COMMA),
557 Some('o') | Some('O') => {
558 if let Some('r') | Some('R') = self.peek() {
559 self.pop();
560 if let Some(' ') = self.peek() {
561 State::Lexeme(T_LOR)
562 } else {
563 State::Identifier
564 }
565 } else {
566 State::Identifier
567 }
568 }
569 Some(ch) if ch.is_ascii_whitespace() => State::Space,
570 Some(ch) if is_alpha(ch) => State::Identifier,
571 Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
572 Some('=') => match self.peek() {
573 Some('~') => {
574 self.pop();
575 State::Lexeme(T_EQL_REGEX)
576 }
577 _ => State::Lexeme(T_EQL),
578 },
579 Some('!') => match self.pop() {
580 Some('~') => State::Lexeme(T_NEQ_REGEX),
581 Some('=') => State::Lexeme(T_NEQ),
582 Some(ch) => State::Err(format!(
583 "unexpected character after '!' inside braces: '{ch}'"
584 )),
585 None => State::Err("'!' can not be at the end".into()),
586 },
587 Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
588 Some('}') => {
589 self.jump_outof_braces();
590 State::Lexeme(T_RIGHT_BRACE)
591 }
592 Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
593 None => State::Err("unexpected end of input inside braces".into()),
594 }
595 }
596
597 fn last_char_matches<F>(&mut self, f: F) -> bool
599 where
600 F: Fn(char) -> bool,
601 {
602 if !self.backup() {
604 return false;
605 }
606 let matched = matches!(self.peek(), Some(ch) if f(ch));
607 self.pop();
608 matched
609 }
610
611 fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
613 self.backup();
615 let matched = self.last_char_matches(|ch| ch == '[');
616 self.pop();
617 matched
618 }
619
620 fn inside_brackets(&mut self) -> State {
622 match self.pop() {
623 Some(ch) if ch.is_ascii_whitespace() => State::Space,
624 Some(':') => {
625 if self.is_colon_scanned() {
626 return State::Err("unexpected second colon(:) in brackets".into());
627 }
628
629 if self.is_colon_the_first_char_in_brackets() {
630 return State::Err("expect duration before first colon(:) in brackets".into());
631 }
632
633 self.set_colon_scanned();
634 State::Lexeme(T_COLON)
635 }
636 Some(ch) if ch.is_ascii_digit() => self.accept_number_or_duration(),
637 Some(']') => {
638 self.jump_outof_brackets();
639 self.reset_colon_scanned();
640 State::Lexeme(T_RIGHT_BRACKET)
641 }
642 Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
643 Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
644 None => State::Err("unexpected end of input inside brackets".into()),
645 }
646 }
647
648 fn accept_identifier(&mut self) -> State {
651 self.accept_run(is_alpha_numeric);
652 State::Lexeme(T_IDENTIFIER)
653 }
654}
655
656impl Iterator for Lexer {
658 type Item = Result<LexemeType, String>;
659
660 fn next(&mut self) -> Option<Self::Item> {
661 self.shift();
662 match &self.state {
663 State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
664 State::Err(info) => Some(Err(info.clone())),
665 State::End => None,
666 _ => self.next(),
667 }
668 }
669}
670
671fn is_alpha_numeric(ch: char) -> bool {
672 is_alpha(ch) || ch.is_ascii_digit()
673}
674
/// Returns `true` for '_' and for ASCII letters of either case.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
678
679pub(crate) fn is_label(s: &str) -> bool {
680 if s.is_empty() {
681 return false;
682 }
683 let mut chars = s.chars();
684 match chars.next() {
685 None => false,
686 Some(ch) if !is_alpha(ch) => false,
687 Some(_) => {
688 for ch in chars {
689 if !is_alpha_numeric(ch) {
690 return false;
691 }
692 }
693 true
694 }
695 }
696}
697
698#[cfg(test)]
699mod tests {
700 use super::*;
701
    /// Compact form of an expected lexeme: (token id, start, length).
    type LexemeTuple = (TokenId, usize, usize);
    /// One table entry: (input, expected lexemes, optional trailing error message).
    type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

    /// One evaluated entry: (input, expected lexer output, actual lexer output).
    type Case = (
        &'static str,
        Vec<Result<LexemeType, String>>,
        Vec<Result<LexemeType, String>>,
    );
713
714 fn assert_matches(v: Vec<MatchTuple>) {
715 let cases: Vec<Case> = v
716 .into_iter()
717 .map(|(input, lexemes, err)| {
718 let mut expected: Vec<Result<LexemeType, String>> = lexemes
719 .into_iter()
720 .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
721 .collect();
722
723 if let Some(s) = err {
724 expected.push(Err(s.to_string()));
725 }
726
727 let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
728 .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
730 .collect();
731 (input, expected, actual)
732 })
733 .collect();
734
735 for (input, expected, actual) in cases.iter() {
736 assert_eq!(expected, actual, "\n<input>: {input}");
737 }
738 }
739
740 #[test]
741 fn test_common() {
742 let cases = vec![
743 (",", vec![(T_COMMA, 0, 1)], None),
744 (
745 "()",
746 vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
747 None,
748 ),
749 (
750 "{}",
751 vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
752 None,
753 ),
754 (
755 "[5m]",
756 vec![
757 (T_LEFT_BRACKET, 0, 1),
758 (T_DURATION, 1, 2),
759 (T_RIGHT_BRACKET, 3, 1),
760 ],
761 None,
762 ),
763 (
764 "[ 5m]",
765 vec![
766 (T_LEFT_BRACKET, 0, 1),
767 (T_DURATION, 2, 2),
768 (T_RIGHT_BRACKET, 4, 1),
769 ],
770 None,
771 ),
772 (
773 "[ 5m]",
774 vec![
775 (T_LEFT_BRACKET, 0, 1),
776 (T_DURATION, 3, 2),
777 (T_RIGHT_BRACKET, 5, 1),
778 ],
779 None,
780 ),
781 (
782 "[ 5m ]",
783 vec![
784 (T_LEFT_BRACKET, 0, 1),
785 (T_DURATION, 3, 2),
786 (T_RIGHT_BRACKET, 6, 1),
787 ],
788 None,
789 ),
790 ("\r\n\r", vec![], None),
791 ];
792
793 assert_matches(cases);
794 }
795
    /// Numeric literals, including the `NaN`/`Inf` words in any letter case.
    #[test]
    fn test_numbers() {
        let cases = vec![
            ("1", vec![(T_NUMBER, 0, 1)], None),
            ("4.23", vec![(T_NUMBER, 0, 4)], None),
            (".3", vec![(T_NUMBER, 0, 2)], None),
            ("5.", vec![(T_NUMBER, 0, 2)], None),
            ("NaN", vec![(T_NUMBER, 0, 3)], None),
            ("nAN", vec![(T_NUMBER, 0, 3)], None),
            ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
            ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
            ("iNf", vec![(T_NUMBER, 0, 3)], None),
            ("Inf", vec![(T_NUMBER, 0, 3)], None),
            ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
            (
                "+Inf 123",
                vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
            ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("0x123", vec![(T_NUMBER, 0, 5)], None),
        ];
        assert_matches(cases);
    }
831
    /// Quoted strings with valid and invalid escape sequences, plus a '.'
    /// followed by a non-ASCII digit.
    #[test]
    fn test_strings() {
        let cases = vec![
            ("\"test\\tsequence\"", vec![(T_STRING, 1, 14)], None),
            ("\"test\\\\.expression\"", vec![(T_STRING, 1, 17)], None),
            (
                "\"test\\.expression\"",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (
                "`test\\.expression`",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (".٩", vec![], Some("unexpected character after '.': '٩'")),
        ];
        assert_matches(cases);
    }
854
    /// Single-unit durations for several of the accepted units.
    #[test]
    fn test_durations() {
        let cases = vec![
            ("5s", vec![(T_DURATION, 0, 2)], None),
            ("123m", vec![(T_DURATION, 0, 4)], None),
            ("1h", vec![(T_DURATION, 0, 2)], None),
            ("3w", vec![(T_DURATION, 0, 2)], None),
            ("1y", vec![(T_DURATION, 0, 2)], None),
        ];
        assert_matches(cases);
    }
866
    /// Plain identifiers, metric identifiers containing ':', and a word that
    /// starts with a digit.
    #[test]
    fn test_identifiers() {
        let cases = vec![
            ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
            ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
            (
                "abc d",
                vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)],
                None,
            ),
            (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
            ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
        ];
        assert_matches(cases);
    }
882
    /// '#' comments produce no tokens and end at the line break.
    #[test]
    fn test_comments() {
        let cases = vec![
            ("# some comment", vec![], None),
            ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
        ];
        assert_matches(cases);
    }
891
    /// Comparison, arithmetic and set operators, and the '@' modifier.
    #[test]
    fn test_operators() {
        let cases = vec![
            ("=", vec![(T_EQL, 0, 1)], None),
            (
                "{=}",
                vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)],
                None,
            ),
            ("==", vec![(T_EQLC, 0, 2)], None),
            ("!=", vec![(T_NEQ, 0, 2)], None),
            ("<", vec![(T_LSS, 0, 1)], None),
            (">", vec![(T_GTR, 0, 1)], None),
            (">=", vec![(T_GTE, 0, 2)], None),
            ("<=", vec![(T_LTE, 0, 2)], None),
            ("+", vec![(T_ADD, 0, 1)], None),
            ("-", vec![(T_SUB, 0, 1)], None),
            ("*", vec![(T_MUL, 0, 1)], None),
            ("/", vec![(T_DIV, 0, 1)], None),
            ("^", vec![(T_POW, 0, 1)], None),
            ("%", vec![(T_MOD, 0, 1)], None),
            ("AND", vec![(T_LAND, 0, 3)], None),
            ("or", vec![(T_LOR, 0, 2)], None),
            ("unless", vec![(T_LUNLESS, 0, 6)], None),
            ("@", vec![(T_AT, 0, 1)], None),
        ];
        assert_matches(cases);
    }
920
    /// Aggregator keywords, matched regardless of letter case.
    #[test]
    fn test_aggregators() {
        let cases = vec![
            ("sum", vec![(T_SUM, 0, 3)], None),
            ("AVG", vec![(T_AVG, 0, 3)], None),
            ("Max", vec![(T_MAX, 0, 3)], None),
            ("min", vec![(T_MIN, 0, 3)], None),
            ("count", vec![(T_COUNT, 0, 5)], None),
            ("stdvar", vec![(T_STDVAR, 0, 6)], None),
            ("stddev", vec![(T_STDDEV, 0, 6)], None),
        ];
        assert_matches(cases);
    }
934
    /// Modifier and grouping keywords.
    #[test]
    fn test_keywords() {
        let cases = vec![
            ("offset", vec![(T_OFFSET, 0, 6)], None),
            ("by", vec![(T_BY, 0, 2)], None),
            ("without", vec![(T_WITHOUT, 0, 7)], None),
            ("on", vec![(T_ON, 0, 2)], None),
            ("ignoring", vec![(T_IGNORING, 0, 8)], None),
            ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
            ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
            ("bool", vec![(T_BOOL, 0, 4)], None),
            ("atan2", vec![(T_ATAN2, 0, 5)], None),
        ];
        assert_matches(cases);
    }
950
    /// The `start` and `end` keywords.
    #[test]
    fn test_preprocessors() {
        let cases = vec![
            ("start", vec![(T_START, 0, 5)], None),
            ("end", vec![(T_END, 0, 3)], None),
        ];
        assert_matches(cases);
    }
959
    /// Label matchers inside braces, plus inputs that are rejected before or
    /// within a selector.
    #[test]
    fn test_selectors() {
        let cases = vec![
            ("北京", vec![], Some("unexpected character: '北'")),
            ("北京='a'", vec![], Some("unexpected character: '北'")),
            ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
            (
                "{foo='bar'}",
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 6, 3),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 6, 3),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar\"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 6, 8),
                    (T_RIGHT_BRACE, 15, 1),
                ],
                None,
            ),
            (
                r#"{NaN != "bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_NEQ, 5, 2),
                    (T_STRING, 9, 3),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{alert=~"bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 5),
                    (T_EQL_REGEX, 6, 2),
                    (T_STRING, 9, 3),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{on!~"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 2),
                    (T_NEQ_REGEX, 3, 2),
                    (T_STRING, 6, 3),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{alert!#"bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
                Some("unexpected character after '!' inside braces: '#'"),
            ),
            (
                r#"{foo:a="bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
                Some("unexpected character inside braces: ':'"),
            ),
        ];
        assert_matches(cases);
    }
1045
    /// Operator and number inputs that are rejected outside of braces.
    #[test]
    fn test_common_errors() {
        let cases = vec![
            ("=~", vec![], Some("unexpected character after '=': '~'")),
            ("!~", vec![], Some("unexpected character after '!': '~'")),
            ("!(", vec![], Some("unexpected character after '!': '('")),
            ("1a", vec![], Some("bad number or duration syntax: 1a")),
        ];
        assert_matches(cases);
    }
1056
    /// Unbalanced or nested parentheses, braces and brackets.
    #[test]
    fn test_mismatched_parentheses() {
        let cases = vec![
            (
                "(",
                vec![(T_LEFT_PAREN, 0, 1)],
                Some("unclosed left parenthesis"),
            ),
            (")", vec![], Some("unexpected right parenthesis ')'")),
            (
                "())",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                Some("unexpected right parenthesis ')'"),
            ),
            (
                "(()",
                vec![
                    (T_LEFT_PAREN, 0, 1),
                    (T_LEFT_PAREN, 1, 1),
                    (T_RIGHT_PAREN, 2, 1),
                ],
                Some("unclosed left parenthesis"),
            ),
            (
                "{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected end of input inside braces"),
            ),
            ("}", vec![], Some("unexpected right brace '}'")),
            (
                "{{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "{{}}",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected end of input inside brackets"),
            ),
            (
                "[[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            (
                "[]]",
                vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
                Some("unexpected right bracket ']'"),
            ),
            (
                "[[]]",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            ("]", vec![], Some("unexpected right bracket ']'")),
        ];
        assert_matches(cases);
    }
1120
    /// Range and subquery brackets (`[range]`, `[range:step]`, `[range:]`)
    /// combined with selectors, function calls and `offset`, plus the colon
    /// error cases.
    #[test]
    fn test_subqueries() {
        let cases = vec![
            (
                r#"test_name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 4),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 4),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_RIGHT_BRACKET, 25, 1),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 28, 3),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_RIGHT_PAREN, 43, 1),
                    (T_LEFT_BRACKET, 44, 1),
                    (T_DURATION, 45, 2),
                    (T_COLON, 47, 1),
                    (T_DURATION, 48, 2),
                    (T_RIGHT_BRACKET, 50, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 4),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                    (T_OFFSET, 29, 6),
                    (T_DURATION, 36, 3),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 28, 3),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_OFFSET, 44, 6),
                    (T_DURATION, 51, 2),
                    (T_RIGHT_PAREN, 53, 1),
                    (T_LEFT_BRACKET, 54, 1),
                    (T_DURATION, 55, 2),
                    (T_COLON, 57, 1),
                    (T_DURATION, 58, 2),
                    (T_RIGHT_BRACKET, 60, 1),
                ],
                None,
            ),
            (
                r#"test:name[ 5m]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACKET, 9, 1),
                    (T_DURATION, 11, 2),
                    (T_RIGHT_BRACKET, 13, 1),
                ],
                None,
            ),
            (
                r#"test:name{o:n!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 1),
                ],
                Some("unexpected character inside braces: ':'"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:4h]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m::]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 15, 3),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                ],
                Some("expect duration before first colon(:) in brackets"),
            ),
        ];
        assert_matches(cases);
    }
1354
1355 #[test]
1356 fn test_is_alpha() {
1357 assert!(is_alpha('_'));
1358 assert!(is_alpha('a'));
1359 assert!(is_alpha('z'));
1360 assert!(is_alpha('A'));
1361 assert!(is_alpha('Z'));
1362 assert!(!is_alpha('-'));
1363 assert!(!is_alpha('@'));
1364 assert!(!is_alpha('0'));
1365 assert!(!is_alpha('9'));
1366 }
1367
1368 #[test]
1369 fn test_is_alpha_numeric() {
1370 assert!(is_alpha_numeric('_'));
1371 assert!(is_alpha_numeric('a'));
1372 assert!(is_alpha_numeric('z'));
1373 assert!(is_alpha_numeric('A'));
1374 assert!(is_alpha_numeric('Z'));
1375 assert!(is_alpha_numeric('0'));
1376 assert!(is_alpha_numeric('9'));
1377 assert!(!is_alpha_numeric('-'));
1378 assert!(!is_alpha_numeric('@'));
1379 }
1380
1381 #[test]
1382 fn test_is_label() {
1383 assert!(is_label("_"));
1384 assert!(is_label("_up"));
1385 assert!(is_label("up"));
1386 assert!(is_label("up_"));
1387 assert!(is_label("up_system_1"));
1388
1389 assert!(!is_label(""));
1390 assert!(!is_label("0"));
1391 assert!(!is_label("0up"));
1392 assert!(!is_label("0_up"));
1393 }
1394}