1use crate::parser::token::*;
16use cfgrammar::NewlineCache;
17use lrlex::{DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer};
18use lrpar::Lexeme;
19use std::fmt::Debug;
20
/// Characters accepted directly after a backslash inside a quoted string
/// (the enclosing quote character itself is accepted as well, see `accept_escape`).
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that open a string literal; the same character must close it.
const STRING_SYMBOLS: &str = r#"'"`"#;

/// Lexeme type emitted by this module: a `DefaultLexeme` tagged with a `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
25
26pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<'_, '_, DefaultLexerTypes<TokenId>>, String> {
27 let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
28 match lexemes.last() {
29 Some(Err(info)) => Err(info.into()),
30 Some(Ok(_)) => {
31 let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
33 Ok(LRNonStreamingLexer::new(s, lexemes, NewlineCache::new()))
34 }
35 None => Err(format!("no expression found in input: '{s}'")),
36 }
37}
38
39#[derive(Debug)]
40enum State {
41 Start,
42 End,
43 Lexeme(TokenId),
44 Identifier,
45 KeywordOrIdentifier,
46 NumberOrDuration,
47 InsideBrackets,
48 InsideBraces,
49 LineComment,
50 Space,
51 String(char), Escape(char), Err(String),
54}
55
/// Mutable scanning context: the input, the cursor, and the nesting flags.
#[derive(Debug)]
struct Context {
    /// The whole input, decoded into characters.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    idx: usize,
    /// Byte offset at which the pending lexeme starts.
    start: usize,
    /// Byte offset of the cursor (advanced by each character's UTF-8 length).
    pos: usize,
    /// Number of `(` seen that have not been closed yet.
    paren_depth: usize,
    /// Whether a `{` has been opened and not yet closed.
    brace_open: bool,
    /// Whether a `[` has been opened and not yet closed.
    bracket_open: bool,
    /// Whether a `:` has already been seen inside the current brackets.
    got_colon: bool,
    /// Whether the end-of-input token has already been emitted.
    eof: bool,
}
70
71impl Context {
72 fn new(input: &str) -> Context {
73 Self {
74 chars: input.chars().collect(),
75 idx: 0,
76 start: 0,
77 pos: 0,
78
79 paren_depth: 0,
80 brace_open: false,
81 bracket_open: false,
82 got_colon: false,
83 eof: false,
84 }
85 }
86
87 fn pop(&mut self) -> Option<char> {
89 let ch = self.peek()?;
90 self.pos += ch.len_utf8();
91 self.idx += 1;
92 Some(ch)
93 }
94
95 fn backup(&mut self) -> bool {
98 if let Some(ch) = self.chars.get(self.idx - 1) {
99 self.pos -= ch.len_utf8();
100 self.idx -= 1;
101 return true;
102 };
103 false
104 }
105
106 fn peek(&self) -> Option<char> {
108 self.chars.get(self.idx).copied()
109 }
110
111 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
113 let start = self.start;
114 let len = self.pos - self.start;
115 DefaultLexeme::new(token_id, start, len)
116 }
117
118 fn ignore(&mut self) {
120 self.start = self.pos;
121 }
122
123 fn lexeme_string(&self) -> String {
125 let mut s = String::from("");
126 if self.idx == 0 {
127 return s;
128 }
129
130 let mut pos = self.pos;
131 let mut idx = self.idx;
132 while pos > self.start {
133 if let Some(&ch) = self.chars.get(idx - 1) {
134 pos -= ch.len_utf8();
135 idx -= 1;
136 s.push(ch);
137 };
138 }
139 s.chars().rev().collect()
140 }
141}
142
/// Hand-written scanner: a state machine (`state`) driven over a scanning context (`ctx`).
/// It yields lexemes through its `Iterator` implementation.
#[derive(Debug)]
struct Lexer {
    /// State the machine is currently in; replaced on every `shift`.
    state: State,
    /// Input, cursor, pending-lexeme bounds and nesting flags.
    ctx: Context,
}
148
149impl Lexer {
151 fn new(input: &str) -> Self {
152 let ctx = Context::new(input);
153 let state = State::Start;
154 Self { state, ctx }
155 }
156
157 fn is_inside_braces(&self) -> bool {
158 self.ctx.brace_open
159 }
160
161 fn jump_outof_braces(&mut self) {
162 self.ctx.brace_open = false;
163 }
164
165 fn dive_into_braces(&mut self) {
166 self.ctx.brace_open = true;
167 }
168
169 fn is_inside_brackets(&self) -> bool {
170 self.ctx.bracket_open
171 }
172
173 fn jump_outof_brackets(&mut self) {
174 self.ctx.bracket_open = false;
175 }
176
177 fn dive_into_brackets(&mut self) {
178 self.ctx.bracket_open = true;
179 }
180
181 fn is_colon_scanned(&self) -> bool {
182 self.ctx.got_colon
183 }
184
185 fn set_colon_scanned(&mut self) {
186 self.ctx.got_colon = true;
187 }
188
189 fn reset_colon_scanned(&mut self) {
190 self.ctx.got_colon = false;
191 }
192
193 fn inc_paren_depth(&mut self) -> bool {
195 if self.ctx.paren_depth < usize::MAX {
196 self.ctx.paren_depth += 1;
197 return true;
198 }
199 false
200 }
201
202 fn dec_paren_depth(&mut self) -> bool {
204 if self.ctx.paren_depth >= 1 {
205 self.ctx.paren_depth -= 1;
206 return true;
207 }
208 false
209 }
210
211 fn is_paren_balanced(&self) -> bool {
212 self.ctx.paren_depth == 0
213 }
214
215 fn pop(&mut self) -> Option<char> {
216 self.ctx.pop()
217 }
218
219 fn backup(&mut self) -> bool {
220 self.ctx.backup()
221 }
222
223 fn peek(&self) -> Option<char> {
224 self.ctx.peek()
225 }
226
227 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
230 let lexeme = self.ctx.lexeme(token_id);
231 self.ctx.ignore();
232 lexeme
233 }
234
235 fn lexeme_string(&self) -> String {
236 self.ctx.lexeme_string()
237 }
238
239 fn ignore(&mut self) {
240 self.ctx.ignore();
241 }
242
243 fn is_eof(&self) -> bool {
244 self.ctx.eof
245 }
246
247 fn set_eof(&mut self) {
248 self.ctx.eof = true;
249 }
250}
251
252impl Lexer {
254 fn shift(&mut self) {
255 self.state = match self.state {
258 State::Start => self.start(),
259 State::End => State::Err("End state can not shift forward.".into()),
260 State::Lexeme(_) => State::Start,
261 State::String(ch) => self.accept_string(ch),
262 State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
263 State::Identifier => self.accept_identifier(),
264 State::NumberOrDuration => self.accept_number_or_duration(),
265 State::InsideBrackets => self.inside_brackets(),
266 State::InsideBraces => self.inside_braces(),
267 State::LineComment => self.ignore_comment_line(),
268 State::Escape(ch) => self.accept_escape(ch),
269 State::Space => self.ignore_space(),
270 State::Err(_) => State::End,
271 };
272 }
273
274 fn start(&mut self) -> State {
275 if self.is_inside_braces() {
276 return State::InsideBraces;
277 }
278
279 if self.is_inside_brackets() {
280 return State::InsideBrackets;
281 }
282
283 let c = match self.pop() {
284 None => {
285 if !self.is_paren_balanced() {
286 return State::Err("unclosed left parenthesis".into());
287 }
288
289 if !self.is_eof() {
290 self.set_eof();
291 return State::Lexeme(T_EOF);
292 }
293
294 return State::End;
295 }
296 Some(ch) => ch,
297 };
298
299 match c {
302 '#' => State::LineComment,
303 '@' => State::Lexeme(T_AT),
304 ',' => State::Lexeme(T_COMMA),
305 '*' => State::Lexeme(T_MUL),
306 '/' => State::Lexeme(T_DIV),
307 '%' => State::Lexeme(T_MOD),
308 '+' => State::Lexeme(T_ADD),
309 '-' => State::Lexeme(T_SUB),
310 '^' => State::Lexeme(T_POW),
311 '=' => match self.peek() {
312 Some('=') => {
313 self.pop();
314 State::Lexeme(T_EQLC)
315 }
316 Some('~') => State::Err("unexpected character after '=': '~'".into()),
318 _ => State::Lexeme(T_EQL),
319 },
320 '!' => match self.pop() {
321 Some('=') => State::Lexeme(T_NEQ),
322 Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
323 None => State::Err("'!' can not be at the end".into()),
324 },
325 '<' => match self.peek() {
326 Some('=') => {
327 self.pop();
328 State::Lexeme(T_LTE)
329 }
330 _ => State::Lexeme(T_LSS),
331 },
332 '>' => match self.peek() {
333 Some('=') => {
334 self.pop();
335 State::Lexeme(T_GTE)
336 }
337 _ => State::Lexeme(T_GTR),
338 },
339 ch if ch.is_ascii_whitespace() => self.ignore_space(),
340 ch if ch.is_ascii_digit() => State::NumberOrDuration,
341 '.' => match self.peek() {
342 Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
343 Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
344 None => State::Err("unexpected character: '.'".into()),
345 },
346 ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
347 ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
348 '(' => {
349 if self.inc_paren_depth() {
350 return State::Lexeme(T_LEFT_PAREN);
351 }
352 State::Err("too many left parentheses".into())
353 }
354 ')' => {
355 if self.is_paren_balanced() {
356 return State::Err("unexpected right parenthesis ')'".into());
357 }
358 if self.dec_paren_depth() {
359 return State::Lexeme(T_RIGHT_PAREN);
360 }
361 State::Err("unexpected right parenthesis ')'".into())
362 }
363 '{' => {
364 self.dive_into_braces();
365 State::Lexeme(T_LEFT_BRACE)
366 }
367 '}' => State::Err("unexpected right brace '}'".into()),
369 '[' => {
370 self.reset_colon_scanned();
371 self.dive_into_brackets();
372 State::Lexeme(T_LEFT_BRACKET)
373 }
374 ']' => State::Err("unexpected right bracket ']'".into()),
376 ch => State::Err(format!("unexpected character: {ch:?}")),
377 }
378 }
379
380 fn accept_number_or_duration(&mut self) -> State {
382 self.backup();
383 if self.scan_number() {
384 return State::Lexeme(T_NUMBER);
385 }
386
387 if self.accept_remaining_duration() {
389 return State::Lexeme(T_DURATION);
390 }
391
392 self.pop();
394 State::Err(format!(
395 "bad number or duration syntax: {}",
396 self.lexeme_string()
397 ))
398 }
399
400 fn accept_keyword_or_identifier(&mut self) -> State {
402 while let Some(ch) = self.peek() {
403 if is_alpha_numeric(ch) || ch == ':' {
404 self.pop();
405 } else {
406 break;
407 }
408 }
409
410 let s = self.lexeme_string();
411 let s_lower = s.to_lowercase();
412 match get_keyword_token(&s_lower) {
413 Some(token_id) => {
414 if token_id == T_FILL || token_id == T_FILL_LEFT || token_id == T_FILL_RIGHT {
417 let mut idx = self.ctx.idx;
419 let mut found_lparen = false;
420 while let Some(&ch) = self.ctx.chars.get(idx) {
421 if ch.is_ascii_whitespace() {
422 idx += 1;
423 } else if ch == '(' {
424 found_lparen = true;
425 break;
426 } else {
427 break;
428 }
429 }
430 if !found_lparen {
431 return State::Lexeme(T_IDENTIFIER);
433 }
434 }
435 State::Lexeme(token_id)
436 }
437 None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
438 _ => State::Lexeme(T_IDENTIFIER),
439 }
440 }
441
442 fn ignore_comment_line(&mut self) -> State {
444 while let Some(ch) = self.pop() {
445 if ch == '\r' || ch == '\n' {
446 break;
447 }
448 }
449 self.ignore();
450 State::Start
451 }
452
453 fn accept<F>(&mut self, f: F) -> bool
455 where
456 F: Fn(char) -> bool,
457 {
458 if let Some(ch) = self.peek() {
459 if f(ch) {
460 self.pop();
461 return true;
462 }
463 }
464 false
465 }
466
467 fn accept_run<F>(&mut self, f: F)
469 where
470 F: Fn(char) -> bool,
471 {
472 while let Some(ch) = self.peek() {
473 if f(ch) {
474 self.pop();
475 } else {
476 break;
477 }
478 }
479 }
480
481 fn ignore_space(&mut self) -> State {
483 self.backup(); self.accept_run(|ch| ch.is_ascii_whitespace());
485 self.ignore();
486 State::Start
487 }
488
489 fn scan_number(&mut self) -> bool {
492 let mut hex_digit = false;
493 if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
494 hex_digit = true;
495 }
496 let is_valid_digit = |ch: char| -> bool {
497 if hex_digit {
498 ch.is_ascii_hexdigit()
499 } else {
500 ch.is_ascii_digit()
501 }
502 };
503
504 self.accept_run(is_valid_digit);
505 if self.accept(|ch| ch == '.') {
506 self.accept_run(is_valid_digit);
507 }
508 if self.accept(|ch| ch == 'e' || ch == 'E') {
509 self.accept(|ch| ch == '+' || ch == '-');
510 self.accept_run(|ch| ch.is_ascii_digit());
511 }
512
513 !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
517 }
518
519 fn accept_remaining_duration(&mut self) -> bool {
522 if !self.accept(|ch| "smhdwy".contains(ch)) {
524 return false;
525 }
526 self.accept(|ch| ch == 's');
529
530 while self.accept(|ch| ch.is_ascii_digit()) {
532 self.accept_run(|ch| ch.is_ascii_digit());
533 if !self.accept(|ch| "smhdw".contains(ch)) {
535 return false;
536 }
537 self.accept(|ch| ch == 's');
540 }
541
542 !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
543 }
544
545 fn accept_escape(&mut self, symbol: char) -> State {
549 match self.pop() {
550 Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
551 Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
552 None => State::Err("escape sequence not terminated".into()),
553 }
554 }
555
556 fn accept_string(&mut self, symbol: char) -> State {
558 while let Some(ch) = self.pop() {
559 if ch == '\\' {
560 return State::Escape(symbol);
561 }
562
563 if ch == symbol {
564 return State::Lexeme(T_STRING);
565 }
566 }
567
568 State::Err(format!("unterminated quoted string {symbol}"))
569 }
570
571 fn inside_braces(&mut self) -> State {
574 match self.pop() {
575 Some('#') => State::LineComment,
576 Some(',') => State::Lexeme(T_COMMA),
577 Some('o') | Some('O') => {
578 if let Some('r') | Some('R') = self.peek() {
579 self.pop();
580 if let Some(' ') = self.peek() {
581 State::Lexeme(T_LOR)
582 } else {
583 State::Identifier
584 }
585 } else {
586 State::Identifier
587 }
588 }
589 Some(ch) if ch.is_ascii_whitespace() => State::Space,
590 Some(ch) if is_alpha(ch) => State::Identifier,
591 Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
592 Some('=') => match self.peek() {
593 Some('~') => {
594 self.pop();
595 State::Lexeme(T_EQL_REGEX)
596 }
597 _ => State::Lexeme(T_EQL),
598 },
599 Some('!') => match self.pop() {
600 Some('~') => State::Lexeme(T_NEQ_REGEX),
601 Some('=') => State::Lexeme(T_NEQ),
602 Some(ch) => State::Err(format!(
603 "unexpected character after '!' inside braces: '{ch}'"
604 )),
605 None => State::Err("'!' can not be at the end".into()),
606 },
607 Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
608 Some('}') => {
609 self.jump_outof_braces();
610 State::Lexeme(T_RIGHT_BRACE)
611 }
612 Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
613 None => State::Err("unexpected end of input inside braces".into()),
614 }
615 }
616
617 fn last_char_matches<F>(&mut self, f: F) -> bool
619 where
620 F: Fn(char) -> bool,
621 {
622 if !self.backup() {
624 return false;
625 }
626 let matched = matches!(self.peek(), Some(ch) if f(ch));
627 self.pop();
628 matched
629 }
630
631 fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
633 self.backup();
635 let matched = self.last_char_matches(|ch| ch == '[');
636 self.pop();
637 matched
638 }
639
640 fn inside_brackets(&mut self) -> State {
642 match self.pop() {
643 Some(ch) if ch.is_ascii_whitespace() => State::Space,
644 Some(':') => {
645 if self.is_colon_scanned() {
646 return State::Err("unexpected second colon(:) in brackets".into());
647 }
648
649 if self.is_colon_the_first_char_in_brackets() {
650 return State::Err("expect duration before first colon(:) in brackets".into());
651 }
652
653 self.set_colon_scanned();
654 State::Lexeme(T_COLON)
655 }
656 Some(ch) if ch.is_ascii_digit() => self.accept_number_or_duration(),
657 Some(']') => {
658 self.jump_outof_brackets();
659 self.reset_colon_scanned();
660 State::Lexeme(T_RIGHT_BRACKET)
661 }
662 Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
663 Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
664 None => State::Err("unexpected end of input inside brackets".into()),
665 }
666 }
667
668 fn accept_identifier(&mut self) -> State {
671 self.accept_run(is_alpha_numeric);
672 State::Lexeme(T_IDENTIFIER)
673 }
674}
675
676impl Iterator for Lexer {
678 type Item = Result<LexemeType, String>;
679
680 fn next(&mut self) -> Option<Self::Item> {
681 self.shift();
682 match &self.state {
683 State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
684 State::Err(info) => Some(Err(info.clone())),
685 State::End => None,
686 _ => self.next(),
687 }
688 }
689}
690
691fn is_alpha_numeric(ch: char) -> bool {
692 is_alpha(ch) || ch.is_ascii_digit()
693}
694
/// Whether `ch` may start an identifier: an ASCII letter or an underscore.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
698
699pub(crate) fn is_label(s: &str) -> bool {
700 if s.is_empty() {
701 return false;
702 }
703 let mut chars = s.chars();
704 match chars.next() {
705 None => false,
706 Some(ch) if !is_alpha(ch) => false,
707 Some(_) => {
708 for ch in chars {
709 if !is_alpha_numeric(ch) {
710 return false;
711 }
712 }
713 true
714 }
715 }
716}
717
#[cfg(test)]
mod tests {
    use super::*;

    /// `(token id, start byte offset, byte length)` of one expected lexeme.
    type LexemeTuple = (TokenId, usize, usize);
    /// `(input, expected lexemes in order, expected trailing error message if any)`.
    type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

    /// `(input, expected items, actual items)`, ready to be compared.
    type Case = (
        &'static str,
        Vec<Result<LexemeType, String>>,
        Vec<Result<LexemeType, String>>,
    );

    /// Lexes every input and checks the produced items against the expectation.
    ///
    /// The end-of-input token is filtered out of the actual items so that the
    /// tables only need to list the meaningful lexemes.
    fn assert_matches(v: Vec<MatchTuple>) {
        let cases: Vec<Case> = v
            .into_iter()
            .map(|(input, lexemes, err)| {
                let mut expected: Vec<Result<LexemeType, String>> = lexemes
                    .into_iter()
                    .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
                    .collect();

                if let Some(s) = err {
                    expected.push(Err(s.to_string()));
                }

                let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
                    .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
                    .collect();
                (input, expected, actual)
            })
            .collect();

        for (input, expected, actual) in cases.iter() {
            assert_eq!(expected, actual, "\n<input>: {input}");
        }
    }

    #[test]
    fn test_common() {
        let cases = vec![
            (",", vec![(T_COMMA, 0, 1)], None),
            (
                "()",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                None,
            ),
            (
                "{}",
                vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
                None,
            ),
            (
                "[5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 1, 2),
                    (T_RIGHT_BRACKET, 3, 1),
                ],
                None,
            ),
            (
                "[ 5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 2, 2),
                    (T_RIGHT_BRACKET, 4, 1),
                ],
                None,
            ),
            // Two leading spaces: the duration starts at byte offset 3.
            (
                "[  5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 5, 1),
                ],
                None,
            ),
            // Two leading spaces and one trailing space.
            (
                "[  5m ]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 6, 1),
                ],
                None,
            ),
            ("\r\n\r", vec![], None),
        ];

        assert_matches(cases);
    }

    #[test]
    fn test_numbers() {
        let cases = vec![
            ("1", vec![(T_NUMBER, 0, 1)], None),
            ("4.23", vec![(T_NUMBER, 0, 4)], None),
            (".3", vec![(T_NUMBER, 0, 2)], None),
            ("5.", vec![(T_NUMBER, 0, 2)], None),
            ("NaN", vec![(T_NUMBER, 0, 3)], None),
            ("nAN", vec![(T_NUMBER, 0, 3)], None),
            ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
            ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
            ("iNf", vec![(T_NUMBER, 0, 3)], None),
            ("Inf", vec![(T_NUMBER, 0, 3)], None),
            ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
            (
                "+Inf 123",
                vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
            ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("0x123", vec![(T_NUMBER, 0, 5)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_strings() {
        let cases = vec![
            ("\"test\\tsequence\"", vec![(T_STRING, 0, 16)], None),
            ("\"test\\\\.expression\"", vec![(T_STRING, 0, 19)], None),
            (
                "\"test\\.expression\"",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (
                "`test\\.expression`",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            // A non-ASCII digit after '.' does not start a number.
            (".٩", vec![], Some("unexpected character after '.': '٩'")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_durations() {
        let cases = vec![
            ("5s", vec![(T_DURATION, 0, 2)], None),
            ("123m", vec![(T_DURATION, 0, 4)], None),
            ("1h", vec![(T_DURATION, 0, 2)], None),
            ("3w", vec![(T_DURATION, 0, 2)], None),
            ("1y", vec![(T_DURATION, 0, 2)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_identifiers() {
        let cases = vec![
            ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
            ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
            (
                "abc d",
                vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)],
                None,
            ),
            (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
            ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_comments() {
        let cases = vec![
            ("# some comment", vec![], None),
            ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_operators() {
        let cases = vec![
            ("=", vec![(T_EQL, 0, 1)], None),
            (
                "{=}",
                vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)],
                None,
            ),
            ("==", vec![(T_EQLC, 0, 2)], None),
            ("!=", vec![(T_NEQ, 0, 2)], None),
            ("<", vec![(T_LSS, 0, 1)], None),
            (">", vec![(T_GTR, 0, 1)], None),
            (">=", vec![(T_GTE, 0, 2)], None),
            ("<=", vec![(T_LTE, 0, 2)], None),
            ("+", vec![(T_ADD, 0, 1)], None),
            ("-", vec![(T_SUB, 0, 1)], None),
            ("*", vec![(T_MUL, 0, 1)], None),
            ("/", vec![(T_DIV, 0, 1)], None),
            ("^", vec![(T_POW, 0, 1)], None),
            ("%", vec![(T_MOD, 0, 1)], None),
            ("AND", vec![(T_LAND, 0, 3)], None),
            ("or", vec![(T_LOR, 0, 2)], None),
            ("unless", vec![(T_LUNLESS, 0, 6)], None),
            ("@", vec![(T_AT, 0, 1)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_aggregators() {
        let cases = vec![
            ("sum", vec![(T_SUM, 0, 3)], None),
            ("AVG", vec![(T_AVG, 0, 3)], None),
            ("Max", vec![(T_MAX, 0, 3)], None),
            ("min", vec![(T_MIN, 0, 3)], None),
            ("count", vec![(T_COUNT, 0, 5)], None),
            ("stdvar", vec![(T_STDVAR, 0, 6)], None),
            ("stddev", vec![(T_STDDEV, 0, 6)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_keywords() {
        let cases = vec![
            ("offset", vec![(T_OFFSET, 0, 6)], None),
            ("by", vec![(T_BY, 0, 2)], None),
            ("without", vec![(T_WITHOUT, 0, 7)], None),
            ("on", vec![(T_ON, 0, 2)], None),
            ("ignoring", vec![(T_IGNORING, 0, 8)], None),
            ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
            ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
            ("bool", vec![(T_BOOL, 0, 4)], None),
            ("atan2", vec![(T_ATAN2, 0, 5)], None),
            // Without a following '(' the fill words are plain identifiers.
            ("fill", vec![(T_IDENTIFIER, 0, 4)], None),
            ("fill_left", vec![(T_IDENTIFIER, 0, 9)], None),
            ("fill_right", vec![(T_IDENTIFIER, 0, 10)], None),
            // Directly followed by '(' they are keywords.
            (
                "fill(1)",
                vec![
                    (T_FILL, 0, 4),
                    (T_LEFT_PAREN, 4, 1),
                    (T_NUMBER, 5, 1),
                    (T_RIGHT_PAREN, 6, 1),
                ],
                None,
            ),
            (
                "fill_left(1)",
                vec![
                    (T_FILL_LEFT, 0, 9),
                    (T_LEFT_PAREN, 9, 1),
                    (T_NUMBER, 10, 1),
                    (T_RIGHT_PAREN, 11, 1),
                ],
                None,
            ),
            (
                "fill_right(2)",
                vec![
                    (T_FILL_RIGHT, 0, 10),
                    (T_LEFT_PAREN, 10, 1),
                    (T_NUMBER, 11, 1),
                    (T_RIGHT_PAREN, 12, 1),
                ],
                None,
            ),
            // Whitespace before '(' is allowed.
            (
                "fill (1)",
                vec![
                    (T_FILL, 0, 4),
                    (T_LEFT_PAREN, 5, 1),
                    (T_NUMBER, 6, 1),
                    (T_RIGHT_PAREN, 7, 1),
                ],
                None,
            ),
            (
                "fill_left (1)",
                vec![
                    (T_FILL_LEFT, 0, 9),
                    (T_LEFT_PAREN, 10, 1),
                    (T_NUMBER, 11, 1),
                    (T_RIGHT_PAREN, 12, 1),
                ],
                None,
            ),
            (
                "fill_right (2)",
                vec![
                    (T_FILL_RIGHT, 0, 10),
                    (T_LEFT_PAREN, 11, 1),
                    (T_NUMBER, 12, 1),
                    (T_RIGHT_PAREN, 13, 1),
                ],
                None,
            ),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_preprocessors() {
        let cases = vec![
            ("start", vec![(T_START, 0, 5)], None),
            ("end", vec![(T_END, 0, 3)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_selectors() {
        let cases = vec![
            ("北京", vec![], Some("unexpected character: '北'")),
            ("北京='a'", vec![], Some("unexpected character: '北'")),
            ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
            (
                "{foo='bar'}",
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar\"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 10),
                    (T_RIGHT_BRACE, 15, 1),
                ],
                None,
            ),
            (
                r#"{NaN != "bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_NEQ, 5, 2),
                    (T_STRING, 8, 5),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{alert=~"bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 5),
                    (T_EQL_REGEX, 6, 2),
                    (T_STRING, 8, 5),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{on!~"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 2),
                    (T_NEQ_REGEX, 3, 2),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{alert!#"bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
                Some("unexpected character after '!' inside braces: '#'"),
            ),
            (
                r#"{foo:a="bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
                Some("unexpected character inside braces: ':'"),
            ),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_common_errors() {
        let cases = vec![
            ("=~", vec![], Some("unexpected character after '=': '~'")),
            ("!~", vec![], Some("unexpected character after '!': '~'")),
            ("!(", vec![], Some("unexpected character after '!': '('")),
            ("1a", vec![], Some("bad number or duration syntax: 1a")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_mismatched_parentheses() {
        let cases = vec![
            (
                "(",
                vec![(T_LEFT_PAREN, 0, 1)],
                Some("unclosed left parenthesis"),
            ),
            (")", vec![], Some("unexpected right parenthesis ')'")),
            (
                "())",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                Some("unexpected right parenthesis ')'"),
            ),
            (
                "(()",
                vec![
                    (T_LEFT_PAREN, 0, 1),
                    (T_LEFT_PAREN, 1, 1),
                    (T_RIGHT_PAREN, 2, 1),
                ],
                Some("unclosed left parenthesis"),
            ),
            (
                "{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected end of input inside braces"),
            ),
            ("}", vec![], Some("unexpected right brace '}'")),
            (
                "{{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "{{}}",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected end of input inside brackets"),
            ),
            (
                "[[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            (
                "[]]",
                vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
                Some("unexpected right bracket ']'"),
            ),
            (
                "[[]]",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            ("]", vec![], Some("unexpected right bracket ']'")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_subqueries() {
        let cases = vec![
            (
                r#"test_name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_RIGHT_BRACKET, 25, 1),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 27, 5),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_RIGHT_PAREN, 43, 1),
                    (T_LEFT_BRACKET, 44, 1),
                    (T_DURATION, 45, 2),
                    (T_COLON, 47, 1),
                    (T_DURATION, 48, 2),
                    (T_RIGHT_BRACKET, 50, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                    (T_OFFSET, 29, 6),
                    (T_DURATION, 36, 3),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 27, 5),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_OFFSET, 44, 6),
                    (T_DURATION, 51, 2),
                    (T_RIGHT_PAREN, 53, 1),
                    (T_LEFT_BRACKET, 54, 1),
                    (T_DURATION, 55, 2),
                    (T_COLON, 57, 1),
                    (T_DURATION, 58, 2),
                    (T_RIGHT_BRACKET, 60, 1),
                ],
                None,
            ),
            (
                r#"test:name[ 5m]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACKET, 9, 1),
                    (T_DURATION, 11, 2),
                    (T_RIGHT_BRACKET, 13, 1),
                ],
                None,
            ),
            (
                r#"test:name{o:n!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 1),
                ],
                Some("unexpected character inside braces: ':'"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:4h]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m::]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                ],
                Some("expect duration before first colon(:) in brackets"),
            ),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_is_alpha() {
        assert!(is_alpha('_'));
        assert!(is_alpha('a'));
        assert!(is_alpha('z'));
        assert!(is_alpha('A'));
        assert!(is_alpha('Z'));
        assert!(!is_alpha('-'));
        assert!(!is_alpha('@'));
        assert!(!is_alpha('0'));
        assert!(!is_alpha('9'));
    }

    #[test]
    fn test_is_alpha_numeric() {
        assert!(is_alpha_numeric('_'));
        assert!(is_alpha_numeric('a'));
        assert!(is_alpha_numeric('z'));
        assert!(is_alpha_numeric('A'));
        assert!(is_alpha_numeric('Z'));
        assert!(is_alpha_numeric('0'));
        assert!(is_alpha_numeric('9'));
        assert!(!is_alpha_numeric('-'));
        assert!(!is_alpha_numeric('@'));
    }

    #[test]
    fn test_is_label() {
        assert!(is_label("_"));
        assert!(is_label("_up"));
        assert!(is_label("up"));
        assert!(is_label("up_"));
        assert!(is_label("up_system_1"));

        assert!(!is_label(""));
        assert!(!is_label("0"));
        assert!(!is_label("0up"));
        assert!(!is_label("0_up"));
    }
}