1use crate::parser::token::*;
16use cfgrammar::NewlineCache;
17use lrlex::{DefaultLexeme, DefaultLexerTypes, LRNonStreamingLexer};
18use lrpar::Lexeme;
19use std::fmt::Debug;
20
/// Characters accepted directly after a backslash inside a string literal.
/// `accept_escape` additionally accepts the quote character that opened the string.
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that open a string literal; the same character must close it.
const STRING_SYMBOLS: &str = r#"'"`"#;

/// Concrete lexeme type emitted by this lexer: an `lrlex` default lexeme keyed by `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
25
26pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<'_, '_, DefaultLexerTypes<TokenId>>, String> {
27 let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
28 match lexemes.last() {
29 Some(Err(info)) => Err(info.into()),
30 Some(Ok(_)) => {
31 let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
33 Ok(LRNonStreamingLexer::new(s, lexemes, NewlineCache::new()))
34 }
35 None => Err(format!("no expression found in input: '{s}'")),
36 }
37}
38
/// States of the lexer's state machine; `Lexer::shift` maps each state to the next one.
#[derive(Debug)]
enum State {
    /// Dispatch point: `shift` calls `start()` to decide what to scan next.
    Start,
    /// Scanning is over; the iterator returns `None` in this state.
    End,
    /// A token with the given id has been recognised; `shift` moves on to `Start`.
    Lexeme(TokenId),
    /// Scanning a plain identifier (entered from inside braces).
    Identifier,
    /// Scanning a word that may turn out to be a keyword, a metric identifier or an identifier.
    KeywordOrIdentifier,
    /// Scanning something that starts like a number and may be a duration.
    NumberOrDuration,
    /// Scanning between `[` and `]`.
    InsideBrackets,
    /// Scanning between `{` and `}`.
    InsideBraces,
    /// Skipping a `#` comment up to the end of the line.
    LineComment,
    /// Skipping a run of ASCII whitespace.
    Space,
    /// Inside a string literal opened by the carried quote character.
    String(char),
    /// Directly after a backslash inside a string opened by the carried quote character.
    Escape(char),
    /// Scanning failed with the carried message; `shift` moves on to `End`.
    Err(String),
}
55
/// Cursor and bookkeeping data for one lexing run.
#[derive(Debug)]
struct Context {
    /// The input, decoded into `char`s.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    idx: usize,
    /// Byte offset at which the lexeme currently being scanned begins.
    start: usize,
    /// Byte offset of the next character to read (advanced by `len_utf8` per character).
    pos: usize,
    /// Number of `(` seen that have not yet been closed.
    paren_depth: usize,
    /// Whether scanning is currently between `{` and `}`.
    brace_open: bool,
    /// Whether scanning is currently between `[` and `]`.
    bracket_open: bool,
    /// Whether a `:` has already been seen inside the current brackets.
    got_colon: bool,
    /// Whether the end-of-input token has already been emitted.
    eof: bool,
}
70
71impl Context {
72 fn new(input: &str) -> Context {
73 Self {
74 chars: input.chars().collect(),
75 idx: 0,
76 start: 0,
77 pos: 0,
78
79 paren_depth: 0,
80 brace_open: false,
81 bracket_open: false,
82 got_colon: false,
83 eof: false,
84 }
85 }
86
87 fn pop(&mut self) -> Option<char> {
89 let ch = self.peek()?;
90 self.pos += ch.len_utf8();
91 self.idx += 1;
92 Some(ch)
93 }
94
95 fn backup(&mut self) -> bool {
98 if let Some(ch) = self.chars.get(self.idx - 1) {
99 self.pos -= ch.len_utf8();
100 self.idx -= 1;
101 return true;
102 };
103 false
104 }
105
106 fn peek(&self) -> Option<char> {
108 self.chars.get(self.idx).copied()
109 }
110
111 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
113 let start = self.start;
114 let len = self.pos - self.start;
115 DefaultLexeme::new(token_id, start, len)
116 }
117
118 fn ignore(&mut self) {
120 self.start = self.pos;
121 }
122
123 fn lexeme_string(&self) -> String {
125 let mut s = String::from("");
126 if self.idx == 0 {
127 return s;
128 }
129
130 let mut pos = self.pos;
131 let mut idx = self.idx;
132 while pos > self.start {
133 if let Some(&ch) = self.chars.get(idx - 1) {
134 pos -= ch.len_utf8();
135 idx -= 1;
136 s.push(ch);
137 };
138 }
139 s.chars().rev().collect()
140 }
141}
142
/// Hand-written lexer: the current state of the state machine plus its scanning context.
#[derive(Debug)]
struct Lexer {
    /// State the machine is currently in.
    state: State,
    /// Input, cursors and nesting flags.
    ctx: Context,
}
148
149impl Lexer {
151 fn new(input: &str) -> Self {
152 let ctx = Context::new(input);
153 let state = State::Start;
154 Self { state, ctx }
155 }
156
157 fn is_inside_braces(&self) -> bool {
158 self.ctx.brace_open
159 }
160
161 fn jump_outof_braces(&mut self) {
162 self.ctx.brace_open = false;
163 }
164
165 fn dive_into_braces(&mut self) {
166 self.ctx.brace_open = true;
167 }
168
169 fn is_inside_brackets(&self) -> bool {
170 self.ctx.bracket_open
171 }
172
173 fn jump_outof_brackets(&mut self) {
174 self.ctx.bracket_open = false;
175 }
176
177 fn dive_into_brackets(&mut self) {
178 self.ctx.bracket_open = true;
179 }
180
181 fn is_colon_scanned(&self) -> bool {
182 self.ctx.got_colon
183 }
184
185 fn set_colon_scanned(&mut self) {
186 self.ctx.got_colon = true;
187 }
188
189 fn reset_colon_scanned(&mut self) {
190 self.ctx.got_colon = false;
191 }
192
193 fn inc_paren_depth(&mut self) -> bool {
195 if self.ctx.paren_depth < usize::MAX {
196 self.ctx.paren_depth += 1;
197 return true;
198 }
199 false
200 }
201
202 fn dec_paren_depth(&mut self) -> bool {
204 if self.ctx.paren_depth >= 1 {
205 self.ctx.paren_depth -= 1;
206 return true;
207 }
208 false
209 }
210
211 fn is_paren_balanced(&self) -> bool {
212 self.ctx.paren_depth == 0
213 }
214
215 fn pop(&mut self) -> Option<char> {
216 self.ctx.pop()
217 }
218
219 fn backup(&mut self) -> bool {
220 self.ctx.backup()
221 }
222
223 fn peek(&self) -> Option<char> {
224 self.ctx.peek()
225 }
226
227 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
230 let lexeme = self.ctx.lexeme(token_id);
231 self.ctx.ignore();
232 lexeme
233 }
234
235 fn lexeme_string(&self) -> String {
236 self.ctx.lexeme_string()
237 }
238
239 fn ignore(&mut self) {
240 self.ctx.ignore();
241 }
242
243 fn is_eof(&self) -> bool {
244 self.ctx.eof
245 }
246
247 fn set_eof(&mut self) {
248 self.ctx.eof = true;
249 }
250}
251
/// State-machine transitions and low-level scanning helpers.
impl Lexer {
    /// Performs exactly one state transition and stores the resulting state.
    fn shift(&mut self) {
        self.state = match self.state {
            State::Start => self.start(),
            // Shifting past the end is reported as an error state.
            State::End => State::Err("End state can not shift forward.".into()),
            // After a token has been produced, scanning resumes from the dispatch point.
            State::Lexeme(_) => State::Start,
            State::String(ch) => self.accept_string(ch),
            State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
            State::Identifier => self.accept_identifier(),
            State::NumberOrDuration => self.accept_number_or_duration(),
            State::InsideBrackets => self.inside_brackets(),
            State::InsideBraces => self.inside_braces(),
            State::LineComment => self.ignore_comment_line(),
            State::Escape(ch) => self.accept_escape(ch),
            State::Space => self.ignore_space(),
            // An error is terminal: the next shift ends the scan.
            State::Err(_) => State::End,
        };
    }

    /// Dispatches on the next character when no token is in progress.
    ///
    /// While a brace or bracket section is open, control is handed to the
    /// matching state without consuming anything. At end of input this
    /// reports unclosed parentheses, then emits `T_EOF` once, then ends.
    fn start(&mut self) -> State {
        if self.is_inside_braces() {
            return State::InsideBraces;
        }

        if self.is_inside_brackets() {
            return State::InsideBrackets;
        }

        let c = match self.pop() {
            None => {
                if !self.is_paren_balanced() {
                    return State::Err("unclosed left parenthesis".into());
                }

                // The EOF token is emitted only the first time the end is reached.
                if !self.is_eof() {
                    self.set_eof();
                    return State::Lexeme(T_EOF);
                }

                return State::End;
            }
            Some(ch) => ch,
        };

        match c {
            '#' => State::LineComment,
            '@' => State::Lexeme(T_AT),
            ',' => State::Lexeme(T_COMMA),
            '*' => State::Lexeme(T_MUL),
            '/' => State::Lexeme(T_DIV),
            '%' => State::Lexeme(T_MOD),
            '+' => State::Lexeme(T_ADD),
            '-' => State::Lexeme(T_SUB),
            '^' => State::Lexeme(T_POW),
            '=' => match self.peek() {
                Some('=') => {
                    self.pop();
                    State::Lexeme(T_EQLC)
                }
                // `=~` is rejected here; it is only produced by `inside_braces`.
                Some('~') => State::Err("unexpected character after '=': '~'".into()),
                _ => State::Lexeme(T_EQL),
            },
            // The character after '!' is consumed even when it is not '='.
            '!' => match self.pop() {
                Some('=') => State::Lexeme(T_NEQ),
                Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
                None => State::Err("'!' can not be at the end".into()),
            },
            '<' => match self.peek() {
                Some('=') => {
                    self.pop();
                    State::Lexeme(T_LTE)
                }
                _ => State::Lexeme(T_LSS),
            },
            '>' => match self.peek() {
                Some('=') => {
                    self.pop();
                    State::Lexeme(T_GTE)
                }
                _ => State::Lexeme(T_GTR),
            },
            ch if ch.is_ascii_whitespace() => self.ignore_space(),
            ch if ch.is_ascii_digit() => State::NumberOrDuration,
            // A leading '.' starts a number only when a digit follows.
            '.' => match self.peek() {
                Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
                Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
                None => State::Err("unexpected character: '.'".into()),
            },
            ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
            ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
            '(' => {
                if self.inc_paren_depth() {
                    return State::Lexeme(T_LEFT_PAREN);
                }
                State::Err("too many left parentheses".into())
            }
            ')' => {
                if self.is_paren_balanced() {
                    return State::Err("unexpected right parenthesis ')'".into());
                }
                if self.dec_paren_depth() {
                    return State::Lexeme(T_RIGHT_PAREN);
                }
                State::Err("unexpected right parenthesis ')'".into())
            }
            '{' => {
                self.dive_into_braces();
                State::Lexeme(T_LEFT_BRACE)
            }
            // A '}' seen here is outside any brace section.
            '}' => State::Err("unexpected right brace '}'".into()),
            '[' => {
                self.reset_colon_scanned();
                self.dive_into_brackets();
                State::Lexeme(T_LEFT_BRACKET)
            }
            // A ']' seen here is outside any bracket section.
            ']' => State::Err("unexpected right bracket ']'".into()),
            ch => State::Err(format!("unexpected character: {ch:?}")),
        }
    }

    /// Scans a number or, failing that, a duration.
    ///
    /// Callers have already consumed the first character, so the scan starts
    /// by stepping back onto it. When neither form matches, one more character
    /// is consumed so that the error message includes it.
    fn accept_number_or_duration(&mut self) -> State {
        self.backup();
        if self.scan_number() {
            return State::Lexeme(T_NUMBER);
        }

        // Continues from wherever `scan_number` stopped.
        if self.accept_remaining_duration() {
            return State::Lexeme(T_DURATION);
        }

        self.pop();
        State::Err(format!(
            "bad number or duration syntax: {}",
            self.lexeme_string()
        ))
    }

    /// Consumes the rest of a word made of letters, digits, `_` and `:` and classifies it.
    ///
    /// The lowercased text is looked up with `get_keyword_token`; otherwise the
    /// word is a metric identifier when it contains a `:` and a plain identifier when not.
    fn accept_keyword_or_identifier(&mut self) -> State {
        while let Some(ch) = self.peek() {
            if is_alpha_numeric(ch) || ch == ':' {
                self.pop();
            } else {
                break;
            }
        }

        let s = self.lexeme_string();
        match get_keyword_token(&s.to_lowercase()) {
            Some(token_id) => State::Lexeme(token_id),
            None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
            _ => State::Lexeme(T_IDENTIFIER),
        }
    }

    /// Skips a comment: consumes up to and including the first `\r` or `\n`
    /// (or to end of input) and discards the consumed text.
    fn ignore_comment_line(&mut self) -> State {
        while let Some(ch) = self.pop() {
            if ch == '\r' || ch == '\n' {
                break;
            }
        }
        self.ignore();
        State::Start
    }

    /// Consumes the next character if `f` accepts it; returns whether it did.
    fn accept<F>(&mut self, f: F) -> bool
    where
        F: Fn(char) -> bool,
    {
        if let Some(ch) = self.peek() {
            if f(ch) {
                self.pop();
                return true;
            }
        }
        false
    }

    /// Consumes characters for as long as `f` accepts them.
    fn accept_run<F>(&mut self, f: F)
    where
        F: Fn(char) -> bool,
    {
        while let Some(ch) = self.peek() {
            if f(ch) {
                self.pop();
            } else {
                break;
            }
        }
    }

    /// Skips a run of ASCII whitespace and discards it.
    ///
    /// The first whitespace character has already been consumed by the caller,
    /// so the scan steps back onto it before consuming the whole run.
    fn ignore_space(&mut self) -> State {
        self.backup();
        self.accept_run(|ch| ch.is_ascii_whitespace());
        self.ignore();
        State::Start
    }

    /// Consumes a numeric literal and reports whether it ended cleanly.
    ///
    /// Accepts an optional `0x`/`0X` prefix (switching digits to hexadecimal),
    /// digits, an optional fractional part and an optional exponent with sign.
    /// Returns `false` when the next character is a letter, `_` or `.`; the
    /// consumed characters are left consumed in that case.
    fn scan_number(&mut self) -> bool {
        let mut hex_digit = false;
        if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
            hex_digit = true;
        }
        let is_valid_digit = |ch: char| -> bool {
            if hex_digit {
                ch.is_ascii_hexdigit()
            } else {
                ch.is_ascii_digit()
            }
        };

        self.accept_run(is_valid_digit);
        if self.accept(|ch| ch == '.') {
            self.accept_run(is_valid_digit);
        }
        if self.accept(|ch| ch == 'e' || ch == 'E') {
            self.accept(|ch| ch == '+' || ch == '-');
            self.accept_run(|ch| ch.is_ascii_digit());
        }

        !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
    }

    /// Consumes the unit part of a duration whose leading digits are already consumed.
    ///
    /// Requires one unit from `smhdwy`, optionally followed by `s`, and then
    /// any number of `<digits><unit>` groups whose unit comes from `smhdw`
    /// (again optionally followed by `s`). Returns `false` when a unit is
    /// missing or when a letter, digit or `_` directly follows.
    fn accept_remaining_duration(&mut self) -> bool {
        if !self.accept(|ch| "smhdwy".contains(ch)) {
            return false;
        }
        // Lets `ms` through; other `<unit>s` combinations are not rejected here.
        self.accept(|ch| ch == 's');

        while self.accept(|ch| ch.is_ascii_digit()) {
            self.accept_run(|ch| ch.is_ascii_digit());
            // 'y' is not accepted for follow-up groups.
            if !self.accept(|ch| "smhdw".contains(ch)) {
                return false;
            }
            self.accept(|ch| ch == 's');
        }

        !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
    }

    /// Handles the character following a backslash inside a string opened by `symbol`.
    ///
    /// The character must be `symbol` itself or one of `ESCAPE_SYMBOLS`;
    /// scanning then continues inside the string.
    fn accept_escape(&mut self, symbol: char) -> State {
        match self.pop() {
            Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
            Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
            None => State::Err("escape sequence not terminated".into()),
        }
    }

    /// Consumes string content until the closing `symbol`, a backslash, or end of input.
    fn accept_string(&mut self, symbol: char) -> State {
        while let Some(ch) = self.pop() {
            if ch == '\\' {
                return State::Escape(symbol);
            }

            if ch == symbol {
                return State::Lexeme(T_STRING);
            }
        }

        State::Err(format!("unterminated quoted string {symbol}"))
    }

    /// Dispatches on the next character while between `{` and `}`.
    fn inside_braces(&mut self) -> State {
        match self.pop() {
            Some('#') => State::LineComment,
            Some(',') => State::Lexeme(T_COMMA),
            // `or` (any case) counts as the operator only when a literal space follows it;
            // in every other case the word is scanned as an identifier.
            Some('o') | Some('O') => {
                if let Some('r') | Some('R') = self.peek() {
                    self.pop();
                    if let Some(' ') = self.peek() {
                        State::Lexeme(T_LOR)
                    } else {
                        State::Identifier
                    }
                } else {
                    State::Identifier
                }
            }
            Some(ch) if ch.is_ascii_whitespace() => State::Space,
            Some(ch) if is_alpha(ch) => State::Identifier,
            Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
            Some('=') => match self.peek() {
                Some('~') => {
                    self.pop();
                    State::Lexeme(T_EQL_REGEX)
                }
                _ => State::Lexeme(T_EQL),
            },
            // The character after '!' is consumed even when it is not a valid operator.
            Some('!') => match self.pop() {
                Some('~') => State::Lexeme(T_NEQ_REGEX),
                Some('=') => State::Lexeme(T_NEQ),
                Some(ch) => State::Err(format!(
                    "unexpected character after '!' inside braces: '{ch}'"
                )),
                None => State::Err("'!' can not be at the end".into()),
            },
            Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
            Some('}') => {
                self.jump_outof_braces();
                State::Lexeme(T_RIGHT_BRACE)
            }
            Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
            None => State::Err("unexpected end of input inside braces".into()),
        }
    }

    /// Reports whether the most recently consumed character satisfies `f`.
    ///
    /// Steps back one character, tests it and consumes it again, so the cursor
    /// ends where it started. Returns `false` when nothing has been consumed.
    fn last_char_matches<F>(&mut self, f: F) -> bool
    where
        F: Fn(char) -> bool,
    {
        if !self.backup() {
            return false;
        }
        let matched = matches!(self.peek(), Some(ch) if f(ch));
        self.pop();
        matched
    }

    /// Reports whether the character directly before the just-consumed `:` is `[`.
    ///
    /// Steps back over the colon, tests the character before it, then consumes
    /// the colon again.
    fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
        self.backup();
        let matched = self.last_char_matches(|ch| ch == '[');
        self.pop();
        matched
    }

    /// Dispatches on the next character while between `[` and `]`.
    fn inside_brackets(&mut self) -> State {
        match self.pop() {
            Some(ch) if ch.is_ascii_whitespace() => State::Space,
            Some(':') => {
                if self.is_colon_scanned() {
                    return State::Err("unexpected second colon(:) in brackets".into());
                }

                if self.is_colon_the_first_char_in_brackets() {
                    return State::Err("expect duration before first colon(:) in brackets".into());
                }

                self.set_colon_scanned();
                State::Lexeme(T_COLON)
            }
            Some(ch) if ch.is_ascii_digit() => self.accept_number_or_duration(),
            Some(']') => {
                self.jump_outof_brackets();
                self.reset_colon_scanned();
                State::Lexeme(T_RIGHT_BRACKET)
            }
            Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
            Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
            None => State::Err("unexpected end of input inside brackets".into()),
        }
    }

    /// Consumes the remaining letters, digits and `_` of an identifier.
    fn accept_identifier(&mut self) -> State {
        self.accept_run(is_alpha_numeric);
        State::Lexeme(T_IDENTIFIER)
    }
}
651
652impl Iterator for Lexer {
654 type Item = Result<LexemeType, String>;
655
656 fn next(&mut self) -> Option<Self::Item> {
657 self.shift();
658 match &self.state {
659 State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
660 State::Err(info) => Some(Err(info.clone())),
661 State::End => None,
662 _ => self.next(),
663 }
664 }
665}
666
667fn is_alpha_numeric(ch: char) -> bool {
668 is_alpha(ch) || ch.is_ascii_digit()
669}
670
/// Returns `true` for `_` and for ASCII letters of either case.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
674
675pub(crate) fn is_label(s: &str) -> bool {
676 if s.is_empty() {
677 return false;
678 }
679 let mut chars = s.chars();
680 match chars.next() {
681 None => false,
682 Some(ch) if !is_alpha(ch) => false,
683 Some(_) => {
684 for ch in chars {
685 if !is_alpha_numeric(ch) {
686 return false;
687 }
688 }
689 true
690 }
691 }
692}
693
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected lexeme written as `(token id, start byte offset, byte length)`.
    type LexemeTuple = (TokenId, usize, usize);
    /// One test case: input, expected lexemes in order, and an optional
    /// trailing error message.
    type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

    /// A test case after evaluation: `(input, expected items, actual items)`.
    type Case = (
        &'static str,
        Vec<Result<LexemeType, String>>,
        Vec<Result<LexemeType, String>>,
    );

    /// Lexes every input and asserts that the produced items equal the
    /// expected ones. `T_EOF` lexemes are dropped from the actual output
    /// before the comparison.
    fn assert_matches(v: Vec<MatchTuple>) {
        let cases: Vec<Case> = v
            .into_iter()
            .map(|(input, lexemes, err)| {
                let mut expected: Vec<Result<LexemeType, String>> = lexemes
                    .into_iter()
                    .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
                    .collect();

                if let Some(s) = err {
                    expected.push(Err(s.to_string()));
                }

                let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
                    .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
                    .collect();
                (input, expected, actual)
            })
            .collect();

        for (input, expected, actual) in cases.iter() {
            assert_eq!(expected, actual, "\n<input>: {input}");
        }
    }

    #[test]
    fn test_common() {
        let cases = vec![
            (",", vec![(T_COMMA, 0, 1)], None),
            (
                "()",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                None,
            ),
            (
                "{}",
                vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
                None,
            ),
            (
                "[5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 1, 2),
                    (T_RIGHT_BRACKET, 3, 1),
                ],
                None,
            ),
            (
                "[ 5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 2, 2),
                    (T_RIGHT_BRACKET, 4, 1),
                ],
                None,
            ),
            // Two leading spaces: the duration therefore starts at byte 3.
            (
                "[  5m]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 5, 1),
                ],
                None,
            ),
            // Two leading spaces and one trailing space: `]` sits at byte 6.
            (
                "[  5m ]",
                vec![
                    (T_LEFT_BRACKET, 0, 1),
                    (T_DURATION, 3, 2),
                    (T_RIGHT_BRACKET, 6, 1),
                ],
                None,
            ),
            ("\r\n\r", vec![], None),
        ];

        assert_matches(cases);
    }

    #[test]
    fn test_numbers() {
        let cases = vec![
            ("1", vec![(T_NUMBER, 0, 1)], None),
            ("4.23", vec![(T_NUMBER, 0, 4)], None),
            (".3", vec![(T_NUMBER, 0, 2)], None),
            ("5.", vec![(T_NUMBER, 0, 2)], None),
            ("NaN", vec![(T_NUMBER, 0, 3)], None),
            ("nAN", vec![(T_NUMBER, 0, 3)], None),
            ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
            ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
            ("iNf", vec![(T_NUMBER, 0, 3)], None),
            ("Inf", vec![(T_NUMBER, 0, 3)], None),
            ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
            (
                "+Inf 123",
                vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
            ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
            (
                "-Inf 123",
                vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)],
                None,
            ),
            ("0x123", vec![(T_NUMBER, 0, 5)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_strings() {
        let cases = vec![
            ("\"test\\tsequence\"", vec![(T_STRING, 0, 16)], None),
            ("\"test\\\\.expression\"", vec![(T_STRING, 0, 19)], None),
            (
                "\"test\\.expression\"",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (
                "`test\\.expression`",
                vec![],
                Some("unknown escape sequence '.'"),
            ),
            (".٩", vec![], Some("unexpected character after '.': '٩'")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_durations() {
        let cases = vec![
            ("5s", vec![(T_DURATION, 0, 2)], None),
            ("123m", vec![(T_DURATION, 0, 4)], None),
            ("1h", vec![(T_DURATION, 0, 2)], None),
            ("3w", vec![(T_DURATION, 0, 2)], None),
            ("1y", vec![(T_DURATION, 0, 2)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_identifiers() {
        let cases = vec![
            ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
            ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
            (
                "abc d",
                vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)],
                None,
            ),
            (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
            ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_comments() {
        let cases = vec![
            ("# some comment", vec![], None),
            ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_operators() {
        let cases = vec![
            ("=", vec![(T_EQL, 0, 1)], None),
            (
                "{=}",
                vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)],
                None,
            ),
            ("==", vec![(T_EQLC, 0, 2)], None),
            ("!=", vec![(T_NEQ, 0, 2)], None),
            ("<", vec![(T_LSS, 0, 1)], None),
            (">", vec![(T_GTR, 0, 1)], None),
            (">=", vec![(T_GTE, 0, 2)], None),
            ("<=", vec![(T_LTE, 0, 2)], None),
            ("+", vec![(T_ADD, 0, 1)], None),
            ("-", vec![(T_SUB, 0, 1)], None),
            ("*", vec![(T_MUL, 0, 1)], None),
            ("/", vec![(T_DIV, 0, 1)], None),
            ("^", vec![(T_POW, 0, 1)], None),
            ("%", vec![(T_MOD, 0, 1)], None),
            ("AND", vec![(T_LAND, 0, 3)], None),
            ("or", vec![(T_LOR, 0, 2)], None),
            ("unless", vec![(T_LUNLESS, 0, 6)], None),
            ("@", vec![(T_AT, 0, 1)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_aggregators() {
        let cases = vec![
            ("sum", vec![(T_SUM, 0, 3)], None),
            ("AVG", vec![(T_AVG, 0, 3)], None),
            ("Max", vec![(T_MAX, 0, 3)], None),
            ("min", vec![(T_MIN, 0, 3)], None),
            ("count", vec![(T_COUNT, 0, 5)], None),
            ("stdvar", vec![(T_STDVAR, 0, 6)], None),
            ("stddev", vec![(T_STDDEV, 0, 6)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_keywords() {
        let cases = vec![
            ("offset", vec![(T_OFFSET, 0, 6)], None),
            ("by", vec![(T_BY, 0, 2)], None),
            ("without", vec![(T_WITHOUT, 0, 7)], None),
            ("on", vec![(T_ON, 0, 2)], None),
            ("ignoring", vec![(T_IGNORING, 0, 8)], None),
            ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
            ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
            ("bool", vec![(T_BOOL, 0, 4)], None),
            ("atan2", vec![(T_ATAN2, 0, 5)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_preprocessors() {
        let cases = vec![
            ("start", vec![(T_START, 0, 5)], None),
            ("end", vec![(T_END, 0, 3)], None),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_selectors() {
        let cases = vec![
            ("北京", vec![], Some("unexpected character: '北'")),
            ("北京='a'", vec![], Some("unexpected character: '北'")),
            ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
            (
                "{foo='bar'}",
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{foo="bar\"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_EQL, 4, 1),
                    (T_STRING, 5, 10),
                    (T_RIGHT_BRACE, 15, 1),
                ],
                None,
            ),
            (
                r#"{NaN != "bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 3),
                    (T_NEQ, 5, 2),
                    (T_STRING, 8, 5),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{alert=~"bar" }"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 5),
                    (T_EQL_REGEX, 6, 2),
                    (T_STRING, 8, 5),
                    (T_RIGHT_BRACE, 14, 1),
                ],
                None,
            ),
            (
                r#"{on!~"bar"}"#,
                vec![
                    (T_LEFT_BRACE, 0, 1),
                    (T_IDENTIFIER, 1, 2),
                    (T_NEQ_REGEX, 3, 2),
                    (T_STRING, 5, 5),
                    (T_RIGHT_BRACE, 10, 1),
                ],
                None,
            ),
            (
                r#"{alert!#"bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
                Some("unexpected character after '!' inside braces: '#'"),
            ),
            (
                r#"{foo:a="bar"}"#,
                vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
                Some("unexpected character inside braces: ':'"),
            ),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_common_errors() {
        let cases = vec![
            ("=~", vec![], Some("unexpected character after '=': '~'")),
            ("!~", vec![], Some("unexpected character after '!': '~'")),
            ("!(", vec![], Some("unexpected character after '!': '('")),
            ("1a", vec![], Some("bad number or duration syntax: 1a")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_mismatched_parentheses() {
        let cases = vec![
            (
                "(",
                vec![(T_LEFT_PAREN, 0, 1)],
                Some("unclosed left parenthesis"),
            ),
            (")", vec![], Some("unexpected right parenthesis ')'")),
            (
                "())",
                vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
                Some("unexpected right parenthesis ')'"),
            ),
            (
                "(()",
                vec![
                    (T_LEFT_PAREN, 0, 1),
                    (T_LEFT_PAREN, 1, 1),
                    (T_RIGHT_PAREN, 2, 1),
                ],
                Some("unclosed left parenthesis"),
            ),
            (
                "{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected end of input inside braces"),
            ),
            ("}", vec![], Some("unexpected right brace '}'")),
            (
                "{{",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "{{}}",
                vec![(T_LEFT_BRACE, 0, 1)],
                Some("unexpected left brace '{' inside braces"),
            ),
            (
                "[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected end of input inside brackets"),
            ),
            (
                "[[",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            (
                "[]]",
                vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
                Some("unexpected right bracket ']'"),
            ),
            (
                "[[]]",
                vec![(T_LEFT_BRACKET, 0, 1)],
                Some("unexpected left brace '[' inside brackets"),
            ),
            ("]", vec![], Some("unexpected right bracket ']'")),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_subqueries() {
        let cases = vec![
            (
                r#"test_name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                    (T_RIGHT_BRACKET, 26, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_RIGHT_BRACKET, 25, 1),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 27, 5),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_RIGHT_PAREN, 43, 1),
                    (T_LEFT_BRACKET, 44, 1),
                    (T_DURATION, 45, 2),
                    (T_COLON, 47, 1),
                    (T_DURATION, 48, 2),
                    (T_RIGHT_BRACKET, 50, 1),
                ],
                None,
            ),
            (
                r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 6),
                    (T_RIGHT_BRACE, 20, 1),
                    (T_LEFT_BRACKET, 21, 1),
                    (T_DURATION, 22, 2),
                    (T_COLON, 24, 1),
                    (T_DURATION, 25, 2),
                    (T_RIGHT_BRACKET, 27, 1),
                    (T_OFFSET, 29, 6),
                    (T_DURATION, 36, 3),
                ],
                None,
            ),
            (
                r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
                vec![
                    (T_IDENTIFIER, 0, 13),
                    (T_LEFT_PAREN, 13, 1),
                    (T_IDENTIFIER, 14, 4),
                    (T_LEFT_PAREN, 18, 1),
                    (T_IDENTIFIER, 19, 3),
                    (T_LEFT_BRACE, 22, 1),
                    (T_IDENTIFIER, 23, 3),
                    (T_EQL, 26, 1),
                    (T_STRING, 27, 5),
                    (T_RIGHT_BRACE, 32, 1),
                    (T_LEFT_BRACKET, 33, 1),
                    (T_DURATION, 34, 2),
                    (T_RIGHT_BRACKET, 36, 1),
                    (T_RIGHT_PAREN, 37, 1),
                    (T_LEFT_BRACKET, 38, 1),
                    (T_DURATION, 39, 2),
                    (T_COLON, 41, 1),
                    (T_RIGHT_BRACKET, 42, 1),
                    (T_OFFSET, 44, 6),
                    (T_DURATION, 51, 2),
                    (T_RIGHT_PAREN, 53, 1),
                    (T_LEFT_BRACKET, 54, 1),
                    (T_DURATION, 55, 2),
                    (T_COLON, 57, 1),
                    (T_DURATION, 58, 2),
                    (T_RIGHT_BRACKET, 60, 1),
                ],
                None,
            ),
            (
                r#"test:name[ 5m]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACKET, 9, 1),
                    (T_DURATION, 11, 2),
                    (T_RIGHT_BRACKET, 13, 1),
                ],
                None,
            ),
            (
                r#"test:name{o:n!~"bar"}[4m:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 1),
                ],
                Some("unexpected character inside braces: ':'"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:4h]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m:4s:]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                    (T_DURATION, 24, 2),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[4m::]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                    (T_DURATION, 21, 2),
                    (T_COLON, 23, 1),
                ],
                Some("unexpected second colon(:) in brackets"),
            ),
            (
                r#"test:name{on!~"bar"}[:4s]"#,
                vec![
                    (T_METRIC_IDENTIFIER, 0, 9),
                    (T_LEFT_BRACE, 9, 1),
                    (T_IDENTIFIER, 10, 2),
                    (T_NEQ_REGEX, 12, 2),
                    (T_STRING, 14, 5),
                    (T_RIGHT_BRACE, 19, 1),
                    (T_LEFT_BRACKET, 20, 1),
                ],
                Some("expect duration before first colon(:) in brackets"),
            ),
        ];
        assert_matches(cases);
    }

    #[test]
    fn test_is_alpha() {
        assert!(is_alpha('_'));
        assert!(is_alpha('a'));
        assert!(is_alpha('z'));
        assert!(is_alpha('A'));
        assert!(is_alpha('Z'));
        assert!(!is_alpha('-'));
        assert!(!is_alpha('@'));
        assert!(!is_alpha('0'));
        assert!(!is_alpha('9'));
    }

    #[test]
    fn test_is_alpha_numeric() {
        assert!(is_alpha_numeric('_'));
        assert!(is_alpha_numeric('a'));
        assert!(is_alpha_numeric('z'));
        assert!(is_alpha_numeric('A'));
        assert!(is_alpha_numeric('Z'));
        assert!(is_alpha_numeric('0'));
        assert!(is_alpha_numeric('9'));
        assert!(!is_alpha_numeric('-'));
        assert!(!is_alpha_numeric('@'));
    }

    #[test]
    fn test_is_label() {
        assert!(is_label("_"));
        assert!(is_label("_up"));
        assert!(is_label("up"));
        assert!(is_label("up_"));
        assert!(is_label("up_system_1"));

        assert!(!is_label(""));
        assert!(!is_label("0"));
        assert!(!is_label("0up"));
        assert!(!is_label("0_up"));
    }
}