1use crate::parser::token::*;
16use lrlex::{DefaultLexeme, LRNonStreamingLexer};
17use lrpar::Lexeme;
18use std::fmt::Debug;
19
/// Characters accepted directly after a backslash inside a string literal
/// (in addition to the quote character that opened the string).
const ESCAPE_SYMBOLS: &str = r"abfnrtv\01234567xuU";
/// Characters that open a string literal: single quote, double quote and backtick.
const STRING_SYMBOLS: &str = r#"'"`"#;
22
/// Concrete lexeme type produced by this lexer: an lrlex `DefaultLexeme` keyed by `TokenId`.
pub(crate) type LexemeType = DefaultLexeme<TokenId>;
24
25pub fn lexer(s: &str) -> Result<LRNonStreamingLexer<LexemeType, TokenId>, String> {
26 let lexemes: Vec<Result<LexemeType, String>> = Lexer::new(s).collect();
27 match lexemes.last() {
28 Some(Err(info)) => Err(info.into()),
29 Some(Ok(_)) => {
30 let lexemes = lexemes.into_iter().filter_map(|l| l.ok()).map(Ok).collect();
32 Ok(LRNonStreamingLexer::new(s, lexemes, Vec::new()))
33 }
34 None => Err(format!("no expression found in input: '{s}'")),
35 }
36}
37
/// States of the lexer's state machine; `Lexer::shift` maps each one to its handler.
#[derive(Debug)]
enum State {
    /// Dispatch state: the next character decides what gets scanned.
    Start,
    /// Terminal state; the iterator yields `None` here.
    End,
    /// A lexeme with this token id has been recognised and is ready to be emitted.
    Lexeme(TokenId),
    /// Scanning an identifier (entered from inside braces).
    Identifier,
    /// Scanning a keyword, identifier or metric identifier.
    KeywordOrIdentifier,
    /// Scanning a number or a duration.
    NumberOrDuration,
    /// Scanning the content of `[...]`.
    InsideBrackets,
    /// Scanning the content of `{...}`.
    InsideBraces,
    /// Skipping a `#` comment up to the end of the line.
    LineComment,
    /// Skipping a run of whitespace.
    Space,
    /// Inside a string literal opened by the given quote character.
    String(char),
    /// Directly after a backslash inside a string opened by the given quote character.
    Escape(char),
    /// Lexing failed with the given message.
    Err(String),
}
54
/// Cursor and bookkeeping flags shared by all lexer states.
#[derive(Debug)]
struct Context {
    /// The input, decoded into characters.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    idx: usize,
    /// Byte offset at which the lexeme currently being scanned starts.
    start: usize,
    /// Byte offset of the next character to read.
    pos: usize,
    /// Number of `(` seen that have not been closed yet.
    paren_depth: usize,
    /// Whether a `{` has been opened and not closed yet.
    brace_open: bool,
    /// Whether a `[` has been opened and not closed yet.
    bracket_open: bool,
    /// Whether a `:` has already been seen inside the current brackets.
    got_colon: bool,
    /// Whether the end-of-input lexeme has already been produced.
    eof: bool,
}
69
70impl Context {
71 fn new(input: &str) -> Context {
72 Self {
73 chars: input.chars().collect(),
74 idx: 0,
75 start: 0,
76 pos: 0,
77
78 paren_depth: 0,
79 brace_open: false,
80 bracket_open: false,
81 got_colon: false,
82 eof: false,
83 }
84 }
85
86 fn pop(&mut self) -> Option<char> {
88 let ch = self.peek()?;
89 self.pos += ch.len_utf8();
90 self.idx += 1;
91 Some(ch)
92 }
93
94 fn backup(&mut self) -> bool {
97 if let Some(ch) = self.chars.get(self.idx - 1) {
98 self.pos -= ch.len_utf8();
99 self.idx -= 1;
100 return true;
101 };
102 false
103 }
104
105 fn peek(&self) -> Option<char> {
107 self.chars.get(self.idx).copied()
108 }
109
110 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
112 let mut start = self.start;
113 let mut len = self.pos - self.start;
114 if token_id == T_STRING {
115 start += 1;
116 len -= 2;
117 }
118 DefaultLexeme::new(token_id, start, len)
119 }
120
121 fn ignore(&mut self) {
123 self.start = self.pos;
124 }
125
126 fn lexeme_string(&self) -> String {
128 let mut s = String::from("");
129 if self.idx == 0 {
130 return s;
131 }
132
133 let mut pos = self.pos;
134 let mut idx = self.idx;
135 while pos > self.start {
136 if let Some(&ch) = self.chars.get(idx - 1) {
137 pos -= ch.len_utf8();
138 idx -= 1;
139 s.push(ch);
140 };
141 }
142 s.chars().rev().collect()
143 }
144}
145
/// State-machine lexer: the current state plus the input cursor it operates on.
#[derive(Debug)]
struct Lexer {
    /// State the machine is currently in.
    state: State,
    /// Input cursor and bookkeeping flags.
    ctx: Context,
}
151
152impl Lexer {
154 fn new(input: &str) -> Self {
155 let ctx = Context::new(input);
156 let state = State::Start;
157 Self { state, ctx }
158 }
159
160 fn is_inside_braces(&self) -> bool {
161 self.ctx.brace_open
162 }
163
164 fn jump_outof_braces(&mut self) {
165 self.ctx.brace_open = false;
166 }
167
168 fn dive_into_braces(&mut self) {
169 self.ctx.brace_open = true;
170 }
171
172 fn is_inside_brackets(&self) -> bool {
173 self.ctx.bracket_open
174 }
175
176 fn jump_outof_brackets(&mut self) {
177 self.ctx.bracket_open = false;
178 }
179
180 fn dive_into_brackets(&mut self) {
181 self.ctx.bracket_open = true;
182 }
183
184 fn is_colon_scanned(&self) -> bool {
185 self.ctx.got_colon
186 }
187
188 fn set_colon_scanned(&mut self) {
189 self.ctx.got_colon = true;
190 }
191
192 fn reset_colon_scanned(&mut self) {
193 self.ctx.got_colon = false;
194 }
195
196 fn inc_paren_depth(&mut self) -> bool {
198 if self.ctx.paren_depth < usize::MAX {
199 self.ctx.paren_depth += 1;
200 return true;
201 }
202 false
203 }
204
205 fn dec_paren_depth(&mut self) -> bool {
207 if self.ctx.paren_depth >= 1 {
208 self.ctx.paren_depth -= 1;
209 return true;
210 }
211 false
212 }
213
214 fn is_paren_balanced(&self) -> bool {
215 self.ctx.paren_depth == 0
216 }
217
218 fn pop(&mut self) -> Option<char> {
219 self.ctx.pop()
220 }
221
222 fn backup(&mut self) -> bool {
223 self.ctx.backup()
224 }
225
226 fn peek(&self) -> Option<char> {
227 self.ctx.peek()
228 }
229
230 fn lexeme(&mut self, token_id: TokenId) -> LexemeType {
233 let lexeme = self.ctx.lexeme(token_id);
234 self.ctx.ignore();
235 lexeme
236 }
237
238 fn lexeme_string(&self) -> String {
239 self.ctx.lexeme_string()
240 }
241
242 fn ignore(&mut self) {
243 self.ctx.ignore();
244 }
245
246 fn is_eof(&self) -> bool {
247 self.ctx.eof
248 }
249
250 fn set_eof(&mut self) {
251 self.ctx.eof = true;
252 }
253}
254
255impl Lexer {
257 fn shift(&mut self) {
258 self.state = match self.state {
261 State::Start => self.start(),
262 State::End => State::Err("End state can not shift forward.".into()),
263 State::Lexeme(_) => State::Start,
264 State::String(ch) => self.accept_string(ch),
265 State::KeywordOrIdentifier => self.accept_keyword_or_identifier(),
266 State::Identifier => self.accept_identifier(),
267 State::NumberOrDuration => self.accept_number_or_duration(),
268 State::InsideBrackets => self.inside_brackets(),
269 State::InsideBraces => self.inside_braces(),
270 State::LineComment => self.ignore_comment_line(),
271 State::Escape(ch) => self.accept_escape(ch),
272 State::Space => self.ignore_space(),
273 State::Err(_) => State::End,
274 };
275 }
276
277 fn start(&mut self) -> State {
278 if self.is_inside_braces() {
279 return State::InsideBraces;
280 }
281
282 if self.is_inside_brackets() {
283 return State::InsideBrackets;
284 }
285
286 let c = match self.pop() {
287 None => {
288 if !self.is_paren_balanced() {
289 return State::Err("unclosed left parenthesis".into());
290 }
291
292 if !self.is_eof() {
293 self.set_eof();
294 return State::Lexeme(T_EOF);
295 }
296
297 return State::End;
298 }
299 Some(ch) => ch,
300 };
301
302 match c {
305 '#' => State::LineComment,
306 '@' => State::Lexeme(T_AT),
307 ',' => State::Lexeme(T_COMMA),
308 '*' => State::Lexeme(T_MUL),
309 '/' => State::Lexeme(T_DIV),
310 '%' => State::Lexeme(T_MOD),
311 '+' => State::Lexeme(T_ADD),
312 '-' => State::Lexeme(T_SUB),
313 '^' => State::Lexeme(T_POW),
314 '=' => match self.peek() {
315 Some('=') => {
316 self.pop();
317 State::Lexeme(T_EQLC)
318 }
319 Some('~') => State::Err("unexpected character after '=': '~'".into()),
321 _ => State::Lexeme(T_EQL),
322 },
323 '!' => match self.pop() {
324 Some('=') => State::Lexeme(T_NEQ),
325 Some(ch) => State::Err(format!("unexpected character after '!': '{ch}'")),
326 None => State::Err("'!' can not be at the end".into()),
327 },
328 '<' => match self.peek() {
329 Some('=') => {
330 self.pop();
331 State::Lexeme(T_LTE)
332 }
333 _ => State::Lexeme(T_LSS),
334 },
335 '>' => match self.peek() {
336 Some('=') => {
337 self.pop();
338 State::Lexeme(T_GTE)
339 }
340 _ => State::Lexeme(T_GTR),
341 },
342 ch if ch.is_ascii_whitespace() => self.ignore_space(),
343 ch if ch.is_ascii_digit() => State::NumberOrDuration,
344 '.' => match self.peek() {
345 Some(ch) if ch.is_ascii_digit() => State::NumberOrDuration,
346 Some(ch) => State::Err(format!("unexpected character after '.': '{ch}'")),
347 None => State::Err("unexpected character: '.'".into()),
348 },
349 ch if is_alpha(ch) || ch == ':' => State::KeywordOrIdentifier,
350 ch if STRING_SYMBOLS.contains(ch) => State::String(ch),
351 '(' => {
352 if self.inc_paren_depth() {
353 return State::Lexeme(T_LEFT_PAREN);
354 }
355 State::Err("too many left parentheses".into())
356 }
357 ')' => {
358 if self.is_paren_balanced() {
359 return State::Err("unexpected right parenthesis ')'".into());
360 }
361 if self.dec_paren_depth() {
362 return State::Lexeme(T_RIGHT_PAREN);
363 }
364 State::Err("unexpected right parenthesis ')'".into())
365 }
366 '{' => {
367 self.dive_into_braces();
368 State::Lexeme(T_LEFT_BRACE)
369 }
370 '}' => State::Err("unexpected right brace '}'".into()),
372 '[' => {
373 self.reset_colon_scanned();
374 self.dive_into_brackets();
375 State::Lexeme(T_LEFT_BRACKET)
376 }
377 ']' => State::Err("unexpected right bracket ']'".into()),
379 ch => State::Err(format!("unexpected character: {ch:?}")),
380 }
381 }
382
383 fn accept_duration(&mut self) -> State {
385 self.backup();
386 self.scan_number();
387 if !self.accept_remaining_duration() {
388 self.pop(); return State::Err(format!("bad duration syntax: {}", self.lexeme_string()));
390 }
391 State::Lexeme(T_DURATION)
392 }
393
394 fn accept_number_or_duration(&mut self) -> State {
396 self.backup();
397 if self.scan_number() {
398 return State::Lexeme(T_NUMBER);
399 }
400
401 if self.accept_remaining_duration() {
403 return State::Lexeme(T_DURATION);
404 }
405
406 self.pop();
408 State::Err(format!(
409 "bad number or duration syntax: {}",
410 self.lexeme_string()
411 ))
412 }
413
414 fn accept_keyword_or_identifier(&mut self) -> State {
416 while let Some(ch) = self.peek() {
417 if is_alpha_numeric(ch) || ch == ':' {
418 self.pop();
419 } else {
420 break;
421 }
422 }
423
424 let s = self.lexeme_string();
425 match get_keyword_token(&s.to_lowercase()) {
426 Some(token_id) => State::Lexeme(token_id),
427 None if s.contains(':') => State::Lexeme(T_METRIC_IDENTIFIER),
428 _ => State::Lexeme(T_IDENTIFIER),
429 }
430 }
431
432 fn ignore_comment_line(&mut self) -> State {
434 while let Some(ch) = self.pop() {
435 if ch == '\r' || ch == '\n' {
436 break;
437 }
438 }
439 self.ignore();
440 State::Start
441 }
442
443 fn accept<F>(&mut self, f: F) -> bool
445 where
446 F: Fn(char) -> bool,
447 {
448 if let Some(ch) = self.peek() {
449 if f(ch) {
450 self.pop();
451 return true;
452 }
453 }
454 false
455 }
456
457 fn accept_run<F>(&mut self, f: F)
459 where
460 F: Fn(char) -> bool,
461 {
462 while let Some(ch) = self.peek() {
463 if f(ch) {
464 self.pop();
465 } else {
466 break;
467 }
468 }
469 }
470
471 fn ignore_space(&mut self) -> State {
473 self.backup(); self.accept_run(|ch| ch.is_ascii_whitespace());
475 self.ignore();
476 State::Start
477 }
478
479 fn scan_number(&mut self) -> bool {
482 let mut hex_digit = false;
483 if self.accept(|ch| ch == '0') && self.accept(|ch| ch == 'x' || ch == 'X') {
484 hex_digit = true;
485 }
486 let is_valid_digit = |ch: char| -> bool {
487 if hex_digit {
488 ch.is_ascii_hexdigit()
489 } else {
490 ch.is_ascii_digit()
491 }
492 };
493
494 self.accept_run(is_valid_digit);
495 if self.accept(|ch| ch == '.') {
496 self.accept_run(is_valid_digit);
497 }
498 if self.accept(|ch| ch == 'e' || ch == 'E') {
499 self.accept(|ch| ch == '+' || ch == '-');
500 self.accept_run(|ch| ch.is_ascii_digit());
501 }
502
503 !matches!(self.peek(), Some(ch) if is_alpha(ch) || ch == '.')
507 }
508
509 fn accept_remaining_duration(&mut self) -> bool {
512 if !self.accept(|ch| "smhdwy".contains(ch)) {
514 return false;
515 }
516 self.accept(|ch| ch == 's');
519
520 while self.accept(|ch| ch.is_ascii_digit()) {
522 self.accept_run(|ch| ch.is_ascii_digit());
523 if !self.accept(|ch| "smhdw".contains(ch)) {
525 return false;
526 }
527 self.accept(|ch| ch == 's');
530 }
531
532 !matches!(self.peek(), Some(ch) if is_alpha_numeric(ch))
533 }
534
535 fn accept_escape(&mut self, symbol: char) -> State {
539 match self.pop() {
540 Some(ch) if ch == symbol || ESCAPE_SYMBOLS.contains(ch) => State::String(symbol),
541 Some(ch) => State::Err(format!("unknown escape sequence '{ch}'")),
542 None => State::Err("escape sequence not terminated".into()),
543 }
544 }
545
546 fn accept_string(&mut self, symbol: char) -> State {
548 while let Some(ch) = self.pop() {
549 if ch == '\\' {
550 return State::Escape(symbol);
551 }
552
553 if ch == symbol {
554 return State::Lexeme(T_STRING);
555 }
556 }
557
558 State::Err(format!("unterminated quoted string {symbol}"))
559 }
560
561 fn inside_braces(&mut self) -> State {
564 match self.pop() {
565 Some('#') => State::LineComment,
566 Some(',') => State::Lexeme(T_COMMA),
567 Some(ch) if ch.is_ascii_whitespace() => State::Space,
568 Some(ch) if is_alpha(ch) => State::Identifier,
569 Some(ch) if STRING_SYMBOLS.contains(ch) => State::String(ch),
570 Some('=') => match self.peek() {
571 Some('~') => {
572 self.pop();
573 State::Lexeme(T_EQL_REGEX)
574 }
575 _ => State::Lexeme(T_EQL),
576 },
577 Some('!') => match self.pop() {
578 Some('~') => State::Lexeme(T_NEQ_REGEX),
579 Some('=') => State::Lexeme(T_NEQ),
580 Some(ch) => State::Err(format!(
581 "unexpected character after '!' inside braces: '{ch}'"
582 )),
583 None => State::Err("'!' can not be at the end".into()),
584 },
585 Some('{') => State::Err("unexpected left brace '{' inside braces".into()),
586 Some('}') => {
587 self.jump_outof_braces();
588 State::Lexeme(T_RIGHT_BRACE)
589 }
590 Some(ch) => State::Err(format!("unexpected character inside braces: '{ch}'")),
591 None => State::Err("unexpected end of input inside braces".into()),
592 }
593 }
594
595 fn last_char_matches<F>(&mut self, f: F) -> bool
597 where
598 F: Fn(char) -> bool,
599 {
600 if !self.backup() {
602 return false;
603 }
604 let matched = matches!(self.peek(), Some(ch) if f(ch));
605 self.pop();
606 matched
607 }
608
609 fn is_colon_the_first_char_in_brackets(&mut self) -> bool {
611 self.backup();
613 let matched = self.last_char_matches(|ch| ch == '[');
614 self.pop();
615 matched
616 }
617
618 fn inside_brackets(&mut self) -> State {
620 match self.pop() {
621 Some(ch) if ch.is_ascii_whitespace() => State::Space,
622 Some(':') => {
623 if self.is_colon_scanned() {
624 return State::Err("unexpected second colon(:) in brackets".into());
625 }
626
627 if self.is_colon_the_first_char_in_brackets() {
628 return State::Err("expect duration before first colon(:) in brackets".into());
629 }
630
631 self.set_colon_scanned();
632 State::Lexeme(T_COLON)
633 }
634 Some(ch) if ch.is_ascii_digit() => self.accept_duration(),
635 Some(']') => {
636 self.jump_outof_brackets();
637 self.reset_colon_scanned();
638 State::Lexeme(T_RIGHT_BRACKET)
639 }
640 Some('[') => State::Err("unexpected left brace '[' inside brackets".into()),
641 Some(ch) => State::Err(format!("unexpected character inside brackets: '{ch}'")),
642 None => State::Err("unexpected end of input inside brackets".into()),
643 }
644 }
645
646 fn accept_identifier(&mut self) -> State {
649 self.accept_run(is_alpha_numeric);
650 State::Lexeme(T_IDENTIFIER)
651 }
652}
653
654impl Iterator for Lexer {
656 type Item = Result<LexemeType, String>;
657
658 fn next(&mut self) -> Option<Self::Item> {
659 self.shift();
660 match &self.state {
661 State::Lexeme(token_id) => Some(Ok(self.lexeme(*token_id))),
662 State::Err(info) => Some(Err(info.clone())),
663 State::End => None,
664 _ => self.next(),
665 }
666 }
667}
668
669fn is_alpha_numeric(ch: char) -> bool {
670 is_alpha(ch) || ch.is_ascii_digit()
671}
672
/// Whether `ch` is an ASCII letter or `_`.
fn is_alpha(ch: char) -> bool {
    matches!(ch, '_' | 'a'..='z' | 'A'..='Z')
}
676
677pub(crate) fn is_label(s: &str) -> bool {
678 if s.is_empty() {
679 return false;
680 }
681 let mut chars = s.chars();
682 match chars.next() {
683 None => false,
684 Some(ch) if !is_alpha(ch) => false,
685 Some(_) => {
686 for ch in chars {
687 if !is_alpha_numeric(ch) {
688 return false;
689 }
690 }
691 true
692 }
693 }
694}
695
696#[cfg(test)]
697mod tests {
698 use super::*;
699
/// Expected lexeme written as `(token id, start byte offset, byte length)`.
type LexemeTuple = (TokenId, usize, usize);
/// One test case: `(input, expected lexemes, expected trailing error message if any)`.
type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);

/// A prepared test case: `(input, expected lexer output, actual lexer output)`.
type Case = (
    &'static str,
    Vec<Result<LexemeType, String>>,
    Vec<Result<LexemeType, String>>,
);
711
712 fn assert_matches(v: Vec<MatchTuple>) {
713 let cases: Vec<Case> = v
714 .into_iter()
715 .map(|(input, lexemes, err)| {
716 let mut expected: Vec<Result<LexemeType, String>> = lexemes
717 .into_iter()
718 .map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
719 .collect();
720
721 if let Some(s) = err {
722 expected.push(Err(s.to_string()));
723 }
724
725 let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
726 .filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
728 .collect();
729 (input, expected, actual)
730 })
731 .collect();
732
733 for (input, expected, actual) in cases.iter() {
734 assert_eq!(expected, actual, "\n<input>: {}", input);
735 }
736 }
737
/// Punctuation, brackets with a duration, and whitespace handling.
#[test]
fn test_common() {
    let cases = vec![
        (",", vec![(T_COMMA, 0, 1)], None),
        (
            "()",
            vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
            None,
        ),
        (
            "{}",
            vec![(T_LEFT_BRACE, 0, 1), (T_RIGHT_BRACE, 1, 1)],
            None,
        ),
        (
            "[5m]",
            vec![
                (T_LEFT_BRACKET, 0, 1),
                (T_DURATION, 1, 2),
                (T_RIGHT_BRACKET, 3, 1),
            ],
            None,
        ),
        // One space after the opening bracket: the duration starts at byte 2.
        (
            "[ 5m]",
            vec![
                (T_LEFT_BRACKET, 0, 1),
                (T_DURATION, 2, 2),
                (T_RIGHT_BRACKET, 4, 1),
            ],
            None,
        ),
        // Two spaces after the opening bracket: the duration starts at byte 3.
        (
            "[  5m]",
            vec![
                (T_LEFT_BRACKET, 0, 1),
                (T_DURATION, 3, 2),
                (T_RIGHT_BRACKET, 5, 1),
            ],
            None,
        ),
        // Two leading spaces and one trailing space: the bracket closes at byte 6.
        (
            "[  5m ]",
            vec![
                (T_LEFT_BRACKET, 0, 1),
                (T_DURATION, 3, 2),
                (T_RIGHT_BRACKET, 6, 1),
            ],
            None,
        ),
        ("\r\n\r", vec![], None),
    ];

    assert_matches(cases);
}
793
/// Numeric literals, including the case-insensitive `NaN` / `Inf` keywords.
#[test]
fn test_numbers() {
    assert_matches(vec![
        ("1", vec![(T_NUMBER, 0, 1)], None),
        ("4.23", vec![(T_NUMBER, 0, 4)], None),
        (".3", vec![(T_NUMBER, 0, 2)], None),
        ("5.", vec![(T_NUMBER, 0, 2)], None),
        ("NaN", vec![(T_NUMBER, 0, 3)], None),
        ("nAN", vec![(T_NUMBER, 0, 3)], None),
        ("NaN 123", vec![(T_NUMBER, 0, 3), (T_NUMBER, 4, 3)], None),
        ("NaN123", vec![(T_IDENTIFIER, 0, 6)], None),
        ("iNf", vec![(T_NUMBER, 0, 3)], None),
        ("Inf", vec![(T_NUMBER, 0, 3)], None),
        ("+Inf", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3)], None),
        ("+Inf 123", vec![(T_ADD, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)], None),
        ("-Inf 123", vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)], None),
        ("Infoo", vec![(T_IDENTIFIER, 0, 5)], None),
        ("-Inf123", vec![(T_SUB, 0, 1), (T_IDENTIFIER, 1, 6)], None),
        ("-Inf 123", vec![(T_SUB, 0, 1), (T_NUMBER, 1, 3), (T_NUMBER, 5, 3)], None),
        ("0x123", vec![(T_NUMBER, 0, 5)], None),
    ]);
}
829
/// String literals and escape-sequence validation.
#[test]
fn test_strings() {
    assert_matches(vec![
        ("\"test\\tsequence\"", vec![(T_STRING, 1, 14)], None),
        ("\"test\\\\.expression\"", vec![(T_STRING, 1, 17)], None),
        ("\"test\\.expression\"", vec![], Some("unknown escape sequence '.'")),
        ("`test\\.expression`", vec![], Some("unknown escape sequence '.'")),
        (".٩", vec![], Some("unexpected character after '.': '٩'")),
    ]);
}
852
/// Single-unit durations.
#[test]
fn test_durations() {
    assert_matches(vec![
        ("5s", vec![(T_DURATION, 0, 2)], None),
        ("123m", vec![(T_DURATION, 0, 4)], None),
        ("1h", vec![(T_DURATION, 0, 2)], None),
        ("3w", vec![(T_DURATION, 0, 2)], None),
        ("1y", vec![(T_DURATION, 0, 2)], None),
    ]);
}
864
/// Plain identifiers and metric identifiers (those containing a colon).
#[test]
fn test_identifiers() {
    assert_matches(vec![
        ("abc", vec![(T_IDENTIFIER, 0, 3)], None),
        ("a:bc", vec![(T_METRIC_IDENTIFIER, 0, 4)], None),
        ("abc d", vec![(T_IDENTIFIER, 0, 3), (T_IDENTIFIER, 4, 1)], None),
        (":bc", vec![(T_METRIC_IDENTIFIER, 0, 3)], None),
        ("0a:bc", vec![], Some("bad number or duration syntax: 0a")),
    ]);
}
880
/// Line comments are skipped up to the end of the line.
#[test]
fn test_comments() {
    assert_matches(vec![
        ("# some comment", vec![], None),
        ("5 # 1+1\n5", vec![(T_NUMBER, 0, 1), (T_NUMBER, 8, 1)], None),
    ]);
}
889
/// Comparison, arithmetic and set operators.
#[test]
fn test_operators() {
    assert_matches(vec![
        ("=", vec![(T_EQL, 0, 1)], None),
        ("{=}", vec![(T_LEFT_BRACE, 0, 1), (T_EQL, 1, 1), (T_RIGHT_BRACE, 2, 1)], None),
        ("==", vec![(T_EQLC, 0, 2)], None),
        ("!=", vec![(T_NEQ, 0, 2)], None),
        ("<", vec![(T_LSS, 0, 1)], None),
        (">", vec![(T_GTR, 0, 1)], None),
        (">=", vec![(T_GTE, 0, 2)], None),
        ("<=", vec![(T_LTE, 0, 2)], None),
        ("+", vec![(T_ADD, 0, 1)], None),
        ("-", vec![(T_SUB, 0, 1)], None),
        ("*", vec![(T_MUL, 0, 1)], None),
        ("/", vec![(T_DIV, 0, 1)], None),
        ("^", vec![(T_POW, 0, 1)], None),
        ("%", vec![(T_MOD, 0, 1)], None),
        ("AND", vec![(T_LAND, 0, 3)], None),
        ("or", vec![(T_LOR, 0, 2)], None),
        ("unless", vec![(T_LUNLESS, 0, 6)], None),
        ("@", vec![(T_AT, 0, 1)], None),
    ]);
}
918
/// Aggregator keywords, matched case-insensitively.
#[test]
fn test_aggregators() {
    assert_matches(vec![
        ("sum", vec![(T_SUM, 0, 3)], None),
        ("AVG", vec![(T_AVG, 0, 3)], None),
        ("Max", vec![(T_MAX, 0, 3)], None),
        ("min", vec![(T_MIN, 0, 3)], None),
        ("count", vec![(T_COUNT, 0, 5)], None),
        ("stdvar", vec![(T_STDVAR, 0, 6)], None),
        ("stddev", vec![(T_STDDEV, 0, 6)], None),
    ]);
}
932
/// Modifier and grouping keywords.
#[test]
fn test_keywords() {
    assert_matches(vec![
        ("offset", vec![(T_OFFSET, 0, 6)], None),
        ("by", vec![(T_BY, 0, 2)], None),
        ("without", vec![(T_WITHOUT, 0, 7)], None),
        ("on", vec![(T_ON, 0, 2)], None),
        ("ignoring", vec![(T_IGNORING, 0, 8)], None),
        ("group_left", vec![(T_GROUP_LEFT, 0, 10)], None),
        ("group_right", vec![(T_GROUP_RIGHT, 0, 11)], None),
        ("bool", vec![(T_BOOL, 0, 4)], None),
        ("atan2", vec![(T_ATAN2, 0, 5)], None),
    ]);
}
948
/// The `start` and `end` keywords.
#[test]
fn test_preprocessors() {
    assert_matches(vec![
        ("start", vec![(T_START, 0, 5)], None),
        ("end", vec![(T_END, 0, 3)], None),
    ]);
}
957
/// Label matchers inside braces, plus a few inputs that must be rejected.
#[test]
fn test_selectors() {
    assert_matches(vec![
        ("北京", vec![], Some("unexpected character: '北'")),
        ("北京='a'", vec![], Some("unexpected character: '北'")),
        ("0a='a'", vec![], Some("bad number or duration syntax: 0a")),
        (
            "{foo='bar'}",
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3), (T_EQL, 4, 1),
                (T_STRING, 6, 3), (T_RIGHT_BRACE, 10, 1),
            ],
            None,
        ),
        (
            r#"{foo="bar"}"#,
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3), (T_EQL, 4, 1),
                (T_STRING, 6, 3), (T_RIGHT_BRACE, 10, 1),
            ],
            None,
        ),
        (
            r#"{foo="bar\"bar"}"#,
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3), (T_EQL, 4, 1),
                (T_STRING, 6, 8), (T_RIGHT_BRACE, 15, 1),
            ],
            None,
        ),
        (
            r#"{NaN != "bar" }"#,
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3), (T_NEQ, 5, 2),
                (T_STRING, 9, 3), (T_RIGHT_BRACE, 14, 1),
            ],
            None,
        ),
        (
            r#"{alert=~"bar" }"#,
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5), (T_EQL_REGEX, 6, 2),
                (T_STRING, 9, 3), (T_RIGHT_BRACE, 14, 1),
            ],
            None,
        ),
        (
            r#"{on!~"bar"}"#,
            vec![
                (T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 2), (T_NEQ_REGEX, 3, 2),
                (T_STRING, 6, 3), (T_RIGHT_BRACE, 10, 1),
            ],
            None,
        ),
        (
            r#"{alert!#"bar"}"#,
            vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 5)],
            Some("unexpected character after '!' inside braces: '#'"),
        ),
        (
            r#"{foo:a="bar"}"#,
            vec![(T_LEFT_BRACE, 0, 1), (T_IDENTIFIER, 1, 3)],
            Some("unexpected character inside braces: ':'"),
        ),
    ]);
}
1043
/// Inputs that fail on their very first token.
#[test]
fn test_common_errors() {
    assert_matches(vec![
        ("=~", vec![], Some("unexpected character after '=': '~'")),
        ("!~", vec![], Some("unexpected character after '!': '~'")),
        ("!(", vec![], Some("unexpected character after '!': '('")),
        ("1a", vec![], Some("bad number or duration syntax: 1a")),
    ]);
}
1054
/// Unbalanced or nested parentheses, braces and brackets.
#[test]
fn test_mismatched_parentheses() {
    assert_matches(vec![
        ("(", vec![(T_LEFT_PAREN, 0, 1)], Some("unclosed left parenthesis")),
        (")", vec![], Some("unexpected right parenthesis ')'")),
        (
            "())",
            vec![(T_LEFT_PAREN, 0, 1), (T_RIGHT_PAREN, 1, 1)],
            Some("unexpected right parenthesis ')'"),
        ),
        (
            "(()",
            vec![(T_LEFT_PAREN, 0, 1), (T_LEFT_PAREN, 1, 1), (T_RIGHT_PAREN, 2, 1)],
            Some("unclosed left parenthesis"),
        ),
        ("{", vec![(T_LEFT_BRACE, 0, 1)], Some("unexpected end of input inside braces")),
        ("}", vec![], Some("unexpected right brace '}'")),
        ("{{", vec![(T_LEFT_BRACE, 0, 1)], Some("unexpected left brace '{' inside braces")),
        ("{{}}", vec![(T_LEFT_BRACE, 0, 1)], Some("unexpected left brace '{' inside braces")),
        ("[", vec![(T_LEFT_BRACKET, 0, 1)], Some("unexpected end of input inside brackets")),
        ("[[", vec![(T_LEFT_BRACKET, 0, 1)], Some("unexpected left brace '[' inside brackets")),
        (
            "[]]",
            vec![(T_LEFT_BRACKET, 0, 1), (T_RIGHT_BRACKET, 1, 1)],
            Some("unexpected right bracket ']'"),
        ),
        ("[[]]", vec![(T_LEFT_BRACKET, 0, 1)], Some("unexpected left brace '[' inside brackets")),
        ("]", vec![], Some("unexpected right bracket ']'")),
    ]);
}
1118
/// Range and subquery brackets after selectors and function calls, including
/// the colon rules (at most one colon, and not directly after `[`).
#[test]
fn test_subqueries() {
    assert_matches(vec![
        (
            r#"test_name{on!~"bar"}[4m:4s]"#,
            vec![
                (T_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1), (T_DURATION, 21, 2), (T_COLON, 23, 1),
                (T_DURATION, 24, 2), (T_RIGHT_BRACKET, 26, 1),
            ],
            None,
        ),
        (
            r#"test:name{on!~"bar"}[4m:4s]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1), (T_DURATION, 21, 2), (T_COLON, 23, 1),
                (T_DURATION, 24, 2), (T_RIGHT_BRACKET, 26, 1),
            ],
            None,
        ),
        (
            r#"test:name{on!~"b:ar"}[4m:4s]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 4), (T_RIGHT_BRACE, 20, 1),
                (T_LEFT_BRACKET, 21, 1), (T_DURATION, 22, 2), (T_COLON, 24, 1),
                (T_DURATION, 25, 2), (T_RIGHT_BRACKET, 27, 1),
            ],
            None,
        ),
        (
            r#"test:name{on!~"b:ar"}[4m:]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 4), (T_RIGHT_BRACE, 20, 1),
                (T_LEFT_BRACKET, 21, 1), (T_DURATION, 22, 2), (T_COLON, 24, 1),
                (T_RIGHT_BRACKET, 25, 1),
            ],
            None,
        ),
        (
            r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]"#,
            vec![
                (T_IDENTIFIER, 0, 13), (T_LEFT_PAREN, 13, 1),
                (T_IDENTIFIER, 14, 4), (T_LEFT_PAREN, 18, 1),
                (T_IDENTIFIER, 19, 3), (T_LEFT_BRACE, 22, 1), (T_IDENTIFIER, 23, 3),
                (T_EQL, 26, 1), (T_STRING, 28, 3), (T_RIGHT_BRACE, 32, 1),
                (T_LEFT_BRACKET, 33, 1), (T_DURATION, 34, 2), (T_RIGHT_BRACKET, 36, 1),
                (T_RIGHT_PAREN, 37, 1),
                (T_LEFT_BRACKET, 38, 1), (T_DURATION, 39, 2), (T_COLON, 41, 1),
                (T_RIGHT_BRACKET, 42, 1),
                (T_RIGHT_PAREN, 43, 1),
                (T_LEFT_BRACKET, 44, 1), (T_DURATION, 45, 2), (T_COLON, 47, 1),
                (T_DURATION, 48, 2), (T_RIGHT_BRACKET, 50, 1),
            ],
            None,
        ),
        (
            r#"test:name{on!~"b:ar"}[4m:4s] offset 10m"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 4), (T_RIGHT_BRACE, 20, 1),
                (T_LEFT_BRACKET, 21, 1), (T_DURATION, 22, 2), (T_COLON, 24, 1),
                (T_DURATION, 25, 2), (T_RIGHT_BRACKET, 27, 1),
                (T_OFFSET, 29, 6), (T_DURATION, 36, 3),
            ],
            None,
        ),
        (
            r#"min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]"#,
            vec![
                (T_IDENTIFIER, 0, 13), (T_LEFT_PAREN, 13, 1),
                (T_IDENTIFIER, 14, 4), (T_LEFT_PAREN, 18, 1),
                (T_IDENTIFIER, 19, 3), (T_LEFT_BRACE, 22, 1), (T_IDENTIFIER, 23, 3),
                (T_EQL, 26, 1), (T_STRING, 28, 3), (T_RIGHT_BRACE, 32, 1),
                (T_LEFT_BRACKET, 33, 1), (T_DURATION, 34, 2), (T_RIGHT_BRACKET, 36, 1),
                (T_RIGHT_PAREN, 37, 1),
                (T_LEFT_BRACKET, 38, 1), (T_DURATION, 39, 2), (T_COLON, 41, 1),
                (T_RIGHT_BRACKET, 42, 1),
                (T_OFFSET, 44, 6), (T_DURATION, 51, 2),
                (T_RIGHT_PAREN, 53, 1),
                (T_LEFT_BRACKET, 54, 1), (T_DURATION, 55, 2), (T_COLON, 57, 1),
                (T_DURATION, 58, 2), (T_RIGHT_BRACKET, 60, 1),
            ],
            None,
        ),
        (
            r#"test:name[ 5m]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACKET, 9, 1),
                (T_DURATION, 11, 2), (T_RIGHT_BRACKET, 13, 1),
            ],
            None,
        ),
        (
            r#"test:name{o:n!~"bar"}[4m:4s]"#,
            vec![(T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 1)],
            Some("unexpected character inside braces: ':'"),
        ),
        (
            r#"test:name{on!~"bar"}[4m:4s:4h]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1), (T_DURATION, 21, 2), (T_COLON, 23, 1),
                (T_DURATION, 24, 2),
            ],
            Some("unexpected second colon(:) in brackets"),
        ),
        (
            r#"test:name{on!~"bar"}[4m:4s:]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1), (T_DURATION, 21, 2), (T_COLON, 23, 1),
                (T_DURATION, 24, 2),
            ],
            Some("unexpected second colon(:) in brackets"),
        ),
        (
            r#"test:name{on!~"bar"}[4m::]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1), (T_DURATION, 21, 2), (T_COLON, 23, 1),
            ],
            Some("unexpected second colon(:) in brackets"),
        ),
        (
            r#"test:name{on!~"bar"}[:4s]"#,
            vec![
                (T_METRIC_IDENTIFIER, 0, 9), (T_LEFT_BRACE, 9, 1), (T_IDENTIFIER, 10, 2),
                (T_NEQ_REGEX, 12, 2), (T_STRING, 15, 3), (T_RIGHT_BRACE, 19, 1),
                (T_LEFT_BRACKET, 20, 1),
            ],
            Some("expect duration before first colon(:) in brackets"),
        ),
    ]);
}
1352
/// `is_alpha` accepts ASCII letters and `_`, and nothing else.
#[test]
fn test_is_alpha() {
    for ch in "_azAZ".chars() {
        assert!(is_alpha(ch));
    }
    for ch in "-@09".chars() {
        assert!(!is_alpha(ch));
    }
}
1365
/// `is_alpha_numeric` accepts ASCII letters, digits and `_`, and nothing else.
#[test]
fn test_is_alpha_numeric() {
    for ch in "_azAZ09".chars() {
        assert!(is_alpha_numeric(ch));
    }
    for ch in "-@".chars() {
        assert!(!is_alpha_numeric(ch));
    }
}
1378
/// `is_label` accepts non-empty names that do not start with a digit.
#[test]
fn test_is_label() {
    for valid in ["_", "_up", "up", "up_", "up_system_1"].iter() {
        assert!(is_label(valid));
    }
    for invalid in ["", "0", "0up", "0_up"].iter() {
        assert!(!is_label(invalid));
    }
}
1392}