// varpulis_parser/lexer.rs
1//! Lexer for VPL using Logos
2
3use std::fmt;
4
5use logos::Logos;
6
7/// Token type for VPL
8#[derive(Logos, Debug, Clone, PartialEq)]
9#[logos(skip r"[ \t\r\n\f]+")]
10#[logos(skip(r"#[^\n]*", allow_greedy = true))]
11#[logos(skip r"/\*([^*]|\*[^/])*\*/")]
12pub enum Token {
13    // === Keywords ===
14    /// `stream` keyword.
15    #[token("stream")]
16    Stream,
17    /// `event` keyword.
18    #[token("event")]
19    Event,
20    /// `type` keyword.
21    #[token("type")]
22    Type,
23    /// `let` keyword.
24    #[token("let")]
25    Let,
26    /// `var` keyword.
27    #[token("var")]
28    Var,
29    /// `const` keyword.
30    #[token("const")]
31    Const,
32    /// `fn` keyword.
33    #[token("fn")]
34    Fn,
35    /// `config` keyword.
36    #[token("config")]
37    Config,
38
39    /// `if` keyword.
40    #[token("if")]
41    If,
42    /// `else` keyword.
43    #[token("else")]
44    Else,
45    /// `elif` keyword.
46    #[token("elif")]
47    Elif,
48    /// `then` keyword.
49    #[token("then")]
50    Then,
51    /// `match` keyword.
52    #[token("match")]
53    Match,
54    /// `for` keyword.
55    #[token("for")]
56    For,
57    /// `while` keyword.
58    #[token("while")]
59    While,
60    /// `break` keyword.
61    #[token("break")]
62    Break,
63    /// `continue` keyword.
64    #[token("continue")]
65    Continue,
66    /// `return` keyword.
67    #[token("return")]
68    Return,
69
70    /// `from` keyword.
71    #[token("from")]
72    From,
73    /// `where` keyword.
74    #[token("where")]
75    Where,
76    /// `select` keyword.
77    #[token("select")]
78    Select,
79    /// `join` keyword.
80    #[token("join")]
81    Join,
82    /// `merge` keyword.
83    #[token("merge")]
84    Merge,
85    /// `window` keyword.
86    #[token("window")]
87    Window,
88    /// `aggregate` keyword.
89    #[token("aggregate")]
90    Aggregate,
91    /// `partition_by` keyword.
92    #[token("partition_by")]
93    PartitionBy,
94    /// `order_by` keyword.
95    #[token("order_by")]
96    OrderBy,
97    /// `limit` keyword.
98    #[token("limit")]
99    Limit,
100    /// `distinct` keyword.
101    #[token("distinct")]
102    Distinct,
103    /// `emit` keyword.
104    #[token("emit")]
105    Emit,
106    /// `to` keyword.
107    #[token("to")]
108    To,
109    /// `on` keyword.
110    #[token("on")]
111    On,
112    /// `all` keyword.
113    #[token("all")]
114    All,
115    /// `within` keyword.
116    #[token("within")]
117    Within,
118
119    /// `pattern` keyword.
120    #[token("pattern")]
121    Pattern,
122    // Note: Stream operation names (map, filter, etc.) are NOT keywords
123    // They are parsed contextually after '.' and can be used as identifiers
124    /// Boolean literal `true`.
125    #[token("true")]
126    True,
127    /// Boolean literal `false`.
128    #[token("false")]
129    False,
130    /// Null literal.
131    #[token("null")]
132    Null,
133
134    /// Logical `and` operator.
135    #[token("and")]
136    And,
137    /// Logical `or` operator.
138    #[token("or")]
139    Or,
140    /// Logical `xor` operator.
141    #[token("xor")]
142    Xor,
143    /// Logical `not` operator.
144    #[token("not")]
145    Not,
146    /// `in` keyword (membership test / for loops).
147    #[token("in")]
148    In,
149    /// `is` keyword (type check).
150    #[token("is")]
151    Is,
152
153    /// `as` keyword (alias / cast).
154    #[token("as")]
155    As,
156    /// `extends` keyword (event inheritance).
157    #[token("extends")]
158    Extends,
159    /// `import` keyword.
160    #[token("import")]
161    Import,
162    /// `export` keyword.
163    #[token("export")]
164    Export,
165
166    // Type keywords
167    /// `int` type keyword.
168    #[token("int")]
169    IntType,
170    /// `float` type keyword.
171    #[token("float")]
172    FloatType,
173    /// `bool` type keyword.
174    #[token("bool")]
175    BoolType,
176    /// `str` type keyword.
177    #[token("str")]
178    StrType,
179    /// `timestamp` type keyword.
180    #[token("timestamp")]
181    TimestampType,
182    /// `duration` type keyword.
183    #[token("duration")]
184    DurationType,
185    /// `Stream` type keyword.
186    #[token("Stream")]
187    StreamType,
188
189    // === Operators ===
190    /// `+` operator.
191    #[token("+")]
192    Plus,
193    /// `-` operator.
194    #[token("-")]
195    Minus,
196    /// `*` operator.
197    #[token("*")]
198    Star,
199    /// `/` operator.
200    #[token("/")]
201    Slash,
202    /// `%` operator (modulo).
203    #[token("%")]
204    Percent,
205    /// `**` operator (exponentiation).
206    #[token("**")]
207    DoubleStar,
208
209    /// `==` equality comparison.
210    #[token("==")]
211    EqEq,
212    /// `!=` inequality comparison.
213    #[token("!=")]
214    NotEq,
215    /// `<` less-than comparison.
216    #[token("<")]
217    Lt,
218    /// `<=` less-than-or-equal comparison.
219    #[token("<=")]
220    Le,
221    /// `>` greater-than comparison.
222    #[token(">")]
223    Gt,
224    /// `>=` greater-than-or-equal comparison.
225    #[token(">=")]
226    Ge,
227
228    /// `&` bitwise AND.
229    #[token("&")]
230    Amp,
231    /// `|` bitwise OR.
232    #[token("|")]
233    Pipe,
234    /// `^` bitwise XOR.
235    #[token("^")]
236    Caret,
237    /// `~` bitwise NOT.
238    #[token("~")]
239    Tilde,
240    /// `<<` left shift.
241    #[token("<<")]
242    Shl,
243    /// `>>` right shift.
244    #[token(">>")]
245    Shr,
246
247    /// `=` assignment.
248    #[token("=")]
249    Eq,
250    /// `+=` add-assign.
251    #[token("+=")]
252    PlusEq,
253    /// `-=` subtract-assign.
254    #[token("-=")]
255    MinusEq,
256    /// `*=` multiply-assign.
257    #[token("*=")]
258    StarEq,
259    /// `/=` divide-assign.
260    #[token("/=")]
261    SlashEq,
262    /// `%=` modulo-assign.
263    #[token("%=")]
264    PercentEq,
265
266    /// `.` member access.
267    #[token(".")]
268    Dot,
269    /// `?.` optional chaining.
270    #[token("?.")]
271    QuestionDot,
272    /// `??` null coalescing.
273    #[token("??")]
274    QuestionQuestion,
275    /// `=>` fat arrow (lambdas / match arms).
276    #[token("=>")]
277    FatArrow,
278    /// `->` thin arrow (return type annotation).
279    #[token("->")]
280    Arrow,
281    /// `..` exclusive range.
282    #[token("..")]
283    DotDot,
284    /// `..=` inclusive range.
285    #[token("..=")]
286    DotDotEq,
287    /// `$` dollar sign (special variable prefix).
288    #[token("$")]
289    Dollar,
290
291    // === Delimiters ===
292    /// `(` left parenthesis.
293    #[token("(")]
294    LParen,
295    /// `)` right parenthesis.
296    #[token(")")]
297    RParen,
298    /// `[` left bracket.
299    #[token("[")]
300    LBracket,
301    /// `]` right bracket.
302    #[token("]")]
303    RBracket,
304    /// `{` left brace.
305    #[token("{")]
306    LBrace,
307    /// `}` right brace.
308    #[token("}")]
309    RBrace,
310    /// `,` comma separator.
311    #[token(",")]
312    Comma,
313    /// `:` colon (type annotations, block starts).
314    #[token(":")]
315    Colon,
316    /// `?` question mark (ternary / optional).
317    #[token("?")]
318    Question,
319    /// `@` at sign (timestamp literal prefix / decorator).
320    #[token("@")]
321    At,
322
323    // === Literals ===
324    /// Integer literal (e.g., `42`).
325    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
326    Integer(i64),
327
328    /// Floating-point literal (e.g., `3.14`, `1.0e10`).
329    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
330    Float(f64),
331
332    /// String literal (double- or single-quoted).
333    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
334        let s = lex.slice();
335        Some(s[1..s.len()-1].to_string())
336    })]
337    #[regex(r#"'([^'\\]|\\.)*'"#, |lex| {
338        let s = lex.slice();
339        Some(s[1..s.len()-1].to_string())
340    })]
341    String(String),
342
343    /// Duration literal (e.g., `5s`, `100ms`, `2h`).
344    #[regex(r"[0-9]+(ns|us|ms|s|m|h|d)", |lex| Some(lex.slice().to_string()))]
345    Duration(String),
346
347    /// Timestamp literal (e.g., `@2024-01-15T10:30:00Z`).
348    #[regex(r"@[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?", |lex| Some(lex.slice().to_string()))]
349    Timestamp(String),
350
351    // === Identifier ===
352    /// Identifier (e.g., variable name, event type).
353    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| Some(lex.slice().to_string()))]
354    Ident(String),
355
356    // === Special ===
357    /// End-of-file sentinel token.
358    Eof,
359}
360
361impl fmt::Display for Token {
362    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
363        match self {
364            Self::Stream => write!(f, "stream"),
365            Self::Event => write!(f, "event"),
366            Self::Type => write!(f, "type"),
367            Self::Let => write!(f, "let"),
368            Self::Var => write!(f, "var"),
369            Self::Const => write!(f, "const"),
370            Self::Fn => write!(f, "fn"),
371            Self::Config => write!(f, "config"),
372            Self::If => write!(f, "if"),
373            Self::Else => write!(f, "else"),
374            Self::Elif => write!(f, "elif"),
375            Self::Then => write!(f, "then"),
376            Self::Match => write!(f, "match"),
377            Self::For => write!(f, "for"),
378            Self::While => write!(f, "while"),
379            Self::Break => write!(f, "break"),
380            Self::Continue => write!(f, "continue"),
381            Self::Return => write!(f, "return"),
382            Self::From => write!(f, "from"),
383            Self::Where => write!(f, "where"),
384            Self::Select => write!(f, "select"),
385            Self::Join => write!(f, "join"),
386            Self::Merge => write!(f, "merge"),
387            Self::Window => write!(f, "window"),
388            Self::Aggregate => write!(f, "aggregate"),
389            Self::PartitionBy => write!(f, "partition_by"),
390            Self::OrderBy => write!(f, "order_by"),
391            Self::Limit => write!(f, "limit"),
392            Self::Distinct => write!(f, "distinct"),
393            Self::Emit => write!(f, "emit"),
394            Self::To => write!(f, "to"),
395            Self::On => write!(f, "on"),
396            Self::All => write!(f, "all"),
397            Self::Within => write!(f, "within"),
398            Self::Pattern => write!(f, "pattern"),
399            Self::True => write!(f, "true"),
400            Self::False => write!(f, "false"),
401            Self::Null => write!(f, "null"),
402            Self::And => write!(f, "and"),
403            Self::Or => write!(f, "or"),
404            Self::Xor => write!(f, "xor"),
405            Self::Not => write!(f, "not"),
406            Self::In => write!(f, "in"),
407            Self::Is => write!(f, "is"),
408            Self::As => write!(f, "as"),
409            Self::Extends => write!(f, "extends"),
410            Self::Import => write!(f, "import"),
411            Self::Export => write!(f, "export"),
412            Self::IntType => write!(f, "int"),
413            Self::FloatType => write!(f, "float"),
414            Self::BoolType => write!(f, "bool"),
415            Self::StrType => write!(f, "str"),
416            Self::TimestampType => write!(f, "timestamp"),
417            Self::DurationType => write!(f, "duration"),
418            Self::StreamType => write!(f, "Stream"),
419            Self::Plus => write!(f, "+"),
420            Self::Minus => write!(f, "-"),
421            Self::Star => write!(f, "*"),
422            Self::Slash => write!(f, "/"),
423            Self::Percent => write!(f, "%"),
424            Self::DoubleStar => write!(f, "**"),
425            Self::EqEq => write!(f, "=="),
426            Self::NotEq => write!(f, "!="),
427            Self::Lt => write!(f, "<"),
428            Self::Le => write!(f, "<="),
429            Self::Gt => write!(f, ">"),
430            Self::Ge => write!(f, ">="),
431            Self::Amp => write!(f, "&"),
432            Self::Pipe => write!(f, "|"),
433            Self::Caret => write!(f, "^"),
434            Self::Tilde => write!(f, "~"),
435            Self::Shl => write!(f, "<<"),
436            Self::Shr => write!(f, ">>"),
437            Self::Eq => write!(f, "="),
438            Self::PlusEq => write!(f, "+="),
439            Self::MinusEq => write!(f, "-="),
440            Self::StarEq => write!(f, "*="),
441            Self::SlashEq => write!(f, "/="),
442            Self::PercentEq => write!(f, "%="),
443            Self::Dot => write!(f, "."),
444            Self::QuestionDot => write!(f, "?."),
445            Self::QuestionQuestion => write!(f, "??"),
446            Self::FatArrow => write!(f, "=>"),
447            Self::Arrow => write!(f, "->"),
448            Self::DotDot => write!(f, ".."),
449            Self::DotDotEq => write!(f, "..="),
450            Self::Dollar => write!(f, "$"),
451            Self::LParen => write!(f, "("),
452            Self::RParen => write!(f, ")"),
453            Self::LBracket => write!(f, "["),
454            Self::RBracket => write!(f, "]"),
455            Self::LBrace => write!(f, "{{"),
456            Self::RBrace => write!(f, "}}"),
457            Self::Comma => write!(f, ","),
458            Self::Colon => write!(f, ":"),
459            Self::Question => write!(f, "?"),
460            Self::At => write!(f, "@"),
461            Self::Integer(n) => write!(f, "{n}"),
462            Self::Float(n) => write!(f, "{n}"),
463            Self::String(s) => write!(f, "\"{s}\""),
464            Self::Duration(d) => write!(f, "{d}"),
465            Self::Timestamp(t) => write!(f, "{t}"),
466            Self::Ident(s) => write!(f, "{s}"),
467            Self::Eof => write!(f, "EOF"),
468        }
469    }
470}
471
472/// Spanned token with position information
473#[derive(Debug, Clone, PartialEq)]
474pub struct SpannedToken {
475    /// The token value.
476    pub token: Token,
477    /// Byte offset of the first character of this token.
478    pub start: usize,
479    /// Byte offset past the last character of this token.
480    pub end: usize,
481}
482
483/// Lexer wrapper that produces spanned tokens
484pub struct Lexer<'source> {
485    inner: logos::Lexer<'source, Token>,
486    peeked: Option<SpannedToken>,
487    eof_emitted: bool,
488}
489
490impl std::fmt::Debug for Lexer<'_> {
491    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
492        f.debug_struct("Lexer").finish_non_exhaustive()
493    }
494}
495
496impl<'source> Lexer<'source> {
497    /// Create a new lexer over the given source string.
498    pub fn new(source: &'source str) -> Self {
499        Self {
500            inner: Token::lexer(source),
501            peeked: None,
502            eof_emitted: false,
503        }
504    }
505
506    /// Peek at the next token without consuming it.
507    pub fn peek(&mut self) -> Option<&SpannedToken> {
508        if self.peeked.is_none() {
509            self.peeked = self.next_token();
510        }
511        self.peeked.as_ref()
512    }
513
514    fn next_token(&mut self) -> Option<SpannedToken> {
515        match self.inner.next() {
516            Some(Ok(token)) => {
517                let span = self.inner.span();
518                Some(SpannedToken {
519                    token,
520                    start: span.start,
521                    end: span.end,
522                })
523            }
524            Some(Err(())) => {
525                let span = self.inner.span();
526                Some(SpannedToken {
527                    token: Token::Ident(self.inner.slice().to_string()),
528                    start: span.start,
529                    end: span.end,
530                })
531            }
532            None if !self.eof_emitted => {
533                self.eof_emitted = true;
534                let pos = self.inner.span().end;
535                Some(SpannedToken {
536                    token: Token::Eof,
537                    start: pos,
538                    end: pos,
539                })
540            }
541            None => None,
542        }
543    }
544}
545
546impl Iterator for Lexer<'_> {
547    type Item = SpannedToken;
548
549    fn next(&mut self) -> Option<Self::Item> {
550        if let Some(peeked) = self.peeked.take() {
551            return Some(peeked);
552        }
553        self.next_token()
554    }
555}
556
557/// Tokenize a source string into a vector of spanned tokens
558pub fn tokenize(source: &str) -> Vec<SpannedToken> {
559    Lexer::new(source).collect()
560}
561
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `src` and discard spans, keeping only the token values.
    fn toks(src: &str) -> Vec<Token> {
        tokenize(src).into_iter().map(|t| t.token).collect()
    }

    #[test]
    fn test_keywords() {
        assert_eq!(
            toks("stream event let var const fn"),
            vec![
                Token::Stream,
                Token::Event,
                Token::Let,
                Token::Var,
                Token::Const,
                Token::Fn,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_literals() {
        assert_eq!(
            toks("42 2.5 \"hello\" 5s true null"),
            vec![
                Token::Integer(42),
                Token::Float(2.5),
                Token::String("hello".to_string()),
                Token::Duration("5s".to_string()),
                Token::True,
                Token::Null,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_operators() {
        assert_eq!(
            toks("+ - * / == != <= >="),
            vec![
                Token::Plus,
                Token::Minus,
                Token::Star,
                Token::Slash,
                Token::EqEq,
                Token::NotEq,
                Token::Le,
                Token::Ge,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_stream_decl() {
        assert_eq!(
            toks("stream Trades = TradeEvent"),
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eq,
                Token::Ident("TradeEvent".to_string()),
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_comments() {
        assert_eq!(
            toks("# comment\nstream /* inline */ Trades"),
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eof,
            ]
        );
    }

    // ==========================================================================
    // Additional Coverage Tests
    // ==========================================================================

    #[test]
    fn test_more_keywords() {
        let tokens = toks("if else elif then match for while break continue return");
        assert!(tokens.contains(&Token::If));
        assert!(tokens.contains(&Token::Else));
        assert!(tokens.contains(&Token::Match));
        assert!(tokens.contains(&Token::For));
        assert!(tokens.contains(&Token::While));
        assert!(tokens.contains(&Token::Break));
        assert!(tokens.contains(&Token::Return));
    }

    #[test]
    fn test_stream_keywords() {
        let tokens = toks("where select join merge window aggregate emit");
        assert!(tokens.contains(&Token::Where));
        assert!(tokens.contains(&Token::Select));
        assert!(tokens.contains(&Token::Join));
        assert!(tokens.contains(&Token::Merge));
        assert!(tokens.contains(&Token::Window));
        assert!(tokens.contains(&Token::Aggregate));
        assert!(tokens.contains(&Token::Emit));
    }

    #[test]
    fn test_more_operators() {
        let tokens = toks("% ** < > & | ^ ~ << >> = += -= *= /= %=");
        assert!(tokens.contains(&Token::Percent));
        assert!(tokens.contains(&Token::DoubleStar));
        assert!(tokens.contains(&Token::Lt));
        assert!(tokens.contains(&Token::Gt));
        assert!(tokens.contains(&Token::Amp));
        assert!(tokens.contains(&Token::Pipe));
        assert!(tokens.contains(&Token::Caret));
        assert!(tokens.contains(&Token::Tilde));
        assert!(tokens.contains(&Token::Eq));
        assert!(tokens.contains(&Token::PlusEq));
    }

    #[test]
    fn test_delimiters() {
        let tokens = toks("( ) [ ] { } , : ? @");
        assert!(tokens.contains(&Token::LParen));
        assert!(tokens.contains(&Token::RParen));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
        assert!(tokens.contains(&Token::LBrace));
        assert!(tokens.contains(&Token::RBrace));
        assert!(tokens.contains(&Token::Comma));
        assert!(tokens.contains(&Token::Colon));
        assert!(tokens.contains(&Token::Question));
        assert!(tokens.contains(&Token::At));
    }

    #[test]
    fn test_special_operators() {
        let tokens = toks(". ?. ?? => -> .. ..= $");
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::QuestionDot));
        assert!(tokens.contains(&Token::QuestionQuestion));
        assert!(tokens.contains(&Token::FatArrow));
        assert!(tokens.contains(&Token::Arrow));
        assert!(tokens.contains(&Token::DotDot));
        assert!(tokens.contains(&Token::DotDotEq));
        assert!(tokens.contains(&Token::Dollar));
    }

    #[test]
    fn test_type_keywords() {
        let tokens = toks("int float bool str timestamp duration Stream");
        assert!(tokens.contains(&Token::IntType));
        assert!(tokens.contains(&Token::FloatType));
        assert!(tokens.contains(&Token::BoolType));
        assert!(tokens.contains(&Token::StrType));
        assert!(tokens.contains(&Token::TimestampType));
        assert!(tokens.contains(&Token::DurationType));
        assert!(tokens.contains(&Token::StreamType));
    }

    #[test]
    fn test_logical_keywords() {
        let tokens = toks("and or not in is as");
        assert!(tokens.contains(&Token::And));
        assert!(tokens.contains(&Token::Or));
        assert!(tokens.contains(&Token::Not));
        assert!(tokens.contains(&Token::In));
        assert!(tokens.contains(&Token::Is));
        assert!(tokens.contains(&Token::As));
    }

    #[test]
    fn test_duration_variants() {
        let tokens = toks("100ms 5s 10m 2h 1d");
        assert!(matches!(&tokens[0], Token::Duration(s) if s == "100ms"));
        assert!(matches!(&tokens[1], Token::Duration(s) if s == "5s"));
        assert!(matches!(&tokens[2], Token::Duration(s) if s == "10m"));
        assert!(matches!(&tokens[3], Token::Duration(s) if s == "2h"));
        assert!(matches!(&tokens[4], Token::Duration(s) if s == "1d"));
    }

    #[test]
    fn test_string_escapes() {
        let tokens = toks(r#""hello\nworld" "tab\there""#);
        assert!(matches!(&tokens[0], Token::String(_)));
        assert!(matches!(&tokens[1], Token::String(_)));
    }

    #[test]
    fn test_lexer_peek() {
        let mut lexer = Lexer::new("a b c");
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string()));
        // Peeking again must not advance.
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.next().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("b".to_string()));
    }

    #[test]
    fn test_lexer_empty() {
        assert_eq!(toks(""), vec![Token::Eof]);
    }

    #[test]
    fn test_token_display() {
        assert_eq!(format!("{}", Token::Stream), "stream");
        assert_eq!(format!("{}", Token::Plus), "+");
        assert_eq!(format!("{}", Token::Integer(42)), "42");
        assert_eq!(format!("{}", Token::Float(2.5)), "2.5");
        assert_eq!(format!("{}", Token::String("test".to_string())), "\"test\"");
        assert_eq!(format!("{}", Token::Ident("foo".to_string())), "foo");
        assert_eq!(format!("{}", Token::Eof), "EOF");
    }

    #[test]
    fn test_more_token_display() {
        assert_eq!(format!("{}", Token::Event), "event");
        assert_eq!(format!("{}", Token::Type), "type");
        assert_eq!(format!("{}", Token::Config), "config");
        assert_eq!(format!("{}", Token::Elif), "elif");
        assert_eq!(format!("{}", Token::Then), "then");
        assert_eq!(format!("{}", Token::Continue), "continue");
        assert_eq!(format!("{}", Token::PartitionBy), "partition_by");
        assert_eq!(format!("{}", Token::OrderBy), "order_by");
        assert_eq!(format!("{}", Token::Limit), "limit");
        assert_eq!(format!("{}", Token::Distinct), "distinct");
        assert_eq!(format!("{}", Token::To), "to");
        assert_eq!(format!("{}", Token::On), "on");
        assert_eq!(format!("{}", Token::All), "all");
        assert_eq!(format!("{}", Token::Within), "within");
        assert_eq!(format!("{}", Token::Pattern), "pattern");
        assert_eq!(format!("{}", Token::False), "false");
        assert_eq!(format!("{}", Token::Extends), "extends");
        assert_eq!(format!("{}", Token::Import), "import");
        assert_eq!(format!("{}", Token::Export), "export");
    }

    #[test]
    fn test_remaining_token_display() {
        assert_eq!(format!("{}", Token::Shl), "<<");
        assert_eq!(format!("{}", Token::Shr), ">>");
        assert_eq!(format!("{}", Token::MinusEq), "-=");
        assert_eq!(format!("{}", Token::StarEq), "*=");
        assert_eq!(format!("{}", Token::SlashEq), "/=");
        assert_eq!(format!("{}", Token::PercentEq), "%=");
        assert_eq!(format!("{}", Token::LBrace), "{");
        assert_eq!(format!("{}", Token::RBrace), "}");
        assert_eq!(format!("{}", Token::Duration("5m".to_string())), "5m");
        assert_eq!(
            format!("{}", Token::Timestamp("2024-01-01".to_string())),
            "2024-01-01"
        );
    }

    #[test]
    fn test_spanned_token_positions() {
        let spanned = tokenize("ab cd");
        assert_eq!(spanned[0].start, 0);
        assert_eq!(spanned[0].end, 2);
        assert_eq!(spanned[1].start, 3);
        assert_eq!(spanned[1].end, 5);
    }

    #[test]
    fn test_special_chars_in_code() {
        let tokens = toks("a.b.c[0]");
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
    }

    #[test]
    fn test_negative_number() {
        let tokens = toks("-42 -2.5");
        assert!(tokens.contains(&Token::Minus));
        assert!(tokens.contains(&Token::Integer(42)));
        assert!(tokens.contains(&Token::Float(2.5)));
    }
}