// varpulis_parser/lexer.rs
1//! Lexer for VPL using Logos
2
3use logos::Logos;
4use std::fmt;
5
6/// Token type for VPL
7#[derive(Logos, Debug, Clone, PartialEq)]
8#[logos(skip r"[ \t\r\n\f]+")]
9#[logos(skip r"#[^\n]*")]
10#[logos(skip r"/\*([^*]|\*[^/])*\*/")]
11pub enum Token {
12    // === Keywords ===
13    /// `stream` keyword.
14    #[token("stream")]
15    Stream,
16    /// `event` keyword.
17    #[token("event")]
18    Event,
19    /// `type` keyword.
20    #[token("type")]
21    Type,
22    /// `let` keyword.
23    #[token("let")]
24    Let,
25    /// `var` keyword.
26    #[token("var")]
27    Var,
28    /// `const` keyword.
29    #[token("const")]
30    Const,
31    /// `fn` keyword.
32    #[token("fn")]
33    Fn,
34    /// `config` keyword.
35    #[token("config")]
36    Config,
37
38    /// `if` keyword.
39    #[token("if")]
40    If,
41    /// `else` keyword.
42    #[token("else")]
43    Else,
44    /// `elif` keyword.
45    #[token("elif")]
46    Elif,
47    /// `then` keyword.
48    #[token("then")]
49    Then,
50    /// `match` keyword.
51    #[token("match")]
52    Match,
53    /// `for` keyword.
54    #[token("for")]
55    For,
56    /// `while` keyword.
57    #[token("while")]
58    While,
59    /// `break` keyword.
60    #[token("break")]
61    Break,
62    /// `continue` keyword.
63    #[token("continue")]
64    Continue,
65    /// `return` keyword.
66    #[token("return")]
67    Return,
68
69    /// `from` keyword.
70    #[token("from")]
71    From,
72    /// `where` keyword.
73    #[token("where")]
74    Where,
75    /// `select` keyword.
76    #[token("select")]
77    Select,
78    /// `join` keyword.
79    #[token("join")]
80    Join,
81    /// `merge` keyword.
82    #[token("merge")]
83    Merge,
84    /// `window` keyword.
85    #[token("window")]
86    Window,
87    /// `aggregate` keyword.
88    #[token("aggregate")]
89    Aggregate,
90    /// `partition_by` keyword.
91    #[token("partition_by")]
92    PartitionBy,
93    /// `order_by` keyword.
94    #[token("order_by")]
95    OrderBy,
96    /// `limit` keyword.
97    #[token("limit")]
98    Limit,
99    /// `distinct` keyword.
100    #[token("distinct")]
101    Distinct,
102    /// `emit` keyword.
103    #[token("emit")]
104    Emit,
105    /// `to` keyword.
106    #[token("to")]
107    To,
108    /// `on` keyword.
109    #[token("on")]
110    On,
111    /// `all` keyword.
112    #[token("all")]
113    All,
114    /// `within` keyword.
115    #[token("within")]
116    Within,
117
118    /// `pattern` keyword.
119    #[token("pattern")]
120    Pattern,
121    // Note: Stream operation names (map, filter, etc.) are NOT keywords
122    // They are parsed contextually after '.' and can be used as identifiers
123    /// Boolean literal `true`.
124    #[token("true")]
125    True,
126    /// Boolean literal `false`.
127    #[token("false")]
128    False,
129    /// Null literal.
130    #[token("null")]
131    Null,
132
133    /// Logical `and` operator.
134    #[token("and")]
135    And,
136    /// Logical `or` operator.
137    #[token("or")]
138    Or,
139    /// Logical `xor` operator.
140    #[token("xor")]
141    Xor,
142    /// Logical `not` operator.
143    #[token("not")]
144    Not,
145    /// `in` keyword (membership test / for loops).
146    #[token("in")]
147    In,
148    /// `is` keyword (type check).
149    #[token("is")]
150    Is,
151
152    /// `as` keyword (alias / cast).
153    #[token("as")]
154    As,
155    /// `extends` keyword (event inheritance).
156    #[token("extends")]
157    Extends,
158    /// `import` keyword.
159    #[token("import")]
160    Import,
161    /// `export` keyword.
162    #[token("export")]
163    Export,
164
165    // Type keywords
166    /// `int` type keyword.
167    #[token("int")]
168    IntType,
169    /// `float` type keyword.
170    #[token("float")]
171    FloatType,
172    /// `bool` type keyword.
173    #[token("bool")]
174    BoolType,
175    /// `str` type keyword.
176    #[token("str")]
177    StrType,
178    /// `timestamp` type keyword.
179    #[token("timestamp")]
180    TimestampType,
181    /// `duration` type keyword.
182    #[token("duration")]
183    DurationType,
184    /// `Stream` type keyword.
185    #[token("Stream")]
186    StreamType,
187
188    // === Operators ===
189    /// `+` operator.
190    #[token("+")]
191    Plus,
192    /// `-` operator.
193    #[token("-")]
194    Minus,
195    /// `*` operator.
196    #[token("*")]
197    Star,
198    /// `/` operator.
199    #[token("/")]
200    Slash,
201    /// `%` operator (modulo).
202    #[token("%")]
203    Percent,
204    /// `**` operator (exponentiation).
205    #[token("**")]
206    DoubleStar,
207
208    /// `==` equality comparison.
209    #[token("==")]
210    EqEq,
211    /// `!=` inequality comparison.
212    #[token("!=")]
213    NotEq,
214    /// `<` less-than comparison.
215    #[token("<")]
216    Lt,
217    /// `<=` less-than-or-equal comparison.
218    #[token("<=")]
219    Le,
220    /// `>` greater-than comparison.
221    #[token(">")]
222    Gt,
223    /// `>=` greater-than-or-equal comparison.
224    #[token(">=")]
225    Ge,
226
227    /// `&` bitwise AND.
228    #[token("&")]
229    Amp,
230    /// `|` bitwise OR.
231    #[token("|")]
232    Pipe,
233    /// `^` bitwise XOR.
234    #[token("^")]
235    Caret,
236    /// `~` bitwise NOT.
237    #[token("~")]
238    Tilde,
239    /// `<<` left shift.
240    #[token("<<")]
241    Shl,
242    /// `>>` right shift.
243    #[token(">>")]
244    Shr,
245
246    /// `=` assignment.
247    #[token("=")]
248    Eq,
249    /// `+=` add-assign.
250    #[token("+=")]
251    PlusEq,
252    /// `-=` subtract-assign.
253    #[token("-=")]
254    MinusEq,
255    /// `*=` multiply-assign.
256    #[token("*=")]
257    StarEq,
258    /// `/=` divide-assign.
259    #[token("/=")]
260    SlashEq,
261    /// `%=` modulo-assign.
262    #[token("%=")]
263    PercentEq,
264
265    /// `.` member access.
266    #[token(".")]
267    Dot,
268    /// `?.` optional chaining.
269    #[token("?.")]
270    QuestionDot,
271    /// `??` null coalescing.
272    #[token("??")]
273    QuestionQuestion,
274    /// `=>` fat arrow (lambdas / match arms).
275    #[token("=>")]
276    FatArrow,
277    /// `->` thin arrow (return type annotation).
278    #[token("->")]
279    Arrow,
280    /// `..` exclusive range.
281    #[token("..")]
282    DotDot,
283    /// `..=` inclusive range.
284    #[token("..=")]
285    DotDotEq,
286    /// `$` dollar sign (special variable prefix).
287    #[token("$")]
288    Dollar,
289
290    // === Delimiters ===
291    /// `(` left parenthesis.
292    #[token("(")]
293    LParen,
294    /// `)` right parenthesis.
295    #[token(")")]
296    RParen,
297    /// `[` left bracket.
298    #[token("[")]
299    LBracket,
300    /// `]` right bracket.
301    #[token("]")]
302    RBracket,
303    /// `{` left brace.
304    #[token("{")]
305    LBrace,
306    /// `}` right brace.
307    #[token("}")]
308    RBrace,
309    /// `,` comma separator.
310    #[token(",")]
311    Comma,
312    /// `:` colon (type annotations, block starts).
313    #[token(":")]
314    Colon,
315    /// `?` question mark (ternary / optional).
316    #[token("?")]
317    Question,
318    /// `@` at sign (timestamp literal prefix / decorator).
319    #[token("@")]
320    At,
321
322    // === Literals ===
323    /// Integer literal (e.g., `42`).
324    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
325    Integer(i64),
326
327    /// Floating-point literal (e.g., `3.14`, `1.0e10`).
328    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
329    Float(f64),
330
331    /// String literal (double- or single-quoted).
332    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
333        let s = lex.slice();
334        Some(s[1..s.len()-1].to_string())
335    })]
336    #[regex(r#"'([^'\\]|\\.)*'"#, |lex| {
337        let s = lex.slice();
338        Some(s[1..s.len()-1].to_string())
339    })]
340    String(String),
341
342    /// Duration literal (e.g., `5s`, `100ms`, `2h`).
343    #[regex(r"[0-9]+(ns|us|ms|s|m|h|d)", |lex| Some(lex.slice().to_string()))]
344    Duration(String),
345
346    /// Timestamp literal (e.g., `@2024-01-15T10:30:00Z`).
347    #[regex(r"@[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?", |lex| Some(lex.slice().to_string()))]
348    Timestamp(String),
349
350    // === Identifier ===
351    /// Identifier (e.g., variable name, event type).
352    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| Some(lex.slice().to_string()))]
353    Ident(String),
354
355    // === Special ===
356    /// End-of-file sentinel token.
357    Eof,
358}
359
360impl fmt::Display for Token {
361    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362        match self {
363            Self::Stream => write!(f, "stream"),
364            Self::Event => write!(f, "event"),
365            Self::Type => write!(f, "type"),
366            Self::Let => write!(f, "let"),
367            Self::Var => write!(f, "var"),
368            Self::Const => write!(f, "const"),
369            Self::Fn => write!(f, "fn"),
370            Self::Config => write!(f, "config"),
371            Self::If => write!(f, "if"),
372            Self::Else => write!(f, "else"),
373            Self::Elif => write!(f, "elif"),
374            Self::Then => write!(f, "then"),
375            Self::Match => write!(f, "match"),
376            Self::For => write!(f, "for"),
377            Self::While => write!(f, "while"),
378            Self::Break => write!(f, "break"),
379            Self::Continue => write!(f, "continue"),
380            Self::Return => write!(f, "return"),
381            Self::From => write!(f, "from"),
382            Self::Where => write!(f, "where"),
383            Self::Select => write!(f, "select"),
384            Self::Join => write!(f, "join"),
385            Self::Merge => write!(f, "merge"),
386            Self::Window => write!(f, "window"),
387            Self::Aggregate => write!(f, "aggregate"),
388            Self::PartitionBy => write!(f, "partition_by"),
389            Self::OrderBy => write!(f, "order_by"),
390            Self::Limit => write!(f, "limit"),
391            Self::Distinct => write!(f, "distinct"),
392            Self::Emit => write!(f, "emit"),
393            Self::To => write!(f, "to"),
394            Self::On => write!(f, "on"),
395            Self::All => write!(f, "all"),
396            Self::Within => write!(f, "within"),
397            Self::Pattern => write!(f, "pattern"),
398            Self::True => write!(f, "true"),
399            Self::False => write!(f, "false"),
400            Self::Null => write!(f, "null"),
401            Self::And => write!(f, "and"),
402            Self::Or => write!(f, "or"),
403            Self::Xor => write!(f, "xor"),
404            Self::Not => write!(f, "not"),
405            Self::In => write!(f, "in"),
406            Self::Is => write!(f, "is"),
407            Self::As => write!(f, "as"),
408            Self::Extends => write!(f, "extends"),
409            Self::Import => write!(f, "import"),
410            Self::Export => write!(f, "export"),
411            Self::IntType => write!(f, "int"),
412            Self::FloatType => write!(f, "float"),
413            Self::BoolType => write!(f, "bool"),
414            Self::StrType => write!(f, "str"),
415            Self::TimestampType => write!(f, "timestamp"),
416            Self::DurationType => write!(f, "duration"),
417            Self::StreamType => write!(f, "Stream"),
418            Self::Plus => write!(f, "+"),
419            Self::Minus => write!(f, "-"),
420            Self::Star => write!(f, "*"),
421            Self::Slash => write!(f, "/"),
422            Self::Percent => write!(f, "%"),
423            Self::DoubleStar => write!(f, "**"),
424            Self::EqEq => write!(f, "=="),
425            Self::NotEq => write!(f, "!="),
426            Self::Lt => write!(f, "<"),
427            Self::Le => write!(f, "<="),
428            Self::Gt => write!(f, ">"),
429            Self::Ge => write!(f, ">="),
430            Self::Amp => write!(f, "&"),
431            Self::Pipe => write!(f, "|"),
432            Self::Caret => write!(f, "^"),
433            Self::Tilde => write!(f, "~"),
434            Self::Shl => write!(f, "<<"),
435            Self::Shr => write!(f, ">>"),
436            Self::Eq => write!(f, "="),
437            Self::PlusEq => write!(f, "+="),
438            Self::MinusEq => write!(f, "-="),
439            Self::StarEq => write!(f, "*="),
440            Self::SlashEq => write!(f, "/="),
441            Self::PercentEq => write!(f, "%="),
442            Self::Dot => write!(f, "."),
443            Self::QuestionDot => write!(f, "?."),
444            Self::QuestionQuestion => write!(f, "??"),
445            Self::FatArrow => write!(f, "=>"),
446            Self::Arrow => write!(f, "->"),
447            Self::DotDot => write!(f, ".."),
448            Self::DotDotEq => write!(f, "..="),
449            Self::Dollar => write!(f, "$"),
450            Self::LParen => write!(f, "("),
451            Self::RParen => write!(f, ")"),
452            Self::LBracket => write!(f, "["),
453            Self::RBracket => write!(f, "]"),
454            Self::LBrace => write!(f, "{{"),
455            Self::RBrace => write!(f, "}}"),
456            Self::Comma => write!(f, ","),
457            Self::Colon => write!(f, ":"),
458            Self::Question => write!(f, "?"),
459            Self::At => write!(f, "@"),
460            Self::Integer(n) => write!(f, "{n}"),
461            Self::Float(n) => write!(f, "{n}"),
462            Self::String(s) => write!(f, "\"{s}\""),
463            Self::Duration(d) => write!(f, "{d}"),
464            Self::Timestamp(t) => write!(f, "{t}"),
465            Self::Ident(s) => write!(f, "{s}"),
466            Self::Eof => write!(f, "EOF"),
467        }
468    }
469}
470
/// Spanned token with position information
///
/// `start..end` is a half-open byte range into the original source string,
/// matching the span convention used by the underlying logos lexer.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The token value.
    pub token: Token,
    /// Byte offset of the first character of this token.
    pub start: usize,
    /// Byte offset past the last character of this token (exclusive end).
    pub end: usize,
}
481
/// Lexer wrapper that produces spanned tokens
///
/// Adds one-token lookahead (`peek`) and a single trailing `Token::Eof`
/// on top of the raw logos lexer.
pub struct Lexer<'source> {
    // Underlying logos-generated lexer over the borrowed source text.
    inner: logos::Lexer<'source, Token>,
    // One-token lookahead buffer: filled by `peek`, drained by `next`.
    peeked: Option<SpannedToken>,
    // Ensures the synthetic Eof token is emitted exactly once.
    eof_emitted: bool,
}
488
// Hand-written Debug that prints only `Lexer { .. }` with no fields.
// NOTE(review): presumably this is manual because `logos::Lexer` does not
// implement Debug, which would make `#[derive(Debug)]` impossible — confirm.
impl std::fmt::Debug for Lexer<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Lexer").finish_non_exhaustive()
    }
}
494
495impl<'source> Lexer<'source> {
496    /// Create a new lexer over the given source string.
497    pub fn new(source: &'source str) -> Self {
498        Self {
499            inner: Token::lexer(source),
500            peeked: None,
501            eof_emitted: false,
502        }
503    }
504
505    /// Peek at the next token without consuming it.
506    pub fn peek(&mut self) -> Option<&SpannedToken> {
507        if self.peeked.is_none() {
508            self.peeked = self.next_token();
509        }
510        self.peeked.as_ref()
511    }
512
513    fn next_token(&mut self) -> Option<SpannedToken> {
514        match self.inner.next() {
515            Some(Ok(token)) => {
516                let span = self.inner.span();
517                Some(SpannedToken {
518                    token,
519                    start: span.start,
520                    end: span.end,
521                })
522            }
523            Some(Err(())) => {
524                let span = self.inner.span();
525                Some(SpannedToken {
526                    token: Token::Ident(self.inner.slice().to_string()),
527                    start: span.start,
528                    end: span.end,
529                })
530            }
531            None if !self.eof_emitted => {
532                self.eof_emitted = true;
533                let pos = self.inner.span().end;
534                Some(SpannedToken {
535                    token: Token::Eof,
536                    start: pos,
537                    end: pos,
538                })
539            }
540            None => None,
541        }
542    }
543}
544
545impl Iterator for Lexer<'_> {
546    type Item = SpannedToken;
547
548    fn next(&mut self) -> Option<Self::Item> {
549        if let Some(peeked) = self.peeked.take() {
550            return Some(peeked);
551        }
552        self.next_token()
553    }
554}
555
556/// Tokenize a source string into a vector of spanned tokens
557pub fn tokenize(source: &str) -> Vec<SpannedToken> {
558    Lexer::new(source).collect()
559}
560
#[cfg(test)]
mod tests {
    use super::*;

    // Every tokenize() result ends with Token::Eof, so exact-vector tests
    // include it explicitly.
    #[test]
    fn test_keywords() {
        let tokens: Vec<_> = tokenize("stream event let var const fn")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Event,
                Token::Let,
                Token::Var,
                Token::Const,
                Token::Fn,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_literals() {
        let tokens: Vec<_> = tokenize("42 2.5 \"hello\" 5s true null")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(42),
                Token::Float(2.5),
                Token::String("hello".to_string()),
                Token::Duration("5s".to_string()),
                Token::True,
                Token::Null,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_operators() {
        let tokens: Vec<_> = tokenize("+ - * / == != <= >=")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Plus,
                Token::Minus,
                Token::Star,
                Token::Slash,
                Token::EqEq,
                Token::NotEq,
                Token::Le,
                Token::Ge,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_stream_decl() {
        let tokens: Vec<_> = tokenize("stream Trades = TradeEvent")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eq,
                Token::Ident("TradeEvent".to_string()),
                Token::Eof,
            ]
        );
    }

    // Line (#) and block (/* */) comments are skip patterns: they must
    // disappear from the token stream entirely.
    #[test]
    fn test_comments() {
        let tokens: Vec<_> = tokenize("# comment\nstream /* inline */ Trades")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eof,
            ]
        );
    }

    // ==========================================================================
    // Additional Coverage Tests
    // ==========================================================================

    #[test]
    fn test_more_keywords() {
        let tokens: Vec<_> = tokenize("if else elif then match for while break continue return")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::If));
        assert!(tokens.contains(&Token::Else));
        assert!(tokens.contains(&Token::Match));
        assert!(tokens.contains(&Token::For));
        assert!(tokens.contains(&Token::While));
        assert!(tokens.contains(&Token::Break));
        assert!(tokens.contains(&Token::Return));
    }

    #[test]
    fn test_stream_keywords() {
        let tokens: Vec<_> = tokenize("where select join merge window aggregate emit")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Where));
        assert!(tokens.contains(&Token::Select));
        assert!(tokens.contains(&Token::Join));
        assert!(tokens.contains(&Token::Merge));
        assert!(tokens.contains(&Token::Window));
        assert!(tokens.contains(&Token::Aggregate));
        assert!(tokens.contains(&Token::Emit));
    }

    #[test]
    fn test_more_operators() {
        let tokens: Vec<_> = tokenize("% ** < > & | ^ ~ << >> = += -= *= /= %=")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Percent));
        assert!(tokens.contains(&Token::DoubleStar));
        assert!(tokens.contains(&Token::Lt));
        assert!(tokens.contains(&Token::Gt));
        assert!(tokens.contains(&Token::Amp));
        assert!(tokens.contains(&Token::Pipe));
        assert!(tokens.contains(&Token::Caret));
        assert!(tokens.contains(&Token::Tilde));
        assert!(tokens.contains(&Token::Eq));
        assert!(tokens.contains(&Token::PlusEq));
    }

    #[test]
    fn test_delimiters() {
        let tokens: Vec<_> = tokenize("( ) [ ] { } , : ? @")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::LParen));
        assert!(tokens.contains(&Token::RParen));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
        assert!(tokens.contains(&Token::LBrace));
        assert!(tokens.contains(&Token::RBrace));
        assert!(tokens.contains(&Token::Comma));
        assert!(tokens.contains(&Token::Colon));
        assert!(tokens.contains(&Token::Question));
        assert!(tokens.contains(&Token::At));
    }

    #[test]
    fn test_special_operators() {
        let tokens: Vec<_> = tokenize(". ?. ?? => -> .. ..= $")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::QuestionDot));
        assert!(tokens.contains(&Token::QuestionQuestion));
        assert!(tokens.contains(&Token::FatArrow));
        assert!(tokens.contains(&Token::Arrow));
        assert!(tokens.contains(&Token::DotDot));
        assert!(tokens.contains(&Token::DotDotEq));
        assert!(tokens.contains(&Token::Dollar));
    }

    #[test]
    fn test_type_keywords() {
        let tokens: Vec<_> = tokenize("int float bool str timestamp duration Stream")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::IntType));
        assert!(tokens.contains(&Token::FloatType));
        assert!(tokens.contains(&Token::BoolType));
        assert!(tokens.contains(&Token::StrType));
        assert!(tokens.contains(&Token::TimestampType));
        assert!(tokens.contains(&Token::DurationType));
        assert!(tokens.contains(&Token::StreamType));
    }

    #[test]
    fn test_logical_keywords() {
        let tokens: Vec<_> = tokenize("and or not in is as")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::And));
        assert!(tokens.contains(&Token::Or));
        assert!(tokens.contains(&Token::Not));
        assert!(tokens.contains(&Token::In));
        assert!(tokens.contains(&Token::Is));
        assert!(tokens.contains(&Token::As));
    }

    // Durations must win over Integer + Ident (longest-match rule).
    #[test]
    fn test_duration_variants() {
        let tokens: Vec<_> = tokenize("100ms 5s 10m 2h 1d")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(matches!(&tokens[0], Token::Duration(s) if s == "100ms"));
        assert!(matches!(&tokens[1], Token::Duration(s) if s == "5s"));
        assert!(matches!(&tokens[2], Token::Duration(s) if s == "10m"));
        assert!(matches!(&tokens[3], Token::Duration(s) if s == "2h"));
        assert!(matches!(&tokens[4], Token::Duration(s) if s == "1d"));
    }

    // Escaped characters inside strings must not terminate the literal.
    // (The lexer keeps escapes raw; only tokenization is checked here.)
    #[test]
    fn test_string_escapes() {
        let tokens: Vec<_> = tokenize(r#""hello\nworld" "tab\there""#)
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(matches!(&tokens[0], Token::String(_)));
        assert!(matches!(&tokens[1], Token::String(_)));
    }

    // peek() must be idempotent and not consume the token.
    #[test]
    fn test_lexer_peek() {
        let mut lexer = Lexer::new("a b c");
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string())); // Still 'a'
        assert_eq!(lexer.next().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("b".to_string()));
    }

    // Even empty input produces the single synthetic Eof token.
    #[test]
    fn test_lexer_empty() {
        let tokens: Vec<_> = tokenize("").into_iter().map(|t| t.token).collect();
        assert_eq!(tokens, vec![Token::Eof]);
    }

    #[test]
    fn test_token_display() {
        assert_eq!(format!("{}", Token::Stream), "stream");
        assert_eq!(format!("{}", Token::Plus), "+");
        assert_eq!(format!("{}", Token::Integer(42)), "42");
        assert_eq!(format!("{}", Token::Float(2.5)), "2.5");
        assert_eq!(format!("{}", Token::String("test".to_string())), "\"test\"");
        assert_eq!(format!("{}", Token::Ident("foo".to_string())), "foo");
        assert_eq!(format!("{}", Token::Eof), "EOF");
    }

    #[test]
    fn test_more_token_display() {
        assert_eq!(format!("{}", Token::Event), "event");
        assert_eq!(format!("{}", Token::Type), "type");
        assert_eq!(format!("{}", Token::Config), "config");
        assert_eq!(format!("{}", Token::Elif), "elif");
        assert_eq!(format!("{}", Token::Then), "then");
        assert_eq!(format!("{}", Token::Continue), "continue");
        assert_eq!(format!("{}", Token::PartitionBy), "partition_by");
        assert_eq!(format!("{}", Token::OrderBy), "order_by");
        assert_eq!(format!("{}", Token::Limit), "limit");
        assert_eq!(format!("{}", Token::Distinct), "distinct");
        assert_eq!(format!("{}", Token::To), "to");
        assert_eq!(format!("{}", Token::On), "on");
        assert_eq!(format!("{}", Token::All), "all");
        assert_eq!(format!("{}", Token::Within), "within");
        assert_eq!(format!("{}", Token::Pattern), "pattern");
        assert_eq!(format!("{}", Token::False), "false");
        assert_eq!(format!("{}", Token::Extends), "extends");
        assert_eq!(format!("{}", Token::Import), "import");
        assert_eq!(format!("{}", Token::Export), "export");
    }

    #[test]
    fn test_remaining_token_display() {
        assert_eq!(format!("{}", Token::Shl), "<<");
        assert_eq!(format!("{}", Token::Shr), ">>");
        assert_eq!(format!("{}", Token::MinusEq), "-=");
        assert_eq!(format!("{}", Token::StarEq), "*=");
        assert_eq!(format!("{}", Token::SlashEq), "/=");
        assert_eq!(format!("{}", Token::PercentEq), "%=");
        assert_eq!(format!("{}", Token::LBrace), "{");
        assert_eq!(format!("{}", Token::RBrace), "}");
        assert_eq!(format!("{}", Token::Duration("5m".to_string())), "5m");
        assert_eq!(
            format!("{}", Token::Timestamp("2024-01-01".to_string())),
            "2024-01-01"
        );
    }

    // Spans are byte offsets with exclusive ends; whitespace is skipped.
    #[test]
    fn test_spanned_token_positions() {
        let tokens: Vec<_> = tokenize("ab cd").into_iter().collect();
        assert_eq!(tokens[0].start, 0);
        assert_eq!(tokens[0].end, 2);
        assert_eq!(tokens[1].start, 3);
        assert_eq!(tokens[1].end, 5);
    }

    #[test]
    fn test_special_chars_in_code() {
        let tokens: Vec<_> = tokenize("a.b.c[0]").into_iter().map(|t| t.token).collect();
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
    }

    // Negative numbers are two tokens (Minus + literal); the parser folds them.
    #[test]
    fn test_negative_number() {
        let tokens: Vec<_> = tokenize("-42 -2.5").into_iter().map(|t| t.token).collect();
        assert!(tokens.contains(&Token::Minus));
        assert!(tokens.contains(&Token::Integer(42)));
        assert!(tokens.contains(&Token::Float(2.5)));
    }
}
889}