Skip to main content

varpulis_parser/
lexer.rs

1//! Lexer for VPL using Logos
2
3use logos::Logos;
4use std::fmt;
5
6/// Token type for VPL
7#[derive(Logos, Debug, Clone, PartialEq)]
8#[logos(skip r"[ \t\r\n\f]+")]
9#[logos(skip r"#[^\n]*")]
10#[logos(skip r"/\*([^*]|\*[^/])*\*/")]
11pub enum Token {
12    // === Keywords ===
13    #[token("stream")]
14    Stream,
15    #[token("event")]
16    Event,
17    #[token("type")]
18    Type,
19    #[token("let")]
20    Let,
21    #[token("var")]
22    Var,
23    #[token("const")]
24    Const,
25    #[token("fn")]
26    Fn,
27    #[token("config")]
28    Config,
29
30    #[token("if")]
31    If,
32    #[token("else")]
33    Else,
34    #[token("elif")]
35    Elif,
36    #[token("then")]
37    Then,
38    #[token("match")]
39    Match,
40    #[token("for")]
41    For,
42    #[token("while")]
43    While,
44    #[token("break")]
45    Break,
46    #[token("continue")]
47    Continue,
48    #[token("return")]
49    Return,
50
51    #[token("from")]
52    From,
53    #[token("where")]
54    Where,
55    #[token("select")]
56    Select,
57    #[token("join")]
58    Join,
59    #[token("merge")]
60    Merge,
61    #[token("window")]
62    Window,
63    #[token("aggregate")]
64    Aggregate,
65    #[token("partition_by")]
66    PartitionBy,
67    #[token("order_by")]
68    OrderBy,
69    #[token("limit")]
70    Limit,
71    #[token("distinct")]
72    Distinct,
73    #[token("emit")]
74    Emit,
75    #[token("to")]
76    To,
77    #[token("on")]
78    On,
79    #[token("all")]
80    All,
81    #[token("within")]
82    Within,
83
84    #[token("pattern")]
85    Pattern,
86    // Note: Stream operation names (map, filter, etc.) are NOT keywords
87    // They are parsed contextually after '.' and can be used as identifiers
88    #[token("true")]
89    True,
90    #[token("false")]
91    False,
92    #[token("null")]
93    Null,
94
95    #[token("and")]
96    And,
97    #[token("or")]
98    Or,
99    #[token("xor")]
100    Xor,
101    #[token("not")]
102    Not,
103    #[token("in")]
104    In,
105    #[token("is")]
106    Is,
107
108    #[token("as")]
109    As,
110    #[token("extends")]
111    Extends,
112    #[token("import")]
113    Import,
114    #[token("export")]
115    Export,
116
117    // Type keywords
118    #[token("int")]
119    IntType,
120    #[token("float")]
121    FloatType,
122    #[token("bool")]
123    BoolType,
124    #[token("str")]
125    StrType,
126    #[token("timestamp")]
127    TimestampType,
128    #[token("duration")]
129    DurationType,
130    #[token("Stream")]
131    StreamType,
132
133    // === Operators ===
134    #[token("+")]
135    Plus,
136    #[token("-")]
137    Minus,
138    #[token("*")]
139    Star,
140    #[token("/")]
141    Slash,
142    #[token("%")]
143    Percent,
144    #[token("**")]
145    DoubleStar,
146
147    #[token("==")]
148    EqEq,
149    #[token("!=")]
150    NotEq,
151    #[token("<")]
152    Lt,
153    #[token("<=")]
154    Le,
155    #[token(">")]
156    Gt,
157    #[token(">=")]
158    Ge,
159
160    #[token("&")]
161    Amp,
162    #[token("|")]
163    Pipe,
164    #[token("^")]
165    Caret,
166    #[token("~")]
167    Tilde,
168    #[token("<<")]
169    Shl,
170    #[token(">>")]
171    Shr,
172
173    #[token("=")]
174    Eq,
175    #[token("+=")]
176    PlusEq,
177    #[token("-=")]
178    MinusEq,
179    #[token("*=")]
180    StarEq,
181    #[token("/=")]
182    SlashEq,
183    #[token("%=")]
184    PercentEq,
185
186    #[token(".")]
187    Dot,
188    #[token("?.")]
189    QuestionDot,
190    #[token("??")]
191    QuestionQuestion,
192    #[token("=>")]
193    FatArrow,
194    #[token("->")]
195    Arrow,
196    #[token("..")]
197    DotDot,
198    #[token("..=")]
199    DotDotEq,
200    #[token("$")]
201    Dollar,
202
203    // === Delimiters ===
204    #[token("(")]
205    LParen,
206    #[token(")")]
207    RParen,
208    #[token("[")]
209    LBracket,
210    #[token("]")]
211    RBracket,
212    #[token("{")]
213    LBrace,
214    #[token("}")]
215    RBrace,
216    #[token(",")]
217    Comma,
218    #[token(":")]
219    Colon,
220    #[token("?")]
221    Question,
222    #[token("@")]
223    At,
224
225    // === Literals ===
226    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
227    Integer(i64),
228
229    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
230    Float(f64),
231
232    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
233        let s = lex.slice();
234        Some(s[1..s.len()-1].to_string())
235    })]
236    #[regex(r#"'([^'\\]|\\.)*'"#, |lex| {
237        let s = lex.slice();
238        Some(s[1..s.len()-1].to_string())
239    })]
240    String(String),
241
242    #[regex(r"[0-9]+(ns|us|ms|s|m|h|d)", |lex| Some(lex.slice().to_string()))]
243    Duration(String),
244
245    #[regex(r"@[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|[+-][0-9]{2}:[0-9]{2})?)?", |lex| Some(lex.slice().to_string()))]
246    Timestamp(String),
247
248    // === Identifier ===
249    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| Some(lex.slice().to_string()))]
250    Ident(String),
251
252    // === Special ===
253    Eof,
254}
255
256impl fmt::Display for Token {
257    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
258        match self {
259            Token::Stream => write!(f, "stream"),
260            Token::Event => write!(f, "event"),
261            Token::Type => write!(f, "type"),
262            Token::Let => write!(f, "let"),
263            Token::Var => write!(f, "var"),
264            Token::Const => write!(f, "const"),
265            Token::Fn => write!(f, "fn"),
266            Token::Config => write!(f, "config"),
267            Token::If => write!(f, "if"),
268            Token::Else => write!(f, "else"),
269            Token::Elif => write!(f, "elif"),
270            Token::Then => write!(f, "then"),
271            Token::Match => write!(f, "match"),
272            Token::For => write!(f, "for"),
273            Token::While => write!(f, "while"),
274            Token::Break => write!(f, "break"),
275            Token::Continue => write!(f, "continue"),
276            Token::Return => write!(f, "return"),
277            Token::From => write!(f, "from"),
278            Token::Where => write!(f, "where"),
279            Token::Select => write!(f, "select"),
280            Token::Join => write!(f, "join"),
281            Token::Merge => write!(f, "merge"),
282            Token::Window => write!(f, "window"),
283            Token::Aggregate => write!(f, "aggregate"),
284            Token::PartitionBy => write!(f, "partition_by"),
285            Token::OrderBy => write!(f, "order_by"),
286            Token::Limit => write!(f, "limit"),
287            Token::Distinct => write!(f, "distinct"),
288            Token::Emit => write!(f, "emit"),
289            Token::To => write!(f, "to"),
290            Token::On => write!(f, "on"),
291            Token::All => write!(f, "all"),
292            Token::Within => write!(f, "within"),
293            Token::Pattern => write!(f, "pattern"),
294            Token::True => write!(f, "true"),
295            Token::False => write!(f, "false"),
296            Token::Null => write!(f, "null"),
297            Token::And => write!(f, "and"),
298            Token::Or => write!(f, "or"),
299            Token::Xor => write!(f, "xor"),
300            Token::Not => write!(f, "not"),
301            Token::In => write!(f, "in"),
302            Token::Is => write!(f, "is"),
303            Token::As => write!(f, "as"),
304            Token::Extends => write!(f, "extends"),
305            Token::Import => write!(f, "import"),
306            Token::Export => write!(f, "export"),
307            Token::IntType => write!(f, "int"),
308            Token::FloatType => write!(f, "float"),
309            Token::BoolType => write!(f, "bool"),
310            Token::StrType => write!(f, "str"),
311            Token::TimestampType => write!(f, "timestamp"),
312            Token::DurationType => write!(f, "duration"),
313            Token::StreamType => write!(f, "Stream"),
314            Token::Plus => write!(f, "+"),
315            Token::Minus => write!(f, "-"),
316            Token::Star => write!(f, "*"),
317            Token::Slash => write!(f, "/"),
318            Token::Percent => write!(f, "%"),
319            Token::DoubleStar => write!(f, "**"),
320            Token::EqEq => write!(f, "=="),
321            Token::NotEq => write!(f, "!="),
322            Token::Lt => write!(f, "<"),
323            Token::Le => write!(f, "<="),
324            Token::Gt => write!(f, ">"),
325            Token::Ge => write!(f, ">="),
326            Token::Amp => write!(f, "&"),
327            Token::Pipe => write!(f, "|"),
328            Token::Caret => write!(f, "^"),
329            Token::Tilde => write!(f, "~"),
330            Token::Shl => write!(f, "<<"),
331            Token::Shr => write!(f, ">>"),
332            Token::Eq => write!(f, "="),
333            Token::PlusEq => write!(f, "+="),
334            Token::MinusEq => write!(f, "-="),
335            Token::StarEq => write!(f, "*="),
336            Token::SlashEq => write!(f, "/="),
337            Token::PercentEq => write!(f, "%="),
338            Token::Dot => write!(f, "."),
339            Token::QuestionDot => write!(f, "?."),
340            Token::QuestionQuestion => write!(f, "??"),
341            Token::FatArrow => write!(f, "=>"),
342            Token::Arrow => write!(f, "->"),
343            Token::DotDot => write!(f, ".."),
344            Token::DotDotEq => write!(f, "..="),
345            Token::Dollar => write!(f, "$"),
346            Token::LParen => write!(f, "("),
347            Token::RParen => write!(f, ")"),
348            Token::LBracket => write!(f, "["),
349            Token::RBracket => write!(f, "]"),
350            Token::LBrace => write!(f, "{{"),
351            Token::RBrace => write!(f, "}}"),
352            Token::Comma => write!(f, ","),
353            Token::Colon => write!(f, ":"),
354            Token::Question => write!(f, "?"),
355            Token::At => write!(f, "@"),
356            Token::Integer(n) => write!(f, "{}", n),
357            Token::Float(n) => write!(f, "{}", n),
358            Token::String(s) => write!(f, "\"{}\"", s),
359            Token::Duration(d) => write!(f, "{}", d),
360            Token::Timestamp(t) => write!(f, "{}", t),
361            Token::Ident(s) => write!(f, "{}", s),
362            Token::Eof => write!(f, "EOF"),
363        }
364    }
365}
366
/// Spanned token with position information
///
/// `start`/`end` are byte offsets into the source string, forming the
/// half-open range reported by `logos::Lexer::span`. For the synthetic
/// `Eof` token the range is empty (start == end).
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    // The lexed token (or `Token::Eof` at end of input).
    pub token: Token,
    // Byte offset of the first byte of the token's source slice.
    pub start: usize,
    // Byte offset one past the last byte of the token's source slice.
    pub end: usize,
}
374
/// Lexer wrapper that produces spanned tokens
///
/// Adds single-token lookahead on top of the raw Logos lexer and emits one
/// trailing `Token::Eof` once the input is exhausted.
pub struct Lexer<'source> {
    // Underlying Logos lexer over the source text.
    inner: logos::Lexer<'source, Token>,
    // One-token lookahead buffer, filled by `peek` and drained by `next`.
    peeked: Option<SpannedToken>,
    // Guards the synthetic Eof so it is emitted exactly once.
    eof_emitted: bool,
}
381
382impl<'source> Lexer<'source> {
383    pub fn new(source: &'source str) -> Self {
384        Self {
385            inner: Token::lexer(source),
386            peeked: None,
387            eof_emitted: false,
388        }
389    }
390
391    pub fn peek(&mut self) -> Option<&SpannedToken> {
392        if self.peeked.is_none() {
393            self.peeked = self.next_token();
394        }
395        self.peeked.as_ref()
396    }
397
398    fn next_token(&mut self) -> Option<SpannedToken> {
399        match self.inner.next() {
400            Some(Ok(token)) => {
401                let span = self.inner.span();
402                Some(SpannedToken {
403                    token,
404                    start: span.start,
405                    end: span.end,
406                })
407            }
408            Some(Err(_)) => {
409                let span = self.inner.span();
410                Some(SpannedToken {
411                    token: Token::Ident(self.inner.slice().to_string()),
412                    start: span.start,
413                    end: span.end,
414                })
415            }
416            None if !self.eof_emitted => {
417                self.eof_emitted = true;
418                let pos = self.inner.span().end;
419                Some(SpannedToken {
420                    token: Token::Eof,
421                    start: pos,
422                    end: pos,
423                })
424            }
425            None => None,
426        }
427    }
428}
429
430impl<'source> Iterator for Lexer<'source> {
431    type Item = SpannedToken;
432
433    fn next(&mut self) -> Option<Self::Item> {
434        if let Some(peeked) = self.peeked.take() {
435            return Some(peeked);
436        }
437        self.next_token()
438    }
439}
440
441/// Tokenize a source string into a vector of spanned tokens
442pub fn tokenize(source: &str) -> Vec<SpannedToken> {
443    Lexer::new(source).collect()
444}
445
// Unit tests: each feeds a small source string through `tokenize` (or drives
// `Lexer` directly) and checks the resulting token stream.
#[cfg(test)]
mod tests {
    use super::*;

    // Declaration keywords lex to dedicated variants, with a trailing Eof.
    #[test]
    fn test_keywords() {
        let tokens: Vec<_> = tokenize("stream event let var const fn")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Event,
                Token::Let,
                Token::Var,
                Token::Const,
                Token::Fn,
                Token::Eof,
            ]
        );
    }

    // One of each literal kind; "5s" must lex as Duration, not Integer+Ident.
    #[test]
    fn test_literals() {
        let tokens: Vec<_> = tokenize("42 2.5 \"hello\" 5s true null")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(42),
                Token::Float(2.5),
                Token::String("hello".to_string()),
                Token::Duration("5s".to_string()),
                Token::True,
                Token::Null,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_operators() {
        let tokens: Vec<_> = tokenize("+ - * / == != <= >=")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Plus,
                Token::Minus,
                Token::Star,
                Token::Slash,
                Token::EqEq,
                Token::NotEq,
                Token::Le,
                Token::Ge,
                Token::Eof,
            ]
        );
    }

    // A minimal stream declaration; names lex as Ident, not keywords.
    #[test]
    fn test_stream_decl() {
        let tokens: Vec<_> = tokenize("stream Trades = TradeEvent")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eq,
                Token::Ident("TradeEvent".to_string()),
                Token::Eof,
            ]
        );
    }

    // Both `#` line comments and `/* */` block comments are skipped entirely.
    #[test]
    fn test_comments() {
        let tokens: Vec<_> = tokenize("# comment\nstream /* inline */ Trades")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert_eq!(
            tokens,
            vec![
                Token::Stream,
                Token::Ident("Trades".to_string()),
                Token::Eof,
            ]
        );
    }

    // ==========================================================================
    // Additional Coverage Tests
    // ==========================================================================

    #[test]
    fn test_more_keywords() {
        let tokens: Vec<_> = tokenize("if else elif then match for while break continue return")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::If));
        assert!(tokens.contains(&Token::Else));
        assert!(tokens.contains(&Token::Match));
        assert!(tokens.contains(&Token::For));
        assert!(tokens.contains(&Token::While));
        assert!(tokens.contains(&Token::Break));
        assert!(tokens.contains(&Token::Return));
    }

    #[test]
    fn test_stream_keywords() {
        let tokens: Vec<_> = tokenize("where select join merge window aggregate emit")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Where));
        assert!(tokens.contains(&Token::Select));
        assert!(tokens.contains(&Token::Join));
        assert!(tokens.contains(&Token::Merge));
        assert!(tokens.contains(&Token::Window));
        assert!(tokens.contains(&Token::Aggregate));
        assert!(tokens.contains(&Token::Emit));
    }

    #[test]
    fn test_more_operators() {
        let tokens: Vec<_> = tokenize("% ** < > & | ^ ~ << >> = += -= *= /= %=")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Percent));
        assert!(tokens.contains(&Token::DoubleStar));
        assert!(tokens.contains(&Token::Lt));
        assert!(tokens.contains(&Token::Gt));
        assert!(tokens.contains(&Token::Amp));
        assert!(tokens.contains(&Token::Pipe));
        assert!(tokens.contains(&Token::Caret));
        assert!(tokens.contains(&Token::Tilde));
        assert!(tokens.contains(&Token::Eq));
        assert!(tokens.contains(&Token::PlusEq));
    }

    #[test]
    fn test_delimiters() {
        let tokens: Vec<_> = tokenize("( ) [ ] { } , : ? @")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::LParen));
        assert!(tokens.contains(&Token::RParen));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
        assert!(tokens.contains(&Token::LBrace));
        assert!(tokens.contains(&Token::RBrace));
        assert!(tokens.contains(&Token::Comma));
        assert!(tokens.contains(&Token::Colon));
        assert!(tokens.contains(&Token::Question));
        assert!(tokens.contains(&Token::At));
    }

    // Multi-character operators must win over their single-char prefixes
    // (e.g. "?." is QuestionDot, not Question followed by Dot).
    #[test]
    fn test_special_operators() {
        let tokens: Vec<_> = tokenize(". ?. ?? => -> .. ..= $")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::QuestionDot));
        assert!(tokens.contains(&Token::QuestionQuestion));
        assert!(tokens.contains(&Token::FatArrow));
        assert!(tokens.contains(&Token::Arrow));
        assert!(tokens.contains(&Token::DotDot));
        assert!(tokens.contains(&Token::DotDotEq));
        assert!(tokens.contains(&Token::Dollar));
    }

    #[test]
    fn test_type_keywords() {
        let tokens: Vec<_> = tokenize("int float bool str timestamp duration Stream")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::IntType));
        assert!(tokens.contains(&Token::FloatType));
        assert!(tokens.contains(&Token::BoolType));
        assert!(tokens.contains(&Token::StrType));
        assert!(tokens.contains(&Token::TimestampType));
        assert!(tokens.contains(&Token::DurationType));
        assert!(tokens.contains(&Token::StreamType));
    }

    #[test]
    fn test_logical_keywords() {
        let tokens: Vec<_> = tokenize("and or not in is as")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(tokens.contains(&Token::And));
        assert!(tokens.contains(&Token::Or));
        assert!(tokens.contains(&Token::Not));
        assert!(tokens.contains(&Token::In));
        assert!(tokens.contains(&Token::Is));
        assert!(tokens.contains(&Token::As));
    }

    // Every supported duration suffix, stored verbatim including the digits.
    #[test]
    fn test_duration_variants() {
        let tokens: Vec<_> = tokenize("100ms 5s 10m 2h 1d")
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(matches!(&tokens[0], Token::Duration(s) if s == "100ms"));
        assert!(matches!(&tokens[1], Token::Duration(s) if s == "5s"));
        assert!(matches!(&tokens[2], Token::Duration(s) if s == "10m"));
        assert!(matches!(&tokens[3], Token::Duration(s) if s == "2h"));
        assert!(matches!(&tokens[4], Token::Duration(s) if s == "1d"));
    }

    // Escaped characters must not terminate the string literal; the escape
    // bytes themselves are kept verbatim (no decoding at lex time).
    #[test]
    fn test_string_escapes() {
        let tokens: Vec<_> = tokenize(r#""hello\nworld" "tab\there""#)
            .into_iter()
            .map(|t| t.token)
            .collect();
        assert!(matches!(&tokens[0], Token::String(_)));
        assert!(matches!(&tokens[1], Token::String(_)));
    }

    // peek() is idempotent and does not consume; next() advances past it.
    #[test]
    fn test_lexer_peek() {
        let mut lexer = Lexer::new("a b c");
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("a".to_string())); // Still 'a'
        assert_eq!(lexer.next().unwrap().token, Token::Ident("a".to_string()));
        assert_eq!(lexer.peek().unwrap().token, Token::Ident("b".to_string()));
    }

    // Even empty input yields exactly one Eof token.
    #[test]
    fn test_lexer_empty() {
        let tokens: Vec<_> = tokenize("").into_iter().map(|t| t.token).collect();
        assert_eq!(tokens, vec![Token::Eof]);
    }

    #[test]
    fn test_token_display() {
        assert_eq!(format!("{}", Token::Stream), "stream");
        assert_eq!(format!("{}", Token::Plus), "+");
        assert_eq!(format!("{}", Token::Integer(42)), "42");
        assert_eq!(format!("{}", Token::Float(2.5)), "2.5");
        assert_eq!(format!("{}", Token::String("test".to_string())), "\"test\"");
        assert_eq!(format!("{}", Token::Ident("foo".to_string())), "foo");
        assert_eq!(format!("{}", Token::Eof), "EOF");
    }

    #[test]
    fn test_more_token_display() {
        assert_eq!(format!("{}", Token::Event), "event");
        assert_eq!(format!("{}", Token::Type), "type");
        assert_eq!(format!("{}", Token::Config), "config");
        assert_eq!(format!("{}", Token::Elif), "elif");
        assert_eq!(format!("{}", Token::Then), "then");
        assert_eq!(format!("{}", Token::Continue), "continue");
        assert_eq!(format!("{}", Token::PartitionBy), "partition_by");
        assert_eq!(format!("{}", Token::OrderBy), "order_by");
        assert_eq!(format!("{}", Token::Limit), "limit");
        assert_eq!(format!("{}", Token::Distinct), "distinct");
        assert_eq!(format!("{}", Token::To), "to");
        assert_eq!(format!("{}", Token::On), "on");
        assert_eq!(format!("{}", Token::All), "all");
        assert_eq!(format!("{}", Token::Within), "within");
        assert_eq!(format!("{}", Token::Pattern), "pattern");
        assert_eq!(format!("{}", Token::False), "false");
        assert_eq!(format!("{}", Token::Extends), "extends");
        assert_eq!(format!("{}", Token::Import), "import");
        assert_eq!(format!("{}", Token::Export), "export");
    }

    #[test]
    fn test_remaining_token_display() {
        assert_eq!(format!("{}", Token::Shl), "<<");
        assert_eq!(format!("{}", Token::Shr), ">>");
        assert_eq!(format!("{}", Token::MinusEq), "-=");
        assert_eq!(format!("{}", Token::StarEq), "*=");
        assert_eq!(format!("{}", Token::SlashEq), "/=");
        assert_eq!(format!("{}", Token::PercentEq), "%=");
        // Braces display as the literal character, not the `{{` escape.
        assert_eq!(format!("{}", Token::LBrace), "{");
        assert_eq!(format!("{}", Token::RBrace), "}");
        assert_eq!(format!("{}", Token::Duration("5m".to_string())), "5m");
        assert_eq!(
            format!("{}", Token::Timestamp("2024-01-01".to_string())),
            "2024-01-01"
        );
    }

    // Spans are byte offsets into the source; whitespace is excluded.
    #[test]
    fn test_spanned_token_positions() {
        let tokens: Vec<_> = tokenize("ab cd").into_iter().collect();
        assert_eq!(tokens[0].start, 0);
        assert_eq!(tokens[0].end, 2);
        assert_eq!(tokens[1].start, 3);
        assert_eq!(tokens[1].end, 5);
    }

    #[test]
    fn test_special_chars_in_code() {
        let tokens: Vec<_> = tokenize("a.b.c[0]").into_iter().map(|t| t.token).collect();
        assert!(tokens.contains(&Token::Dot));
        assert!(tokens.contains(&Token::LBracket));
        assert!(tokens.contains(&Token::RBracket));
    }

    // The lexer has no negative-literal rule: '-' lexes separately and the
    // parser is expected to fold unary minus.
    #[test]
    fn test_negative_number() {
        let tokens: Vec<_> = tokenize("-42 -2.5").into_iter().map(|t| t.token).collect();
        assert!(tokens.contains(&Token::Minus));
        assert!(tokens.contains(&Token::Integer(42)));
        assert!(tokens.contains(&Token::Float(2.5)));
    }
}