// husk_lexer/lib.rs

1//! Lexical analysis: convert source text into a stream of tokens.
2
3use std::ops::Range;
4
5// ============================================================================
6// Trivia (for formatter support)
7// ============================================================================
8
/// Trivia represents non-semantic content: whitespace and comments.
/// Used by the formatter to preserve comments and intentional blank lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Trivia {
    /// Horizontal whitespace (spaces and tabs)
    Whitespace(String),
    /// Line endings (\n or \r\n) - tracked separately for blank line detection
    Newline(String),
    /// Line comment including the `//` prefix
    LineComment(String),
}

impl Trivia {
    /// Returns true if this trivia is a newline
    pub fn is_newline(&self) -> bool {
        matches!(self, Trivia::Newline(_))
    }

    /// Returns true if this trivia is a line comment (doc comments included)
    pub fn is_comment(&self) -> bool {
        matches!(self, Trivia::LineComment(_))
    }

    /// Returns true if this trivia is a documentation comment (starts with `/// `).
    pub fn is_doc_comment(&self) -> bool {
        // Delegate to `doc_content` so the `/// ` prefix check lives in one place.
        self.doc_content().is_some()
    }

    /// Extract doc content from a doc comment, removing the `/// ` prefix.
    /// Returns None if this is not a doc comment.
    pub fn doc_content(&self) -> Option<&str> {
        match self {
            // `strip_prefix` replaces the `starts_with` + `&s[4..]` pair: it
            // performs the same check and slice in one call and cannot panic.
            Trivia::LineComment(s) => s.strip_prefix("/// "),
            _ => None,
        }
    }
}
46
/// List of all Husk keywords.
pub const KEYWORDS: &[&str] = &[
    "as", "pub", "use", "fn", "let", "mod", "mut", "struct", "enum", "type", "extern", "if",
    "else", "while", "loop", "match", "return", "true", "false", "break", "continue", "trait",
    "impl", "for", "Self", "static", "in", "global", "js",
];

/// Check if a string is a Husk reserved keyword.
pub fn is_keyword(name: &str) -> bool {
    KEYWORDS.iter().any(|kw| *kw == name)
}

/// Check if a string is a valid Husk identifier.
///
/// A valid identifier:
/// - Starts with an ASCII letter or underscore
/// - Contains only ASCII alphanumeric characters or underscores
/// - Is not a reserved keyword
pub fn is_valid_identifier(name: &str) -> bool {
    let mut chars = name.chars();

    // Guard: the empty string has no first character and is not an identifier.
    let head_ok = match chars.next() {
        Some(c) => c.is_ascii_alphabetic() || c == '_',
        None => return false,
    };

    // Head must be a letter/underscore, the tail alphanumeric/underscore,
    // and the whole word must not collide with a reserved keyword.
    head_ok
        && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        && !is_keyword(name)
}
81
/// A span in the source file, represented as a byte range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Span {
    pub range: Range<usize>,
}

impl Span {
    /// Build a span covering the half-open byte range `start..end`.
    pub fn new(start: usize, end: usize) -> Self {
        let range = Range { start, end };
        Span { range }
    }
}
93
/// Language keywords (subset for the MVP).
///
/// One variant per reserved word in [`KEYWORDS`]; the lexer maps identifier
/// lexemes onto these variants in `classify_ident_or_keyword`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Keyword {
    As,
    Pub,
    Use,
    Fn,
    Let,
    Mut,
    Mod,
    Struct,
    Enum,
    Type,
    Extern,
    If,
    Else,
    While,
    Loop,
    Match,
    Return,
    True,
    False,
    Break,
    Continue,
    Trait,
    Impl,
    For,
    In,
    SelfType, // `Self` keyword (capital S)
    Static,
    Global,
    Js, // `js` keyword for embedded JavaScript blocks
}
127
/// Token kinds produced by the lexer.
///
/// Literal-carrying variants store the lexeme (or, for strings, the decoded
/// value) as an owned `String`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// A non-keyword identifier.
    Ident(String),
    /// An integer literal, stored as its source text.
    IntLiteral(String),
    /// A float literal (digits '.' digits), stored as its source text.
    FloatLiteral(String),
    /// A string literal, stored with escape sequences already decoded.
    StringLiteral(String),
    /// A reserved word; see [`Keyword`].
    Keyword(Keyword),
    // Punctuation
    LParen,
    RParen,
    LBrace,
    RBrace,
    Comma,
    Colon,
    ColonColon,
    Semicolon,
    Dot,
    DotDot,   // ..  (exclusive range)
    DotDotEq, // ..= (inclusive range)
    Arrow,    // ->
    FatArrow, // =>
    Eq,       // =
    EqEq,     // ==
    Bang,     // !
    BangEq,   // !=
    Lt,       // <
    Gt,       // >
    Le,       // <=
    Ge,       // >=
    AndAnd,   // &&
    Amp,      // & (single ampersand for references/self receivers)
    OrOr,     // ||
    Pipe,     // | (single pipe for closures)
    Plus,
    PlusEq,   // +=
    Minus,
    MinusEq,  // -=
    Star,
    Slash,
    Percent,   // %
    PercentEq, // %=
    // Attribute-related tokens
    Hash,      // #
    LBracket,  // [
    RBracket,  // ]
    // End of input
    Eof,
}
177
/// A token with its kind, source span, and associated trivia.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// What kind of token this is (identifier, literal, punctuation, ...).
    pub kind: TokenKind,
    /// Byte range of the token's text in the source.
    pub span: Span,
    /// Trivia (whitespace, newlines, comments) that appears before this token
    pub leading_trivia: Vec<Trivia>,
    /// Trivia that appears after this token on the same line (typically trailing comments)
    pub trailing_trivia: Vec<Trivia>,
}
188
189impl Token {
190    /// Create a new token with no trivia (for backwards compatibility)
191    pub fn new(kind: TokenKind, span: Span) -> Self {
192        Self {
193            kind,
194            span,
195            leading_trivia: Vec::new(),
196            trailing_trivia: Vec::new(),
197        }
198    }
199
200    /// Create a new token with trivia
201    pub fn with_trivia(
202        kind: TokenKind,
203        span: Span,
204        leading_trivia: Vec<Trivia>,
205        trailing_trivia: Vec<Trivia>,
206    ) -> Self {
207        Self {
208            kind,
209            span,
210            leading_trivia,
211            trailing_trivia,
212        }
213    }
214
215    /// Returns true if this token has any leading comments
216    pub fn has_leading_comments(&self) -> bool {
217        self.leading_trivia.iter().any(|t| t.is_comment())
218    }
219
220    /// Returns true if this token has a trailing comment
221    pub fn has_trailing_comment(&self) -> bool {
222        self.trailing_trivia.iter().any(|t| t.is_comment())
223    }
224
225    /// Count consecutive newlines in leading trivia (for blank line detection)
226    pub fn leading_blank_lines(&self) -> usize {
227        let newline_count = self.leading_trivia.iter().filter(|t| t.is_newline()).count();
228        // 2 newlines = 1 blank line, 3 newlines = 2 blank lines, etc.
229        newline_count.saturating_sub(1)
230    }
231}
232
/// Simple lexer over a UTF-8 string.
pub struct Lexer<'src> {
    /// The full source text being lexed.
    src: &'src str,
    /// Iterator over `(byte_index, char)` pairs of `src`.
    chars: std::str::CharIndices<'src>,
    /// One-character lookahead buffer: filled by `peek`, drained by `bump`.
    peeked: Option<(usize, char)>,
    /// Byte length of `src`; used as the position of the Eof token.
    end: usize,
    /// Set once the Eof token has been emitted, ending iteration.
    finished: bool,
}
241
242impl<'src> Lexer<'src> {
243    pub fn new(src: &'src str) -> Self {
244        let end = src.len();
245        Self {
246            src,
247            chars: src.char_indices(),
248            peeked: None,
249            end,
250            finished: false,
251        }
252    }
253
254    fn bump(&mut self) -> Option<(usize, char)> {
255        if let Some(p) = self.peeked.take() {
256            Some(p)
257        } else {
258            self.chars.next()
259        }
260    }
261
262    fn peek(&mut self) -> Option<(usize, char)> {
263        if self.peeked.is_none() {
264            self.peeked = self.chars.next();
265        }
266        self.peeked
267    }
268
269    fn make_span(&self, start: usize, end: usize) -> Span {
270        Span::new(start, end)
271    }
272
273    fn consume_while<F>(&mut self, start: usize, mut pred: F) -> (Span, &'src str)
274    where
275        F: FnMut(char) -> bool,
276    {
277        let mut last = start;
278        let mut saw_any = false;
279        while let Some((idx, ch)) = self.peek() {
280            if !pred(ch) {
281                break;
282            }
283            saw_any = true;
284            last = idx;
285            self.bump();
286        }
287        let end = if saw_any { last + 1 } else { start + 1 };
288        let span = self.make_span(start, end);
289        let lexeme = &self.src[span.range.clone()];
290        (span, lexeme)
291    }
292
293    /// Collect leading trivia: whitespace, newlines, and comments before a token.
294    fn collect_leading_trivia(&mut self) -> Vec<Trivia> {
295        let mut trivia = Vec::new();
296        loop {
297            match self.peek() {
298                Some((_, ' ')) | Some((_, '\t')) => {
299                    // Collect horizontal whitespace
300                    let mut ws = String::new();
301                    while let Some((_, ch)) = self.peek() {
302                        if ch == ' ' || ch == '\t' {
303                            ws.push(ch);
304                            self.bump();
305                        } else {
306                            break;
307                        }
308                    }
309                    if !ws.is_empty() {
310                        trivia.push(Trivia::Whitespace(ws));
311                    }
312                }
313                Some((_, '\n')) => {
314                    self.bump();
315                    trivia.push(Trivia::Newline("\n".to_string()));
316                }
317                Some((_, '\r')) => {
318                    self.bump();
319                    if let Some((_, '\n')) = self.peek() {
320                        self.bump();
321                        trivia.push(Trivia::Newline("\r\n".to_string()));
322                    } else {
323                        // Standalone \r - treat as newline
324                        trivia.push(Trivia::Newline("\r".to_string()));
325                    }
326                }
327                Some((start, '/')) => {
328                    // Check if this is a line comment
329                    let mut clone = self.chars.clone();
330                    if let Some((_, '/')) = clone.next() {
331                        // It's a line comment
332                        let comment_start = start;
333                        self.bump(); // consume first '/'
334                        self.bump(); // consume second '/'
335
336                        // Collect until end of line
337                        while let Some((_, ch)) = self.peek() {
338                            if ch == '\n' {
339                                break;
340                            }
341                            self.bump();
342                        }
343
344                        // Extract the comment text from source
345                        let comment_end = self.peek().map(|(i, _)| i).unwrap_or(self.end);
346                        let comment = &self.src[comment_start..comment_end];
347                        trivia.push(Trivia::LineComment(comment.to_string()));
348                    } else {
349                        // Not a comment, done collecting trivia
350                        break;
351                    }
352                }
353                _ => break,
354            }
355        }
356        trivia
357    }
358
359    /// Collect trailing trivia: whitespace and comments on the same line after a token.
360    fn collect_trailing_trivia(&mut self) -> Vec<Trivia> {
361        let mut trivia = Vec::new();
362        loop {
363            match self.peek() {
364                Some((_, ' ')) | Some((_, '\t')) => {
365                    // Collect horizontal whitespace
366                    let mut ws = String::new();
367                    while let Some((_, ch)) = self.peek() {
368                        if ch == ' ' || ch == '\t' {
369                            ws.push(ch);
370                            self.bump();
371                        } else {
372                            break;
373                        }
374                    }
375                    if !ws.is_empty() {
376                        trivia.push(Trivia::Whitespace(ws));
377                    }
378                }
379                Some((start, '/')) => {
380                    // Check if this is a line comment
381                    let mut clone = self.chars.clone();
382                    if let Some((_, '/')) = clone.next() {
383                        // It's a trailing line comment
384                        let comment_start = start;
385                        self.bump(); // consume first '/'
386                        self.bump(); // consume second '/'
387
388                        // Collect until end of line
389                        while let Some((_, ch)) = self.peek() {
390                            if ch == '\n' {
391                                break;
392                            }
393                            self.bump();
394                        }
395
396                        // Extract the comment text from source
397                        let comment_end = self.peek().map(|(i, _)| i).unwrap_or(self.end);
398                        let comment = &self.src[comment_start..comment_end];
399                        trivia.push(Trivia::LineComment(comment.to_string()));
400                        // After a line comment, stop collecting trailing trivia
401                        break;
402                    } else {
403                        // Not a comment, done collecting trailing trivia
404                        break;
405                    }
406                }
407                _ => {
408                    // Newline or other character - stop collecting trailing trivia
409                    break;
410                }
411            }
412        }
413        trivia
414    }
415
416    fn classify_ident_or_keyword(&self, _span: Span, text: &str) -> TokenKind {
417        match text {
418            "as" => TokenKind::Keyword(Keyword::As),
419            "pub" => TokenKind::Keyword(Keyword::Pub),
420            "use" => TokenKind::Keyword(Keyword::Use),
421            "fn" => TokenKind::Keyword(Keyword::Fn),
422            "let" => TokenKind::Keyword(Keyword::Let),
423            "mod" => TokenKind::Keyword(Keyword::Mod),
424            "mut" => TokenKind::Keyword(Keyword::Mut),
425            "struct" => TokenKind::Keyword(Keyword::Struct),
426            "enum" => TokenKind::Keyword(Keyword::Enum),
427            "type" => TokenKind::Keyword(Keyword::Type),
428            "extern" => TokenKind::Keyword(Keyword::Extern),
429            "if" => TokenKind::Keyword(Keyword::If),
430            "else" => TokenKind::Keyword(Keyword::Else),
431            "while" => TokenKind::Keyword(Keyword::While),
432            "loop" => TokenKind::Keyword(Keyword::Loop),
433            "match" => TokenKind::Keyword(Keyword::Match),
434            "break" => TokenKind::Keyword(Keyword::Break),
435            "continue" => TokenKind::Keyword(Keyword::Continue),
436            "return" => TokenKind::Keyword(Keyword::Return),
437            "true" => TokenKind::Keyword(Keyword::True),
438            "false" => TokenKind::Keyword(Keyword::False),
439            "trait" => TokenKind::Keyword(Keyword::Trait),
440            "impl" => TokenKind::Keyword(Keyword::Impl),
441            "for" => TokenKind::Keyword(Keyword::For),
442            "in" => TokenKind::Keyword(Keyword::In),
443            "Self" => TokenKind::Keyword(Keyword::SelfType),
444            "static" => TokenKind::Keyword(Keyword::Static),
445            "global" => TokenKind::Keyword(Keyword::Global),
446            "js" => TokenKind::Keyword(Keyword::Js),
447            _ => TokenKind::Ident(text.to_string()),
448        }
449    }
450
451    fn lex_number(&mut self, start: usize, first_ch: char) -> (TokenKind, Span) {
452        let (span, _text) = self.consume_while(start, |c| c.is_ascii_digit());
453        let mut end = if span.range.start == span.range.end {
454            // only first_ch
455            start + first_ch.len_utf8()
456        } else {
457            span.range.end
458        };
459
460        // Check for decimal point followed by digits (float literal)
461        let mut is_float = false;
462        if let Some((dot_idx, '.')) = self.peek() {
463            // Look ahead to see if there's a digit after the dot
464            // We need to check if the next character after '.' is a digit
465            let after_dot = self.src.get(dot_idx + 1..dot_idx + 2);
466            if let Some(ch_str) = after_dot {
467                if let Some(ch) = ch_str.chars().next() {
468                    if ch.is_ascii_digit() {
469                        // Consume the dot
470                        self.bump();
471                        // Consume the fractional digits
472                        let (frac_span, _) = self.consume_while(dot_idx + 1, |c| c.is_ascii_digit());
473                        end = frac_span.range.end;
474                        is_float = true;
475                    }
476                }
477            }
478        }
479
480        let full_span = Span::new(start, end);
481        let lexeme = &self.src[full_span.range.clone()];
482        let kind = if is_float {
483            TokenKind::FloatLiteral(lexeme.to_string())
484        } else {
485            TokenKind::IntLiteral(lexeme.to_string())
486        };
487        (kind, full_span)
488    }
489
490    fn lex_ident_or_keyword(&mut self, start: usize) -> (TokenKind, Span) {
491        let (span, text) = self.consume_while(start, |c| c.is_alphanumeric() || c == '_');
492        let kind = self.classify_ident_or_keyword(span.clone(), text);
493        (kind, span)
494    }
495
496    fn lex_string(&mut self, start: usize) -> (TokenKind, Span) {
497        // Assumes opening quote has already been consumed.
498        let mut end = start;
499        let mut value = String::new();
500
501        while let Some((idx, ch)) = self.bump() {
502            if ch == '"' {
503                end = idx + 1;
504                break;
505            } else if ch == '\\' {
506                // Handle escape sequences
507                if let Some((esc_idx, esc_ch)) = self.bump() {
508                    end = esc_idx + 1;
509                    match esc_ch {
510                        'n' => value.push('\n'),
511                        't' => value.push('\t'),
512                        'r' => value.push('\r'),
513                        '0' => value.push('\0'),
514                        '\\' => value.push('\\'),
515                        '"' => value.push('"'),
516                        // For unknown escapes, keep as-is
517                        other => {
518                            value.push('\\');
519                            value.push(other);
520                        }
521                    }
522                }
523            } else {
524                value.push(ch);
525                end = idx + 1;
526            }
527        }
528
529        let span = self.make_span(start, end);
530        (TokenKind::StringLiteral(value), span)
531    }
532}
533
impl<'src> Iterator for Lexer<'src> {
    type Item = Token;

    /// Produce the next token, attaching leading and trailing trivia.
    ///
    /// Returns `None` only after the Eof token has been emitted once.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }

        // Collect leading trivia (whitespace, newlines, comments before this token)
        let leading_trivia = self.collect_leading_trivia();

        let (start, ch) = match self.bump() {
            Some(pair) => pair,
            None => {
                // End of input: emit a zero-width Eof token at `self.end`
                // carrying any trivia that followed the last real token
                // (e.g. a final comment at end of file).
                let span = Span::new(self.end, self.end);
                self.finished = true;
                return Some(Token::with_trivia(
                    TokenKind::Eof,
                    span,
                    leading_trivia,
                    Vec::new(),
                ));
            }
        };

        // Get the token kind and span.
        // Two-character operators peek at the following character and only
        // bump it when it completes the operator, so spans stay exact.
        let (kind, span) = match ch {
            c if c.is_ascii_alphabetic() || c == '_' => self.lex_ident_or_keyword(start),
            c if c.is_ascii_digit() => self.lex_number(start, c),
            '"' => self.lex_string(start),
            '(' => (TokenKind::LParen, Span::new(start, start + 1)),
            ')' => (TokenKind::RParen, Span::new(start, start + 1)),
            '{' => (TokenKind::LBrace, Span::new(start, start + 1)),
            '}' => (TokenKind::RBrace, Span::new(start, start + 1)),
            ',' => (TokenKind::Comma, Span::new(start, start + 1)),
            ':' => {
                if let Some((idx2, ':')) = self.peek() {
                    self.bump();
                    (TokenKind::ColonColon, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Colon, Span::new(start, start + 1))
                }
            }
            ';' => (TokenKind::Semicolon, Span::new(start, start + 1)),
            '.' => {
                if let Some((idx2, '.')) = self.peek() {
                    self.bump(); // consume second '.'

                    if let Some((idx3, '=')) = self.peek() {
                        self.bump(); // consume '='
                        (TokenKind::DotDotEq, Span::new(start, idx3 + 1))
                    } else {
                        (TokenKind::DotDot, Span::new(start, idx2 + 1))
                    }
                } else {
                    (TokenKind::Dot, Span::new(start, start + 1))
                }
            }
            '-' => {
                if let Some((idx2, '>')) = self.peek() {
                    self.bump();
                    (TokenKind::Arrow, Span::new(start, idx2 + 1))
                } else if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::MinusEq, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Minus, Span::new(start, start + 1))
                }
            }
            '=' => {
                if let Some((idx2, next)) = self.peek() {
                    match next {
                        '>' => {
                            self.bump();
                            (TokenKind::FatArrow, Span::new(start, idx2 + 1))
                        }
                        '=' => {
                            self.bump();
                            (TokenKind::EqEq, Span::new(start, idx2 + 1))
                        }
                        _ => (TokenKind::Eq, Span::new(start, start + 1)),
                    }
                } else {
                    (TokenKind::Eq, Span::new(start, start + 1))
                }
            }
            '+' => {
                if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::PlusEq, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Plus, Span::new(start, start + 1))
                }
            }
            '*' => (TokenKind::Star, Span::new(start, start + 1)),
            // A '/' reaching here cannot start a comment: `//` sequences are
            // consumed as trivia before the token is bumped.
            '/' => (TokenKind::Slash, Span::new(start, start + 1)),
            '%' => {
                if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::PercentEq, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Percent, Span::new(start, start + 1))
                }
            }
            '!' => {
                if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::BangEq, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Bang, Span::new(start, start + 1))
                }
            }
            '<' => {
                if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::Le, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Lt, Span::new(start, start + 1))
                }
            }
            '>' => {
                if let Some((idx2, '=')) = self.peek() {
                    self.bump();
                    (TokenKind::Ge, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Gt, Span::new(start, start + 1))
                }
            }
            '&' => {
                if let Some((idx2, '&')) = self.peek() {
                    self.bump();
                    (TokenKind::AndAnd, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Amp, Span::new(start, start + 1))
                }
            }
            '|' => {
                if let Some((idx2, '|')) = self.peek() {
                    self.bump();
                    (TokenKind::OrOr, Span::new(start, idx2 + 1))
                } else {
                    (TokenKind::Pipe, Span::new(start, start + 1))
                }
            }
            '#' => (TokenKind::Hash, Span::new(start, start + 1)),
            '[' => (TokenKind::LBracket, Span::new(start, start + 1)),
            ']' => (TokenKind::RBracket, Span::new(start, start + 1)),
            _ => {
                // Unknown character, skip for now; in the future we will emit diagnostics.
                // NOTE(review): this emits an Eof token WITHOUT setting
                // `finished`, so iteration continues past the bad character —
                // confirm downstream consumers tolerate a mid-stream Eof.
                (TokenKind::Eof, Span::new(start, start + 1))
            }
        };

        // Collect trailing trivia (whitespace and comments on the same line after the token)
        let trailing_trivia = self.collect_trailing_trivia();

        Some(Token::with_trivia(kind, span, leading_trivia, trailing_trivia))
    }
}
693
#[cfg(test)]
mod tests {
    //! Unit tests covering trivia collection (leading/trailing whitespace,
    //! newlines, comments), blank-line counting, and string escape decoding.

    use super::*;

    #[test]
    fn test_trivia_leading_whitespace() {
        let src = "   foo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        assert!(matches!(token.kind, TokenKind::Ident(ref s) if s == "foo"));
        assert_eq!(token.leading_trivia.len(), 1);
        assert!(matches!(&token.leading_trivia[0], Trivia::Whitespace(ws) if ws == "   "));
    }

    #[test]
    fn test_trivia_leading_newlines() {
        let src = "\n\nfoo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        // Each '\n' becomes its own Newline trivia entry.
        assert!(matches!(token.kind, TokenKind::Ident(ref s) if s == "foo"));
        assert_eq!(token.leading_trivia.len(), 2);
        assert!(matches!(&token.leading_trivia[0], Trivia::Newline(nl) if nl == "\n"));
        assert!(matches!(&token.leading_trivia[1], Trivia::Newline(nl) if nl == "\n"));
    }

    #[test]
    fn test_trivia_leading_comment() {
        let src = "// this is a comment\nfoo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        // The comment text excludes the newline, which follows as its own trivia.
        assert!(matches!(token.kind, TokenKind::Ident(ref s) if s == "foo"));
        assert_eq!(token.leading_trivia.len(), 2);
        assert!(
            matches!(&token.leading_trivia[0], Trivia::LineComment(c) if c == "// this is a comment")
        );
        assert!(matches!(&token.leading_trivia[1], Trivia::Newline(nl) if nl == "\n"));
    }

    #[test]
    fn test_trivia_trailing_comment() {
        let src = "foo // trailing\nbar";
        let mut lexer = Lexer::new(src);

        let foo = lexer.next().unwrap();
        assert!(matches!(foo.kind, TokenKind::Ident(ref s) if s == "foo"));
        assert_eq!(foo.trailing_trivia.len(), 2);
        assert!(matches!(&foo.trailing_trivia[0], Trivia::Whitespace(ws) if ws == " "));
        assert!(
            matches!(&foo.trailing_trivia[1], Trivia::LineComment(c) if c == "// trailing")
        );

        let bar = lexer.next().unwrap();
        assert!(matches!(bar.kind, TokenKind::Ident(ref s) if s == "bar"));
        assert_eq!(bar.leading_trivia.len(), 1);
        assert!(matches!(&bar.leading_trivia[0], Trivia::Newline(nl) if nl == "\n"));
    }

    #[test]
    fn test_trivia_blank_lines() {
        let src = "\n\n\nfoo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        assert!(matches!(token.kind, TokenKind::Ident(ref s) if s == "foo"));
        // 3 newlines = 2 blank lines
        assert_eq!(token.leading_blank_lines(), 2);
    }

    #[test]
    fn test_trivia_has_leading_comments() {
        let src = "// comment\nfoo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        assert!(token.has_leading_comments());
    }

    #[test]
    fn test_trivia_has_trailing_comment() {
        let src = "foo // comment\n";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        assert!(token.has_trailing_comment());
    }

    #[test]
    fn test_trivia_complex_mixed() {
        let src = "  // header comment\n\n  fn main() {}";
        let mut lexer = Lexer::new(src);

        // First token: 'fn'
        let fn_token = lexer.next().unwrap();
        assert!(matches!(fn_token.kind, TokenKind::Keyword(Keyword::Fn)));
        // Leading: whitespace, comment, newline, newline, whitespace
        assert_eq!(fn_token.leading_trivia.len(), 5);
        assert!(matches!(&fn_token.leading_trivia[0], Trivia::Whitespace(_)));
        assert!(matches!(&fn_token.leading_trivia[1], Trivia::LineComment(_)));
        assert!(matches!(&fn_token.leading_trivia[2], Trivia::Newline(_)));
        assert!(matches!(&fn_token.leading_trivia[3], Trivia::Newline(_)));
        assert!(matches!(&fn_token.leading_trivia[4], Trivia::Whitespace(_)));
    }

    #[test]
    fn test_trivia_no_trivia() {
        let src = "foo";
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();

        assert!(matches!(token.kind, TokenKind::Ident(ref s) if s == "foo"));
        assert!(token.leading_trivia.is_empty());
        assert!(token.trailing_trivia.is_empty());
    }

    #[test]
    fn test_trivia_eof_preserves_trivia() {
        let src = "foo\n// final comment\n";
        let mut lexer = Lexer::new(src);

        let foo = lexer.next().unwrap();
        assert!(matches!(foo.kind, TokenKind::Ident(ref s) if s == "foo"));

        let eof = lexer.next().unwrap();
        assert!(matches!(eof.kind, TokenKind::Eof));
        // EOF should have the trailing comment as leading trivia
        assert!(eof.has_leading_comments());
    }

    #[test]
    fn test_trivia_between_tokens() {
        let src = "a + b";
        let mut lexer = Lexer::new(src);

        let a = lexer.next().unwrap();
        assert!(matches!(a.kind, TokenKind::Ident(ref s) if s == "a"));
        assert_eq!(a.trailing_trivia.len(), 1);
        assert!(matches!(&a.trailing_trivia[0], Trivia::Whitespace(ws) if ws == " "));

        let plus = lexer.next().unwrap();
        assert!(matches!(plus.kind, TokenKind::Plus));
        assert!(plus.leading_trivia.is_empty()); // trailing of 'a' consumed it
        assert_eq!(plus.trailing_trivia.len(), 1);
        assert!(matches!(&plus.trailing_trivia[0], Trivia::Whitespace(ws) if ws == " "));

        let b = lexer.next().unwrap();
        assert!(matches!(b.kind, TokenKind::Ident(ref s) if s == "b"));
        assert!(b.leading_trivia.is_empty()); // trailing of '+' consumed it
    }

    #[test]
    fn test_string_escape_sequences() {
        // Test newline escape
        let src = r#""\n""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\n"),
            "Expected newline character, got {:?}",
            token.kind
        );

        // Test tab escape
        let src = r#""\t""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\t"),
            "Expected tab character, got {:?}",
            token.kind
        );

        // Test backslash escape
        let src = r#""\\""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\\"),
            "Expected backslash character, got {:?}",
            token.kind
        );

        // Test quote escape
        let src = r#""\"""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\""),
            "Expected quote character, got {:?}",
            token.kind
        );

        // Test carriage return escape
        let src = r#""\r""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\r"),
            "Expected carriage return character, got {:?}",
            token.kind
        );

        // Test null escape
        let src = r#""\0""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "\0"),
            "Expected null character, got {:?}",
            token.kind
        );

        // Test mixed content with escapes
        let src = r#""hello\nworld""#;
        let mut lexer = Lexer::new(src);
        let token = lexer.next().unwrap();
        assert!(
            matches!(token.kind, TokenKind::StringLiteral(ref s) if s == "hello\nworld"),
            "Expected 'hello\\nworld', got {:?}",
            token.kind
        );
    }
}