Skip to main content

axon_frontend/
lexer.rs

1//! AXON Lexer — direct port of axon/compiler/lexer.py.
2//!
3//! Source text → Vec<Token>
4//!
5//! Handles:
6//!   - All AXON keywords (100+)
7//!   - String literals with escape sequences
8//!   - Integer / Float / Duration literals
9//!   - Arrow (->), DotDot (..), comparison operators
10//!   - **Lossless comment lexing (Fase 14.a)** — comments are emitted
11//!     as discriminated `LineComment` / `BlockComment` /
12//!     `DocLineComment` / `DocBlockComment` tokens preserving full
13//!     text + position. The parser materialises them into `Trivia`
14//!     attached to AST nodes (leading + trailing). Pre-14.a behaviour
15//!     (silent stripping) is reachable via `Lexer::tokenize_with(strip_comments=true)`
16//!     so callers that want the legacy IR-equivalent token stream
17//!     still get it without filtering.
18//!   - Line/column tracking for error messages
19
20use crate::tokens::{keyword_type, Token, TokenType};
21
22// ── Public error type ─────────────────────────────────────────────────────────
23
/// Error produced when the lexer cannot tokenize the source text.
///
/// Carries a human-readable `message` plus the 1-based `line` / `column`
/// the lexer associated with the failure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexerError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Line number reported for the error.
    pub line: u32,
    /// Column number reported for the error.
    pub column: u32,
}

impl std::fmt::Display for LexerError {
    /// Renders as `<message> at line <line>, column <column>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{} at line {}, column {}",
            self.message, self.line, self.column
        )
    }
}

// Lets callers propagate lexer failures with `?` into `Box<dyn Error>` and
// similar error-erasing containers.
impl std::error::Error for LexerError {}
30
31// ── Lexer ─────────────────────────────────────────────────────────────────────
32
33pub struct Lexer {
34    source: Vec<char>,
35    _filename: String,
36    pos: usize,
37    line: u32,
38    column: u32,
39    tokens: Vec<Token>,
40    /// Fase 14.a — when `true`, comment tokens are not emitted (legacy
41    /// pre-14.a behaviour). Default `false` preserves comments as
42    /// discriminated tokens so the parser can build the trivia channel.
43    strip_comments: bool,
44}
45
46impl Lexer {
47    pub fn new(source: &str, filename: &str) -> Self {
48        Lexer {
49            source: source.chars().collect(),
50            _filename: filename.to_string(),
51            pos: 0,
52            line: 1,
53            column: 1,
54            tokens: Vec::new(),
55            strip_comments: false,
56        }
57    }
58
59    // ── public API ────────────────────────────────────────────────
60
61    pub fn tokenize(self) -> Result<Vec<Token>, LexerError> {
62        self.tokenize_with(false)
63    }
64
65    /// Tokenize with explicit control over comment emission.
66    ///
67    /// `strip_comments=true` reproduces the pre-14.a behaviour — useful
68    /// for downstream tooling that treats comments as pure whitespace
69    /// (cost estimators, IR golden-file generators that should not
70    /// pick up a token-count delta from the trivia rewrite).
71    pub fn tokenize_with(mut self, strip_comments: bool) -> Result<Vec<Token>, LexerError> {
72        self.strip_comments = strip_comments;
73        while !self.at_end() {
74            self.consume_trivia()?;
75            if self.at_end() {
76                break;
77            }
78            self.scan_token()?;
79        }
80        self.tokens.push(Token {
81            ttype: TokenType::Eof,
82            value: String::new(),
83            line: self.line,
84            column: self.column,
85        });
86        Ok(self.tokens)
87    }
88
89    // ── character helpers ─────────────────────────────────────────
90
91    fn at_end(&self) -> bool {
92        self.pos >= self.source.len()
93    }
94
95    fn peek(&self) -> char {
96        if self.at_end() {
97            '\0'
98        } else {
99            self.source[self.pos]
100        }
101    }
102
103    fn peek_next(&self) -> char {
104        if self.pos + 1 >= self.source.len() {
105            '\0'
106        } else {
107            self.source[self.pos + 1]
108        }
109    }
110
111    fn advance(&mut self) -> char {
112        let ch = self.source[self.pos];
113        self.pos += 1;
114        if ch == '\n' {
115            self.line += 1;
116            self.column = 1;
117        } else {
118            self.column += 1;
119        }
120        ch
121    }
122
123    fn match_char(&mut self, expected: char) -> bool {
124        if self.at_end() || self.source[self.pos] != expected {
125            return false;
126        }
127        self.advance();
128        true
129    }
130
131    fn emit(&mut self, ttype: TokenType, value: &str, line: u32, column: u32) {
132        self.tokens.push(Token {
133            ttype,
134            value: value.to_string(),
135            line,
136            column,
137        });
138    }
139
140    // ── whitespace & comments (Fase 14.a — lossless) ─────────────
141
142    /// Consume whitespace + emit comment tokens until next non-trivia.
143    /// Pre-14.a this routine was named `skip_whitespace` and silently
144    /// discarded comments. It now advances past pure whitespace but
145    /// for comments delegates to `consume_*_comment` helpers that
146    /// emit discriminated tokens. `strip_comments=true` on
147    /// `tokenize_with` suppresses the emission while still advancing
148    /// the cursor.
149    fn consume_trivia(&mut self) -> Result<(), LexerError> {
150        while !self.at_end() {
151            let ch = self.peek();
152            if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' {
153                self.advance();
154            } else if ch == '/' && self.peek_next() == '/' {
155                self.consume_line_comment();
156            } else if ch == '/' && self.peek_next() == '*' {
157                self.consume_block_comment()?;
158            } else {
159                break;
160            }
161        }
162        Ok(())
163    }
164
165    /// Lex a `// …`, `/// …` or `//! …` line comment.
166    ///
167    /// Outer-doc heuristic (Fase 14.a): a line is an outer doc comment
168    /// iff it starts with EXACTLY three slashes (`///` followed by a
169    /// non-`/`). `////` (4+ slashes) is a regular line comment.
170    ///
171    /// Inner-doc heuristic (Fase 14.c): a line is an inner doc comment
172    /// iff it starts with `//!` (two slashes + bang). Inner doc
173    /// comments document the *enclosing* module/file rather than the
174    /// next sibling — same convention as Rust.
175    fn consume_line_comment(&mut self) {
176        let line = self.line;
177        let col = self.column;
178        self.advance(); // /
179        self.advance(); // /
180        let is_outer_doc = self.peek() == '/' && self.peek_next() != '/';
181        let is_inner_doc = !is_outer_doc && self.peek() == '!';
182        if is_outer_doc || is_inner_doc {
183            self.advance(); // third / (outer) or ! (inner)
184        }
185
186        let body_start = self.pos;
187        while !self.at_end() && self.peek() != '\n' {
188            self.advance();
189        }
190        let body: String = self.source[body_start..self.pos].iter().collect();
191
192        let (ttype, full_text) = if is_outer_doc {
193            (TokenType::DocLineComment, format!("///{body}"))
194        } else if is_inner_doc {
195            (TokenType::InnerDocLineComment, format!("//!{body}"))
196        } else {
197            (TokenType::LineComment, format!("//{body}"))
198        };
199
200        if !self.strip_comments {
201            self.emit(ttype, &full_text, line, col);
202        }
203    }
204
205    /// Lex a `/* … */`, `/** … */` or `/*! … */` block comment.
206    ///
207    /// Outer-doc heuristic (Fase 14.a): a block is an outer doc comment
208    /// iff it starts with exactly `/**` followed by a non-`/` character.
209    /// `/**/` (empty block) is a regular block, not a doc.
210    ///
211    /// Inner-doc heuristic (Fase 14.c): a block is an inner doc comment
212    /// iff it starts with `/*!` — same Rust convention as `//!`.
213    fn consume_block_comment(&mut self) -> Result<(), LexerError> {
214        let line = self.line;
215        let col = self.column;
216        self.advance(); // /
217        self.advance(); // *
218        let is_outer_doc = self.peek() == '*' && self.peek_next() != '/';
219        let is_inner_doc = !is_outer_doc && self.peek() == '!';
220        if is_outer_doc || is_inner_doc {
221            self.advance(); // second * (outer) or ! (inner)
222        }
223
224        let body_start = self.pos;
225        while !self.at_end() {
226            if self.peek() == '*' && self.peek_next() == '/' {
227                let body: String = self.source[body_start..self.pos].iter().collect();
228                self.advance(); // *
229                self.advance(); // /
230                let (ttype, full_text) = if is_outer_doc {
231                    (TokenType::DocBlockComment, format!("/**{body}*/"))
232                } else if is_inner_doc {
233                    (TokenType::InnerDocBlockComment, format!("/*!{body}*/"))
234                } else {
235                    (TokenType::BlockComment, format!("/*{body}*/"))
236                };
237                if !self.strip_comments {
238                    self.emit(ttype, &full_text, line, col);
239                }
240                return Ok(());
241            }
242            self.advance();
243        }
244        Err(LexerError {
245            message: "Unterminated block comment".to_string(),
246            line,
247            column: col,
248        })
249    }
250
251    // ── main scanner dispatch ─────────────────────────────────────
252
253    fn scan_token(&mut self) -> Result<(), LexerError> {
254        let line = self.line;
255        let col = self.column;
256        let ch = self.advance();
257
258        match ch {
259            '{' => self.emit(TokenType::LBrace, "{", line, col),
260            '}' => self.emit(TokenType::RBrace, "}", line, col),
261            '(' => self.emit(TokenType::LParen, "(", line, col),
262            ')' => self.emit(TokenType::RParen, ")", line, col),
263            '[' => self.emit(TokenType::LBracket, "[", line, col),
264            ']' => self.emit(TokenType::RBracket, "]", line, col),
265            ':' => self.emit(TokenType::Colon, ":", line, col),
266            ',' => self.emit(TokenType::Comma, ",", line, col),
267            '?' => self.emit(TokenType::Question, "?", line, col),
268            '@' => self.emit(TokenType::At, "@", line, col),
269            '+' => self.emit(TokenType::Plus, "+", line, col),
270            '*' => self.emit(TokenType::Star, "*", line, col),
271
272            '.' => {
273                if self.match_char('.') {
274                    self.emit(TokenType::DotDot, "..", line, col);
275                } else {
276                    self.emit(TokenType::Dot, ".", line, col);
277                }
278            }
279
280            '-' => {
281                if self.match_char('>') {
282                    self.emit(TokenType::Arrow, "->", line, col);
283                } else if !self.at_end() && self.peek().is_ascii_digit() {
284                    self.scan_number(line, col, '\0', true)?;
285                } else {
286                    self.emit(TokenType::Minus, "-", line, col);
287                }
288            }
289
290            '/' => self.emit(TokenType::Slash, "/", line, col),
291
292            '<' => {
293                if self.match_char('=') {
294                    self.emit(TokenType::Lte, "<=", line, col);
295                } else {
296                    self.emit(TokenType::Lt, "<", line, col);
297                }
298            }
299            '>' => {
300                if self.match_char('=') {
301                    self.emit(TokenType::Gte, ">=", line, col);
302                } else {
303                    self.emit(TokenType::Gt, ">", line, col);
304                }
305            }
306            '=' => {
307                if self.match_char('=') {
308                    self.emit(TokenType::Eq, "==", line, col);
309                } else {
310                    self.emit(TokenType::Assign, "=", line, col);
311                }
312            }
313            '!' => {
314                if self.match_char('=') {
315                    self.emit(TokenType::Neq, "!=", line, col);
316                } else {
317                    return Err(LexerError {
318                        message: "Unexpected '!'. Did you mean '!='?".to_string(),
319                        line,
320                        column: col,
321                    });
322                }
323            }
324
325            '"' => self.scan_string(line, col)?,
326
327            c if c.is_ascii_digit() => self.scan_number(line, col, c, false)?,
328            c if c.is_alphabetic() || c == '_' => self.scan_identifier(line, col, c),
329
330            // §Fase 54.b — `$` is only meaningful as interpolation, and
331            // interpolation in Axon canonically lives INSIDE a string
332            // literal (`"${name}"` / `"$name"`) — the same form used for
333            // prompt text, `persist` field values, and the `use <Tool> on
334            // "${param}"` tool argument. A bare `$` outside quotes is never
335            // valid; guide the author to quote it rather than emitting the
336            // opaque "unexpected character" message.
337            '$' => {
338                return Err(LexerError {
339                    message: "Unexpected '$'. Interpolation `${name}` / `$name` is only \
340                              valid inside a string literal — wrap it in quotes, e.g. \
341                              `\"${name}\"`."
342                        .to_string(),
343                    line,
344                    column: col,
345                });
346            }
347
348            c => {
349                return Err(LexerError {
350                    message: format!("Unexpected character {:?}", c),
351                    line,
352                    column: col,
353                });
354            }
355        }
356
357        Ok(())
358    }
359
360    // ── literal scanners ──────────────────────────────────────────
361
362    fn scan_string(&mut self, start_line: u32, start_col: u32) -> Result<(), LexerError> {
363        let mut chars = String::new();
364        while !self.at_end() && self.peek() != '"' {
365            if self.peek() == '\n' {
366                chars.push(self.advance());
367                continue;
368            }
369            if self.peek() == '\\' {
370                self.advance(); // consume backslash
371                if self.at_end() {
372                    return Err(LexerError {
373                        message: "Unterminated escape sequence".to_string(),
374                        line: self.line,
375                        column: self.column,
376                    });
377                }
378                let esc = self.advance();
379                match esc {
380                    'n' => chars.push('\n'),
381                    't' => chars.push('\t'),
382                    '\\' => chars.push('\\'),
383                    '"' => chars.push('"'),
384                    c => chars.push(c),
385                }
386            } else {
387                chars.push(self.advance());
388            }
389        }
390        if self.at_end() {
391            return Err(LexerError {
392                message: "Unterminated string".to_string(),
393                line: start_line,
394                column: start_col,
395            });
396        }
397        self.advance(); // closing "
398        self.emit(TokenType::StringLit, &chars, start_line, start_col);
399        Ok(())
400    }
401
402    /// `first_char`: the first digit already consumed (or '\0' if negative prefix).
403    /// `negative`: true if a leading '-' was consumed before calling this.
404    fn scan_number(
405        &mut self,
406        start_line: u32,
407        start_col: u32,
408        first_char: char,
409        negative: bool,
410    ) -> Result<(), LexerError> {
411        let mut digits = String::new();
412        if negative {
413            digits.push('-');
414        }
415        if first_char != '\0' {
416            digits.push(first_char);
417        }
418
419        // Integer part
420        while !self.at_end() && self.peek().is_ascii_digit() {
421            digits.push(self.advance());
422        }
423
424        let mut is_float = false;
425
426        // Decimal point (but not range operator ..)
427        if !self.at_end() && self.peek() == '.' && self.peek_next() != '.' {
428            is_float = true;
429            digits.push(self.advance()); // '.'
430            if self.at_end() || !self.peek().is_ascii_digit() {
431                return Err(LexerError {
432                    message: "Expected digit after decimal point".to_string(),
433                    line: self.line,
434                    column: self.column,
435                });
436            }
437            while !self.at_end() && self.peek().is_ascii_digit() {
438                digits.push(self.advance());
439            }
440        }
441
442        let raw = digits.clone();
443
444        // Duration suffix?
445        if !self.at_end() && self.peek().is_alphabetic() {
446            let saved_pos = self.pos;
447            let saved_col = self.column;
448            let mut suffix = String::new();
449            while !self.at_end() && self.peek().is_alphabetic() {
450                suffix.push(self.advance());
451            }
452            if matches!(suffix.as_str(), "s" | "ms" | "m" | "h" | "d") {
453                let value = format!("{}{}", raw, suffix);
454                self.emit(TokenType::Duration, &value, start_line, start_col);
455                return Ok(());
456            } else {
457                // Rewind
458                self.pos = saved_pos;
459                self.column = saved_col;
460            }
461        }
462
463        if is_float {
464            self.emit(TokenType::Float, &raw, start_line, start_col);
465        } else {
466            self.emit(TokenType::Integer, &raw, start_line, start_col);
467        }
468        Ok(())
469    }
470
471    fn scan_identifier(&mut self, start_line: u32, start_col: u32, first_char: char) {
472        let mut word = String::new();
473        word.push(first_char);
474        while !self.at_end() && (self.peek().is_alphanumeric() || self.peek() == '_') {
475            word.push(self.advance());
476        }
477        let ttype = keyword_type(&word);
478        self.emit(ttype, &word, start_line, start_col);
479    }
480}
481
482#[cfg(test)]
483mod fase_1_to_5_end_to_end {
484    //! Lexer integration tests covering the new Fase 1–5 keywords end-to-end.
485    //! These feed real source text through the lexer and assert the emitted
486    //! TokenTypes — closing the loop beyond the unit tests in `tokens.rs`.
487    use super::*;
488
489    fn kinds(source: &str) -> Vec<TokenType> {
490        Lexer::new(source, "<test>")
491            .tokenize()
492            .expect("lex ok")
493            .into_iter()
494            .map(|t| t.ttype)
495            .collect()
496    }
497
498    #[test]
499    fn resource_decl_tokenizes() {
500        let kinds = kinds("resource Db { kind: postgres lifetime: linear }");
501        assert!(kinds.contains(&TokenType::Resource));
502        assert!(kinds.contains(&TokenType::LBrace));
503        assert!(kinds.contains(&TokenType::RBrace));
504    }
505
506    #[test]
507    fn fabric_manifest_observe_tokenize() {
508        let src = r#"
509            fabric Vpc { provider: aws region: "us-east-1" zones: 2 }
510            manifest M { resources: [Db] fabric: Vpc }
511            observe O { sources: [M] quorum: 1 on_partition: degrade }
512        "#;
513        let k = kinds(src);
514        assert!(k.contains(&TokenType::Fabric));
515        assert!(k.contains(&TokenType::Manifest));
516        assert!(k.contains(&TokenType::Observe));
517    }
518
519    #[test]
520    fn reconcile_lease_ensemble_tokenize() {
521        let src = r#"
522            reconcile R { manifest: M observe: O max_retries: 3 period: "60s" }
523            lease L { resource: Db ttl: "30m" renewable: true }
524            ensemble E { daemons: [] quorum: 1 disagreement: degrade }
525        "#;
526        let k = kinds(src);
527        assert!(k.contains(&TokenType::Reconcile));
528        assert!(k.contains(&TokenType::Lease));
529        assert!(k.contains(&TokenType::Ensemble));
530    }
531
532    #[test]
533    fn topology_and_session_pi_calculus_tokenize() {
534        let src = r#"
535            session S {
536              client: [send Request end]
537              server: [receive Request end]
538            }
539            topology T { nodes: [A, B] edges: [A -> B : S] }
540        "#;
541        let k = kinds(src);
542        assert!(k.contains(&TokenType::Session));
543        assert!(k.contains(&TokenType::Send));
544        assert!(k.contains(&TokenType::Receive));
545        assert!(k.contains(&TokenType::End));
546        assert!(k.contains(&TokenType::Topology));
547    }
548
549    #[test]
550    fn immune_reflex_heal_tokenize() {
551        let src = r#"
552            immune I { sensitivity: 0.5 window: "1m" baseline: "7d" action: alert }
553            reflex Rf { on: drift action: throttle }
554            heal H { target: I max_patches: 3 rollback_on: divergence }
555        "#;
556        let k = kinds(src);
557        assert!(k.contains(&TokenType::Immune));
558        assert!(k.contains(&TokenType::Reflex));
559        assert!(k.contains(&TokenType::Heal));
560    }
561
562    #[test]
563    fn new_keywords_do_not_collide_with_identifiers() {
564        // Identifiers that look similar must still lex as Identifier, not keyword.
565        let k = kinds("resource_group manifested observer reconciled leased");
566        for tt in k.iter() {
567            assert!(
568                !matches!(
569                    tt,
570                    TokenType::Resource
571                        | TokenType::Manifest
572                        | TokenType::Observe
573                        | TokenType::Reconcile
574                        | TokenType::Lease
575                ),
576                "near-match identifier wrongly classified as keyword: {tt:?}"
577            );
578        }
579    }
580}
581
582// ── §Fase 14.a — Lossless lexing tests ─────────────────────────────────────
583
584#[cfg(test)]
585mod fase14a_trivia_tests {
586    use super::*;
587
588    fn lex(src: &str) -> Vec<Token> {
589        Lexer::new(src, "<test>").tokenize().expect("lex")
590    }
591
592    fn lex_strip(src: &str) -> Vec<Token> {
593        Lexer::new(src, "<test>").tokenize_with(true).expect("lex")
594    }
595
596    fn non_eof(toks: &[Token]) -> Vec<&Token> {
597        toks.iter().filter(|t| t.ttype != TokenType::Eof).collect()
598    }
599
600    #[test]
601    fn regular_line_comment_emitted_as_line_comment() {
602        let toks = lex("// hi");
603        let body: Vec<_> = non_eof(&toks);
604        assert_eq!(body.len(), 1);
605        assert_eq!(body[0].ttype, TokenType::LineComment);
606        assert_eq!(body[0].value, "// hi");
607    }
608
609    #[test]
610    fn doc_line_comment_emitted_with_doc_kind() {
611        let toks = lex("/// docs");
612        let body: Vec<_> = non_eof(&toks);
613        assert_eq!(body[0].ttype, TokenType::DocLineComment);
614        assert_eq!(body[0].value, "/// docs");
615    }
616
617    #[test]
618    fn four_slash_banner_is_regular_not_doc() {
619        // Mirrors Python lexer behaviour and Rust convention.
620        let toks = lex("//// banner");
621        assert_eq!(non_eof(&toks)[0].ttype, TokenType::LineComment);
622    }
623
624    #[test]
625    fn regular_block_comment_emitted() {
626        let toks = lex("/* body */");
627        assert_eq!(non_eof(&toks)[0].ttype, TokenType::BlockComment);
628        assert_eq!(non_eof(&toks)[0].value, "/* body */");
629    }
630
631    #[test]
632    fn doc_block_comment_emitted() {
633        let toks = lex("/** docs */");
634        assert_eq!(non_eof(&toks)[0].ttype, TokenType::DocBlockComment);
635    }
636
637    #[test]
638    fn empty_block_is_regular_not_doc() {
639        // /**/ — empty regular block, not a doc comment.
640        let toks = lex("/**/");
641        assert_eq!(non_eof(&toks)[0].ttype, TokenType::BlockComment);
642    }
643
644    #[test]
645    fn strip_comments_opt_in_legacy() {
646        let src = "// dropped\nflow F() -> Out { }";
647        let toks = lex_strip(src);
648        for t in &toks {
649            assert!(
650                !matches!(
651                    t.ttype,
652                    TokenType::LineComment
653                        | TokenType::BlockComment
654                        | TokenType::DocLineComment
655                        | TokenType::DocBlockComment
656                        | TokenType::InnerDocLineComment
657                        | TokenType::InnerDocBlockComment
658                ),
659                "strip_comments=true must not emit any comment kind, got {:?}",
660                t.ttype
661            );
662        }
663    }
664
665    // ── Fase 14.c — inner doc comments (`//!`, `/*!`) ──
666
667    #[test]
668    fn inner_doc_line_comment_emitted_with_inner_doc_kind() {
669        let toks = lex("//! module docs");
670        let body: Vec<_> = non_eof(&toks);
671        assert_eq!(body[0].ttype, TokenType::InnerDocLineComment);
672        assert_eq!(body[0].value, "//! module docs");
673    }
674
675    #[test]
676    fn inner_doc_block_comment_emitted_with_inner_doc_kind() {
677        let toks = lex("/*! module docs */");
678        let body: Vec<_> = non_eof(&toks);
679        assert_eq!(body[0].ttype, TokenType::InnerDocBlockComment);
680        assert_eq!(body[0].value, "/*! module docs */");
681    }
682
683    #[test]
684    fn outer_and_inner_doc_distinguished() {
685        // `///` → outer; `//!` → inner; `//` → regular. All three must
686        // be discriminated correctly when colocated.
687        let toks = lex("/// outer\n//! inner\n// plain");
688        let body: Vec<_> = non_eof(&toks);
689        assert_eq!(body.len(), 3);
690        assert_eq!(body[0].ttype, TokenType::DocLineComment);
691        assert_eq!(body[1].ttype, TokenType::InnerDocLineComment);
692        assert_eq!(body[2].ttype, TokenType::LineComment);
693    }
694
695    #[test]
696    fn block_outer_and_inner_doc_distinguished() {
697        let toks = lex("/** outer */\n/*! inner */\n/* plain */");
698        let body: Vec<_> = non_eof(&toks);
699        assert_eq!(body.len(), 3);
700        assert_eq!(body[0].ttype, TokenType::DocBlockComment);
701        assert_eq!(body[1].ttype, TokenType::InnerDocBlockComment);
702        assert_eq!(body[2].ttype, TokenType::BlockComment);
703    }
704
705    #[test]
706    fn comment_loc_preserved_across_lines() {
707        let toks = lex("// a\n/// b\n/* c */");
708        let body: Vec<_> = non_eof(&toks);
709        assert_eq!(body[0].line, 1);
710        assert_eq!(body[1].line, 2);
711        assert_eq!(body[2].line, 3);
712    }
713
714    #[test]
715    fn unterminated_block_still_errors() {
716        let result = Lexer::new("/* never closes", "<test>").tokenize();
717        assert!(result.is_err());
718        let err = result.unwrap_err();
719        assert!(err.message.contains("Unterminated"));
720    }
721
722    #[test]
723    fn trivia_helpers_strip_markers() {
724        use crate::tokens::{Trivia, TriviaKind};
725        let t = Trivia {
726            kind: TriviaKind::DocLine,
727            text: "/// hi".into(),
728            line: 1,
729            column: 1,
730        };
731        assert!(t.is_doc());
732        assert_eq!(t.stripped_text(), " hi");
733        let b = Trivia {
734            kind: TriviaKind::DocBlock,
735            text: "/** body */".into(),
736            line: 1,
737            column: 1,
738        };
739        assert!(b.is_doc());
740        assert_eq!(b.stripped_text(), " body ");
741        let r = Trivia {
742            kind: TriviaKind::Line,
743            text: "// regular".into(),
744            line: 1,
745            column: 1,
746        };
747        assert!(!r.is_doc());
748        assert_eq!(r.stripped_text(), " regular");
749    }
750}