//! Token extraction for duplicate (clone) detection.
//! File: fallow_core/duplicates/tokenize.rs

1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
/// A single token extracted from the AST with its source location.
#[derive(Debug, Clone)]
pub struct SourceToken {
    /// The normalized kind of token (keyword, identifier, literal, operator, punctuation).
    pub kind: TokenKind,
    /// Byte span into the source file this token was extracted from.
    pub span: Span,
}
19
/// Normalized token types for clone detection.
///
/// Identifier and string/numeric literal payloads keep their source text;
/// template and regexp literals are collapsed to a single marker token.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Keywords
    Keyword(KeywordType),
    // Identifiers -- value is the actual name (blinded in semantic mode)
    Identifier(String),
    // Literals
    StringLiteral(String),
    NumericLiteral(String),
    BooleanLiteral(bool),
    NullLiteral,
    TemplateLiteral,
    RegExpLiteral,
    // Operators
    Operator(OperatorType),
    // Punctuation / delimiters
    Punctuation(PunctuationType),
}
39
/// TypeScript/JavaScript keyword types.
///
/// Includes contextual keywords (`from`, `as`, `get`, `set`, …) and
/// TypeScript-only keywords (`interface`, `keyof`, `satisfies`, …).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordType {
    Var,
    Let,
    Const,
    Function,
    Return,
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Break,
    Continue,
    Default,
    Throw,
    Try,
    Catch,
    Finally,
    New,
    Delete,
    Typeof,
    Instanceof,
    In,
    Of,
    Void,
    This,
    Super,
    Class,
    Extends,
    Import,
    Export,
    From,
    As,
    Async,
    Await,
    Yield,
    Static,
    Get,
    Set,
    Type,
    Interface,
    Enum,
    Implements,
    Abstract,
    Declare,
    Readonly,
    Keyof,
    Satisfies,
}
93
/// Operator categories.
///
/// Binary, logical, unary, update, and compound-assignment operators each
/// get their own variant; `Comma` covers both argument separators and the
/// sequence (comma) operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatorType {
    Assign,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Exp,
    Eq,
    NEq,
    StrictEq,
    StrictNEq,
    Lt,
    Gt,
    LtEq,
    GtEq,
    And,
    Or,
    Not,
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,
    ShiftLeft,
    ShiftRight,
    UnsignedShiftRight,
    NullishCoalescing,
    OptionalChaining,
    Spread,
    Ternary,
    Arrow,
    Comma,
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    ExpAssign,
    AndAssign,
    OrAssign,
    NullishAssign,
    BitwiseAndAssign,
    BitwiseOrAssign,
    BitwiseXorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    UnsignedShiftRightAssign,
    Increment,
    Decrement,
    Instanceof,
    In,
}
148
/// Punctuation / delimiter types.
///
/// Note: commas are tokenized as [`OperatorType::Comma`], not here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PunctuationType {
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Semicolon,
    Colon,
    Dot,
}
162
/// Result of tokenizing a source file.
#[derive(Debug, Clone)]
pub struct FileTokens {
    /// The extracted token sequence, in source order.
    pub tokens: Vec<SourceToken>,
    /// Full source text (needed for extracting fragments by span).
    pub source: String,
    /// Total number of lines in the source (always at least 1).
    pub line_count: usize,
}
173
/// Create a 1-byte span at the given byte position.
///
/// Used for synthetic punctuation tokens (`(`, `)`, `,`, `.`) that don't
/// have their own AST span. Using the parent expression's full span would
/// inflate clone line ranges, especially in chained method calls.
const fn point_span(pos: u32) -> Span {
    Span::new(pos, pos + 1)
}
182
/// Tokenize a source file into a sequence of normalized tokens.
///
/// For Vue/Svelte SFC files, extracts `<script>` blocks first and tokenizes
/// their content, mirroring the main analysis pipeline's SFC handling.
/// For Astro files, extracts frontmatter. For MDX files, extracts import/export statements.
///
/// TypeScript type annotations are kept as-is; use
/// [`tokenize_file_cross_language`] with `strip_types = true` when comparing
/// `.ts` against `.js` files.
pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
    tokenize_file_inner(path, source, false)
}
195
/// Tokenize a source file with optional type stripping for cross-language detection.
///
/// When `strip_types` is true, TypeScript type annotations, interfaces, and type
/// aliases are stripped from the token stream. This enables cross-language clone
/// detection between `.ts` and `.js` files.
pub fn tokenize_file_cross_language(path: &Path, source: &str, strip_types: bool) -> FileTokens {
    tokenize_file_inner(path, source, strip_types)
}
200
201fn tokenize_file_inner(path: &Path, source: &str, strip_types: bool) -> FileTokens {
202    use crate::extract::{
203        extract_astro_frontmatter, extract_mdx_statements, extract_sfc_scripts, is_sfc_file,
204    };
205
206    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
207
208    // For Vue/Svelte SFCs, extract and tokenize `<script>` blocks.
209    if is_sfc_file(path) {
210        let scripts = extract_sfc_scripts(source);
211        let mut all_tokens = Vec::new();
212
213        for script in &scripts {
214            let source_type = match (script.is_typescript, script.is_jsx) {
215                (true, true) => SourceType::tsx(),
216                (true, false) => SourceType::ts(),
217                (false, true) => SourceType::jsx(),
218                (false, false) => SourceType::mjs(),
219            };
220            let allocator = Allocator::default();
221            let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
222
223            let mut extractor = TokenExtractor::with_strip_types(strip_types);
224            extractor.visit_program(&parser_return.program);
225
226            // Adjust token spans to reference positions in the full SFC source
227            // rather than the extracted script block.
228            let offset = script.byte_offset as u32;
229            for token in &mut extractor.tokens {
230                token.span = Span::new(token.span.start + offset, token.span.end + offset);
231            }
232            all_tokens.extend(extractor.tokens);
233        }
234
235        let line_count = source.lines().count().max(1);
236        return FileTokens {
237            tokens: all_tokens,
238            source: source.to_string(),
239            line_count,
240        };
241    }
242
243    // For Astro files, extract and tokenize frontmatter.
244    if ext == "astro" {
245        if let Some(script) = extract_astro_frontmatter(source) {
246            let allocator = Allocator::default();
247            let parser_return = Parser::new(&allocator, &script.body, SourceType::ts()).parse();
248
249            let mut extractor = TokenExtractor::with_strip_types(strip_types);
250            extractor.visit_program(&parser_return.program);
251
252            let offset = script.byte_offset as u32;
253            for token in &mut extractor.tokens {
254                token.span = Span::new(token.span.start + offset, token.span.end + offset);
255            }
256
257            let line_count = source.lines().count().max(1);
258            return FileTokens {
259                tokens: extractor.tokens,
260                source: source.to_string(),
261                line_count,
262            };
263        }
264        // No frontmatter — return empty tokens.
265        let line_count = source.lines().count().max(1);
266        return FileTokens {
267            tokens: Vec::new(),
268            source: source.to_string(),
269            line_count,
270        };
271    }
272
273    // For MDX files, extract and tokenize import/export statements.
274    if ext == "mdx" {
275        let statements = extract_mdx_statements(source);
276        if !statements.is_empty() {
277            let allocator = Allocator::default();
278            let parser_return = Parser::new(&allocator, &statements, SourceType::jsx()).parse();
279
280            let mut extractor = TokenExtractor::with_strip_types(strip_types);
281            extractor.visit_program(&parser_return.program);
282
283            let line_count = source.lines().count().max(1);
284            return FileTokens {
285                tokens: extractor.tokens,
286                source: source.to_string(),
287                line_count,
288            };
289        }
290        let line_count = source.lines().count().max(1);
291        return FileTokens {
292            tokens: Vec::new(),
293            source: source.to_string(),
294            line_count,
295        };
296    }
297
298    // CSS/SCSS files are not JS/TS — skip tokenization for duplication detection.
299    if ext == "css" || ext == "scss" {
300        let line_count = source.lines().count().max(1);
301        return FileTokens {
302            tokens: Vec::new(),
303            source: source.to_string(),
304            line_count,
305        };
306    }
307
308    let source_type = SourceType::from_path(path).unwrap_or_default();
309    let allocator = Allocator::default();
310    let parser_return = Parser::new(&allocator, source, source_type).parse();
311
312    let mut extractor = TokenExtractor::with_strip_types(strip_types);
313    extractor.visit_program(&parser_return.program);
314
315    // If parsing produced very few tokens relative to source size (likely parse errors
316    // from Flow types or JSX in .js files), retry with JSX/TSX source type as a fallback.
317    if extractor.tokens.len() < 5 && source.len() > 100 && !source_type.is_jsx() {
318        let jsx_type = if source_type.is_typescript() {
319            SourceType::tsx()
320        } else {
321            SourceType::jsx()
322        };
323        let allocator2 = Allocator::default();
324        let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
325        let mut retry_extractor = TokenExtractor::with_strip_types(strip_types);
326        retry_extractor.visit_program(&retry_return.program);
327        if retry_extractor.tokens.len() > extractor.tokens.len() {
328            extractor = retry_extractor;
329        }
330    }
331
332    let line_count = source.lines().count().max(1);
333
334    FileTokens {
335        tokens: extractor.tokens,
336        source: source.to_string(),
337        line_count,
338    }
339}
340
/// AST visitor that extracts a flat sequence of normalized tokens.
struct TokenExtractor {
    // Accumulated tokens, in source order.
    tokens: Vec<SourceToken>,
    /// When true, skip TypeScript type annotations, interfaces, and type aliases
    /// to enable cross-language clone detection between .ts and .js files.
    strip_types: bool,
}
348
349impl TokenExtractor {
350    const fn with_strip_types(strip_types: bool) -> Self {
351        Self {
352            tokens: Vec::new(),
353            strip_types,
354        }
355    }
356
357    fn push(&mut self, kind: TokenKind, span: Span) {
358        self.tokens.push(SourceToken { kind, span });
359    }
360
361    fn push_keyword(&mut self, kw: KeywordType, span: Span) {
362        self.push(TokenKind::Keyword(kw), span);
363    }
364
365    fn push_op(&mut self, op: OperatorType, span: Span) {
366        self.push(TokenKind::Operator(op), span);
367    }
368
369    fn push_punc(&mut self, p: PunctuationType, span: Span) {
370        self.push(TokenKind::Punctuation(p), span);
371    }
372}
373
374impl<'a> Visit<'a> for TokenExtractor {
375    // ── Statements ──────────────────────────────────────────
376
    /// Emit the declaration keyword, then walk the declarators.
    fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
        let kw = match decl.kind {
            VariableDeclarationKind::Var => KeywordType::Var,
            VariableDeclarationKind::Let => KeywordType::Let,
            VariableDeclarationKind::Const => KeywordType::Const,
            // No dedicated variant for `using` / `await using`; normalize to
            // `const` so such declarations still participate in matching.
            VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
                KeywordType::Const
            }
        };
        self.push_keyword(kw, decl.span);
        walk::walk_variable_declaration(self, decl);
    }
389
390    fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
391        self.push_keyword(KeywordType::Return, stmt.span);
392        walk::walk_return_statement(self, stmt);
393    }
394
    /// Emit `if ( test ) consequent [else alternate]`.
    ///
    /// Children are visited manually (no `walk`) so the synthetic parens and
    /// the `else` keyword land in token order. The parens and `else` reuse
    /// `stmt.span` rather than point spans.
    fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
        self.push_keyword(KeywordType::If, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_expression(&stmt.test);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.consequent);
        if let Some(alt) = &stmt.alternate {
            self.push_keyword(KeywordType::Else, stmt.span);
            self.visit_statement(alt);
        }
    }
406
    /// Emit `for (` … `)`, with the init/test/update/body walked in between.
    fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        // NOTE: the body is walked before the close paren is pushed, so the
        // `)` token follows the body tokens — a consistent normalization.
        walk::walk_for_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }
413
    /// Emit `for ( left in right ) body`, visiting children manually so the
    /// `in` keyword is placed between left and right.
    fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_for_statement_left(&stmt.left);
        self.push_keyword(KeywordType::In, stmt.span);
        self.visit_expression(&stmt.right);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.body);
    }
423
    /// Emit `for ( left of right ) body`, visiting children manually so the
    /// `of` keyword is placed between left and right.
    fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_for_statement_left(&stmt.left);
        self.push_keyword(KeywordType::Of, stmt.span);
        self.visit_expression(&stmt.right);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.body);
    }
433
    /// Emit `while (` … `)`, with test and body walked in between.
    fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
        self.push_keyword(KeywordType::While, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        walk::walk_while_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }
440
441    fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
442        self.push_keyword(KeywordType::Do, stmt.span);
443        walk::walk_do_while_statement(self, stmt);
444    }
445
    /// Emit `switch (` … `)`, with discriminant and cases walked in between.
    fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
        self.push_keyword(KeywordType::Switch, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        walk::walk_switch_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }
452
453    fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
454        if case.test.is_some() {
455            self.push_keyword(KeywordType::Case, case.span);
456        } else {
457            self.push_keyword(KeywordType::Default, case.span);
458        }
459        self.push_punc(PunctuationType::Colon, case.span);
460        walk::walk_switch_case(self, case);
461    }
462
463    fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
464        self.push_keyword(KeywordType::Break, stmt.span);
465    }
466
467    fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
468        self.push_keyword(KeywordType::Continue, stmt.span);
469    }
470
471    fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
472        self.push_keyword(KeywordType::Throw, stmt.span);
473        walk::walk_throw_statement(self, stmt);
474    }
475
476    fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
477        self.push_keyword(KeywordType::Try, stmt.span);
478        walk::walk_try_statement(self, stmt);
479    }
480
481    fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
482        self.push_keyword(KeywordType::Catch, clause.span);
483        walk::walk_catch_clause(self, clause);
484    }
485
486    fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
487        self.push_punc(PunctuationType::OpenBrace, block.span);
488        walk::walk_block_statement(self, block);
489        self.push_punc(PunctuationType::CloseBrace, block.span);
490    }
491
492    // ── Expressions ─────────────────────────────────────────
493
494    fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
495        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
496    }
497
498    fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
499        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
500    }
501
502    fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
503        self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
504    }
505
506    fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
507        let raw_str = lit
508            .raw
509            .as_ref()
510            .map_or_else(|| lit.value.to_string(), |r| r.to_string());
511        self.push(TokenKind::NumericLiteral(raw_str), lit.span);
512    }
513
514    fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
515        self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
516    }
517
518    fn visit_null_literal(&mut self, lit: &NullLiteral) {
519        self.push(TokenKind::NullLiteral, lit.span);
520    }
521
522    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
523        self.push(TokenKind::TemplateLiteral, lit.span);
524        walk::walk_template_literal(self, lit);
525    }
526
527    fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
528        self.push(TokenKind::RegExpLiteral, lit.span);
529    }
530
531    fn visit_this_expression(&mut self, expr: &ThisExpression) {
532        self.push_keyword(KeywordType::This, expr.span);
533    }
534
535    fn visit_super(&mut self, expr: &Super) {
536        self.push_keyword(KeywordType::Super, expr.span);
537    }
538
539    fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
540        self.push_punc(PunctuationType::OpenBracket, expr.span);
541        walk::walk_array_expression(self, expr);
542        self.push_punc(PunctuationType::CloseBracket, expr.span);
543    }
544
545    fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
546        self.push_punc(PunctuationType::OpenBrace, expr.span);
547        walk::walk_object_expression(self, expr);
548        self.push_punc(PunctuationType::CloseBrace, expr.span);
549    }
550
    /// Emit `callee ( arg , arg , )`.
    ///
    /// A comma is pushed after every argument, including the last — a
    /// consistent normalization rather than a faithful reproduction of the
    /// source. Optional-chaining calls (`f?.()`) are tokenized the same way.
    fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
        self.visit_expression(&expr.callee);
        // Use point spans for synthetic punctuation to avoid inflating clone
        // ranges when call expressions are chained (expr.span covers the
        // entire chain, not just this call's parentheses).
        let open = point_span(expr.callee.span().end);
        self.push_punc(PunctuationType::OpenParen, open);
        for arg in &expr.arguments {
            self.visit_argument(arg);
            let comma = point_span(arg.span().end);
            self.push_op(OperatorType::Comma, comma);
        }
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseParen, close);
    }
566
    /// Emit `new callee ( arg , arg , )`.
    ///
    /// Mirrors `visit_call_expression`: point spans for the synthetic parens,
    /// comma after every argument. Parens are emitted even for `new Foo`
    /// written without an argument list.
    fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
        self.push_keyword(KeywordType::New, expr.span);
        self.visit_expression(&expr.callee);
        let open = point_span(expr.callee.span().end);
        self.push_punc(PunctuationType::OpenParen, open);
        for arg in &expr.arguments {
            self.visit_argument(arg);
            let comma = point_span(arg.span().end);
            self.push_op(OperatorType::Comma, comma);
        }
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseParen, close);
    }
580
    /// Emit `object . property` with a point-span dot.
    fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
        self.visit_expression(&expr.object);
        // Use point span at the dot position (right after the object).
        self.push_punc(PunctuationType::Dot, dot);
        // The property name is recorded directly; it is an IdentifierName,
        // not an IdentifierReference, so it has no dedicated visitor here.
        self.push(
            TokenKind::Identifier(expr.property.name.to_string()),
            expr.property.span,
        );
    }
591
    /// Emit `object [ expression ]` with point-span brackets.
    fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
        self.visit_expression(&expr.object);
        let open = point_span(expr.object.span().end);
        self.push_punc(PunctuationType::OpenBracket, open);
        self.visit_expression(&expr.expression);
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseBracket, close);
    }
600
    /// Emit `target op value`, mapping each (compound) assignment operator to
    /// its own [`OperatorType`] variant. The operator token reuses `expr.span`.
    fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
        self.visit_assignment_target(&expr.left);
        let op = match expr.operator {
            AssignmentOperator::Assign => OperatorType::Assign,
            AssignmentOperator::Addition => OperatorType::AddAssign,
            AssignmentOperator::Subtraction => OperatorType::SubAssign,
            AssignmentOperator::Multiplication => OperatorType::MulAssign,
            AssignmentOperator::Division => OperatorType::DivAssign,
            AssignmentOperator::Remainder => OperatorType::ModAssign,
            AssignmentOperator::Exponential => OperatorType::ExpAssign,
            AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
            AssignmentOperator::LogicalOr => OperatorType::OrAssign,
            AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
            AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
            AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
            AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
            AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
            AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
            AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }
624
    /// Emit `left op right`, mapping each binary operator (including
    /// `instanceof` and `in`) to its [`OperatorType`] variant.
    fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
        self.visit_expression(&expr.left);
        let op = match expr.operator {
            BinaryOperator::Addition => OperatorType::Add,
            BinaryOperator::Subtraction => OperatorType::Sub,
            BinaryOperator::Multiplication => OperatorType::Mul,
            BinaryOperator::Division => OperatorType::Div,
            BinaryOperator::Remainder => OperatorType::Mod,
            BinaryOperator::Exponential => OperatorType::Exp,
            BinaryOperator::Equality => OperatorType::Eq,
            BinaryOperator::Inequality => OperatorType::NEq,
            BinaryOperator::StrictEquality => OperatorType::StrictEq,
            BinaryOperator::StrictInequality => OperatorType::StrictNEq,
            BinaryOperator::LessThan => OperatorType::Lt,
            BinaryOperator::GreaterThan => OperatorType::Gt,
            BinaryOperator::LessEqualThan => OperatorType::LtEq,
            BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
            BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
            BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
            BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
            BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
            BinaryOperator::ShiftRight => OperatorType::ShiftRight,
            BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
            BinaryOperator::Instanceof => OperatorType::Instanceof,
            BinaryOperator::In => OperatorType::In,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }
654
    /// Emit `left op right` for `&&`, `||`, and `??`.
    fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
        self.visit_expression(&expr.left);
        let op = match expr.operator {
            LogicalOperator::And => OperatorType::And,
            LogicalOperator::Or => OperatorType::Or,
            LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }
665
    /// Emit a unary operator token, then walk the operand.
    ///
    /// `typeof` / `void` / `delete` are keywords rather than operators, so
    /// those arms push a keyword and return early (after walking the operand)
    /// instead of falling through to `push_op`.
    fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
        let op = match expr.operator {
            UnaryOperator::UnaryPlus => OperatorType::Add,
            UnaryOperator::UnaryNegation => OperatorType::Sub,
            UnaryOperator::LogicalNot => OperatorType::Not,
            UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
            UnaryOperator::Typeof => {
                self.push_keyword(KeywordType::Typeof, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
            UnaryOperator::Void => {
                self.push_keyword(KeywordType::Void, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
            UnaryOperator::Delete => {
                self.push_keyword(KeywordType::Delete, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
        };
        self.push_op(op, expr.span);
        walk::walk_unary_expression(self, expr);
    }
691
692    fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
693        let op = match expr.operator {
694            UpdateOperator::Increment => OperatorType::Increment,
695            UpdateOperator::Decrement => OperatorType::Decrement,
696        };
697        if expr.prefix {
698            self.push_op(op, expr.span);
699        }
700        walk::walk_update_expression(self, expr);
701        if !expr.prefix {
702            self.push_op(op, expr.span);
703        }
704    }
705
706    fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
707        self.visit_expression(&expr.test);
708        self.push_op(OperatorType::Ternary, expr.span);
709        self.visit_expression(&expr.consequent);
710        self.push_punc(PunctuationType::Colon, expr.span);
711        self.visit_expression(&expr.alternate);
712    }
713
    /// Emit `[async] ( param , param , ) =>`, then walk the arrow function.
    ///
    /// NOTE(review): the parameters are visited manually here and then
    /// `walk_arrow_function_expression` is called, which may traverse the
    /// parameter list again — confirm against `oxc_ast_visit::walk` that
    /// parameter identifiers are not tokenized twice.
    fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
        if expr.r#async {
            self.push_keyword(KeywordType::Async, expr.span);
        }
        let params_span = expr.params.span;
        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
        for param in &expr.params.items {
            self.visit_binding_pattern(&param.pattern);
            // Comma after every parameter, including the last (normalization).
            self.push_op(OperatorType::Comma, point_span(param.span.end));
        }
        self.push_punc(
            PunctuationType::CloseParen,
            point_span(params_span.end.saturating_sub(1)),
        );
        self.push_op(OperatorType::Arrow, point_span(params_span.end));
        walk::walk_arrow_function_expression(self, expr);
    }
731
732    fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
733        self.push_keyword(KeywordType::Yield, expr.span);
734        walk::walk_yield_expression(self, expr);
735    }
736
737    fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
738        self.push_keyword(KeywordType::Await, expr.span);
739        walk::walk_await_expression(self, expr);
740    }
741
742    fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
743        self.push_op(OperatorType::Spread, elem.span);
744        walk::walk_spread_element(self, elem);
745    }
746
747    fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
748        for (i, sub_expr) in expr.expressions.iter().enumerate() {
749            if i > 0 {
750                self.push_op(OperatorType::Comma, expr.span);
751            }
752            self.visit_expression(sub_expr);
753        }
754    }
755
756    // ── Functions ──────────────────────────────────────────
757
    /// Emit `[async] function [name] ( param , param , )`, then walk the
    /// function node.
    ///
    /// NOTE(review): the name and parameters are pushed manually here and
    /// then `walk_function` is called, which may traverse the id and
    /// parameter list again — confirm against `oxc_ast_visit::walk` that
    /// these identifiers are not tokenized twice.
    fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
        if func.r#async {
            self.push_keyword(KeywordType::Async, func.span);
        }
        self.push_keyword(KeywordType::Function, func.span);
        if let Some(id) = &func.id {
            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
        }
        let params_span = func.params.span;
        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
        for param in &func.params.items {
            self.visit_binding_pattern(&param.pattern);
            // Comma after every parameter, including the last (normalization).
            self.push_op(OperatorType::Comma, point_span(param.span.end));
        }
        self.push_punc(
            PunctuationType::CloseParen,
            point_span(params_span.end.saturating_sub(1)),
        );
        walk::walk_function(self, func, flags);
    }
778
779    // ── Classes ─────────────────────────────────────────────
780
    /// Emit `class [name] [extends]`, then walk the class node.
    ///
    /// NOTE(review): the id is pushed manually and `walk_class` may visit it
    /// again — confirm against `oxc_ast_visit::walk`.
    fn visit_class(&mut self, class: &Class<'a>) {
        self.push_keyword(KeywordType::Class, class.span);
        if let Some(id) = &class.id {
            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
        }
        if class.super_class.is_some() {
            self.push_keyword(KeywordType::Extends, class.span);
        }
        walk::walk_class(self, class);
    }
791
792    // ── Import/Export ───────────────────────────────────────
793
    /// Emit `import … from "source"`.
    ///
    /// Type-only imports are dropped entirely when stripping types. The
    /// `from` keyword and source literal are pushed after the walk so they
    /// follow the specifier tokens.
    ///
    /// NOTE(review): `walk_import_declaration` may also visit the source
    /// string literal, which would tokenize it twice — confirm against
    /// `oxc_ast_visit::walk`.
    fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
        // Skip `import type { ... } from '...'` when stripping types
        if self.strip_types && decl.import_kind.is_type() {
            return;
        }
        self.push_keyword(KeywordType::Import, decl.span);
        walk::walk_import_declaration(self, decl);
        self.push_keyword(KeywordType::From, decl.span);
        self.push(
            TokenKind::StringLiteral(decl.source.value.to_string()),
            decl.source.span,
        );
    }
807
    /// Emit `export`, then walk the exported declaration/specifiers.
    /// Type-only exports are dropped entirely when stripping types.
    fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
        // Skip `export type { ... }` when stripping types
        if self.strip_types && decl.export_kind.is_type() {
            return;
        }
        self.push_keyword(KeywordType::Export, decl.span);
        walk::walk_export_named_declaration(self, decl);
    }
816
817    fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
818        self.push_keyword(KeywordType::Export, decl.span);
819        self.push_keyword(KeywordType::Default, decl.span);
820        walk::walk_export_default_declaration(self, decl);
821    }
822
823    fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
824        self.push_keyword(KeywordType::Export, decl.span);
825        self.push_keyword(KeywordType::From, decl.span);
826        self.push(
827            TokenKind::StringLiteral(decl.source.value.to_string()),
828            decl.source.span,
829        );
830    }
831
832    // ── TypeScript declarations ────────────────────────────
833
834    fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
835        if self.strip_types {
836            return; // Skip entire interface when stripping types
837        }
838        self.push_keyword(KeywordType::Interface, decl.span);
839        walk::walk_ts_interface_declaration(self, decl);
840    }
841
    /// Wraps the interface members in explicit `{` / `}` tokens so the body
    /// boundary participates in clone matching.
    fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
        self.push_punc(PunctuationType::OpenBrace, body.span);
        walk::walk_ts_interface_body(self, body);
        self.push_punc(PunctuationType::CloseBrace, body.span);
    }
847
848    fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
849        if self.strip_types {
850            return; // Skip entire type alias when stripping types
851        }
852        self.push_keyword(KeywordType::Type, decl.span);
853        walk::walk_ts_type_alias_declaration(self, decl);
854    }
855
856    fn visit_ts_module_declaration(&mut self, decl: &TSModuleDeclaration<'a>) {
857        if self.strip_types && decl.declare {
858            return; // Skip `declare module` / `declare namespace` when stripping types
859        }
860        walk::walk_ts_module_declaration(self, decl);
861    }
862
    /// Tokenizes a TS `enum`. Deliberately NOT gated on `strip_types`:
    /// unlike interfaces and type aliases, enums compile to runtime values
    /// (see the `strip_types_preserves_enums` test).
    fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
        self.push_keyword(KeywordType::Enum, decl.span);
        walk::walk_ts_enum_declaration(self, decl);
    }
867
    /// Wraps the enum members in explicit `{` / `}` tokens, matching the
    /// treatment of interface bodies.
    fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
        self.push_punc(PunctuationType::OpenBrace, body.span);
        walk::walk_ts_enum_body(self, body);
        self.push_punc(PunctuationType::CloseBrace, body.span);
    }
873
    /// Tokenizes one interface member, then a terminating `;` so consecutive
    /// signatures do not run together in the token stream.
    fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
        walk::walk_ts_property_signature(self, sig);
        self.push_punc(PunctuationType::Semicolon, sig.span);
    }
878
879    fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
880        if self.strip_types {
881            return; // Skip parameter/return type annotations when stripping types
882        }
883        self.push_punc(PunctuationType::Colon, ann.span);
884        walk::walk_ts_type_annotation(self, ann);
885    }
886
887    fn visit_ts_type_parameter_declaration(&mut self, decl: &TSTypeParameterDeclaration<'a>) {
888        if self.strip_types {
889            return; // Skip generic type parameters when stripping types
890        }
891        walk::walk_ts_type_parameter_declaration(self, decl);
892    }
893
894    fn visit_ts_type_parameter_instantiation(&mut self, inst: &TSTypeParameterInstantiation<'a>) {
895        if self.strip_types {
896            return; // Skip generic type arguments when stripping types
897        }
898        walk::walk_ts_type_parameter_instantiation(self, inst);
899    }
900
901    fn visit_ts_as_expression(&mut self, expr: &TSAsExpression<'a>) {
902        self.visit_expression(&expr.expression);
903        if !self.strip_types {
904            self.push_keyword(KeywordType::As, expr.span);
905            self.visit_ts_type(&expr.type_annotation);
906        }
907    }
908
909    fn visit_ts_satisfies_expression(&mut self, expr: &TSSatisfiesExpression<'a>) {
910        self.visit_expression(&expr.expression);
911        if !self.strip_types {
912            self.push_keyword(KeywordType::Satisfies, expr.span);
913            self.visit_ts_type(&expr.type_annotation);
914        }
915    }
916
    /// Tokenizes `expr!` by emitting only the inner expression; the `!`
    /// non-null assertion itself is never emitted as a token.
    /// NOTE(review): the assertion is dropped in BOTH modes, not only when
    /// `strip_types` is set — confirm TS-vs-TS comparisons shouldn't see `!`.
    fn visit_ts_non_null_expression(&mut self, expr: &TSNonNullExpression<'a>) {
        self.visit_expression(&expr.expression);
    }
921
922    fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
923        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
924    }
925
926    fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
927        self.push(TokenKind::Identifier("string".to_string()), it.span);
928    }
929
930    fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
931        self.push(TokenKind::Identifier("number".to_string()), it.span);
932    }
933
934    fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
935        self.push(TokenKind::Identifier("boolean".to_string()), it.span);
936    }
937
938    fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
939        self.push(TokenKind::Identifier("any".to_string()), it.span);
940    }
941
942    fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
943        self.push(TokenKind::Identifier("void".to_string()), it.span);
944    }
945
    /// The `null` type keyword maps to the same `NullLiteral` token kind as
    /// the runtime `null` literal, so the two normalize alike.
    fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
        self.push(TokenKind::NullLiteral, it.span);
    }
949
950    fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
951        self.push(TokenKind::Identifier("undefined".to_string()), it.span);
952    }
953
954    fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
955        self.push(TokenKind::Identifier("never".to_string()), it.span);
956    }
957
958    fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
959        self.push(TokenKind::Identifier("unknown".to_string()), it.span);
960    }
961
962    // ── JSX ─────────────────────────────────────────────────
963
    /// Tokenizes `<Tag ...>`: the JSX angle brackets are modeled with the
    /// bracket punctuation kinds (there is no dedicated `<`/`>` token).
    fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
        self.push_punc(PunctuationType::OpenBracket, elem.span);
        walk::walk_jsx_opening_element(self, elem);
        self.push_punc(PunctuationType::CloseBracket, elem.span);
    }
969
    /// Tokenizes `</Tag>`, using the same bracket punctuation as the opening
    /// element.
    fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
        self.push_punc(PunctuationType::OpenBracket, elem.span);
        walk::walk_jsx_closing_element(self, elem);
        self.push_punc(PunctuationType::CloseBracket, elem.span);
    }
975
976    fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
977        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
978    }
979
    /// Tokenizes `{...props}`: spread operator first, then the argument.
    fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
        self.push_op(OperatorType::Spread, attr.span);
        walk::walk_jsx_spread_attribute(self, attr);
    }
984
985    // ── Misc ────────────────────────────────────────────────
986
987    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
988        self.visit_binding_pattern(&decl.id);
989        if let Some(init) = &decl.init {
990            self.push_op(OperatorType::Assign, decl.span);
991            self.visit_expression(init);
992        }
993        self.push_punc(PunctuationType::Semicolon, decl.span);
994    }
995
    /// Tokenizes an expression statement and always appends a terminating `;`
    /// (semicolon usage is normalized away at the AST level).
    fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
        walk::walk_expression_statement(self, stmt);
        self.push_punc(PunctuationType::Semicolon, stmt.span);
    }
1000}
1001
1002#[cfg(test)]
1003mod tests {
1004    use super::*;
1005    use std::path::PathBuf;
1006
1007    fn tokenize(code: &str) -> Vec<SourceToken> {
1008        let path = PathBuf::from("test.ts");
1009        tokenize_file(&path, code).tokens
1010    }
1011
1012    #[test]
1013    fn tokenize_variable_declaration() {
1014        let tokens = tokenize("const x = 42;");
1015        assert!(!tokens.is_empty());
1016        // Should have: const, x (identifier), = (assign), 42 (numeric), ;
1017        assert!(matches!(
1018            tokens[0].kind,
1019            TokenKind::Keyword(KeywordType::Const)
1020        ));
1021    }
1022
1023    #[test]
1024    fn tokenize_function_declaration() {
1025        let tokens = tokenize("function foo() { return 1; }");
1026        assert!(!tokens.is_empty());
1027        assert!(matches!(
1028            tokens[0].kind,
1029            TokenKind::Keyword(KeywordType::Function)
1030        ));
1031    }
1032
1033    #[test]
1034    fn tokenize_arrow_function() {
1035        let tokens = tokenize("const f = (a, b) => a + b;");
1036        assert!(!tokens.is_empty());
1037        let has_arrow = tokens
1038            .iter()
1039            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Arrow)));
1040        assert!(has_arrow, "Should contain arrow operator");
1041    }
1042
1043    #[test]
1044    fn tokenize_if_else() {
1045        let tokens = tokenize("if (x) { y; } else { z; }");
1046        assert!(!tokens.is_empty());
1047        assert!(matches!(
1048            tokens[0].kind,
1049            TokenKind::Keyword(KeywordType::If)
1050        ));
1051        let has_else = tokens
1052            .iter()
1053            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Else)));
1054        assert!(has_else, "Should contain else keyword");
1055    }
1056
1057    #[test]
1058    fn tokenize_class() {
1059        let tokens = tokenize("class Foo extends Bar { }");
1060        assert!(!tokens.is_empty());
1061        assert!(matches!(
1062            tokens[0].kind,
1063            TokenKind::Keyword(KeywordType::Class)
1064        ));
1065        let has_extends = tokens
1066            .iter()
1067            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Extends)));
1068        assert!(has_extends, "Should contain extends keyword");
1069    }
1070
1071    #[test]
1072    fn tokenize_string_literal() {
1073        let tokens = tokenize("const s = \"hello\";");
1074        let has_string = tokens
1075            .iter()
1076            .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "hello"));
1077        assert!(has_string, "Should contain string literal");
1078    }
1079
1080    #[test]
1081    fn tokenize_boolean_literal() {
1082        let tokens = tokenize("const b = true;");
1083        let has_bool = tokens
1084            .iter()
1085            .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(true)));
1086        assert!(has_bool, "Should contain boolean literal");
1087    }
1088
1089    #[test]
1090    fn tokenize_null_literal() {
1091        let tokens = tokenize("const n = null;");
1092        let has_null = tokens
1093            .iter()
1094            .any(|t| matches!(t.kind, TokenKind::NullLiteral));
1095        assert!(has_null, "Should contain null literal");
1096    }
1097
1098    #[test]
1099    fn tokenize_empty_file() {
1100        let tokens = tokenize("");
1101        assert!(tokens.is_empty());
1102    }
1103
1104    #[test]
1105    fn tokenize_ts_interface() {
1106        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
1107        let has_interface = tokens
1108            .iter()
1109            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Interface)));
1110        assert!(has_interface, "Should contain interface keyword");
1111        let has_bar = tokens
1112            .iter()
1113            .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "bar"));
1114        assert!(has_bar, "Should contain property name 'bar'");
1115        let has_string = tokens
1116            .iter()
1117            .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "string"));
1118        assert!(has_string, "Should contain type 'string'");
1119        // Should have enough tokens for clone detection
1120        assert!(
1121            tokens.len() >= 10,
1122            "Interface should produce sufficient tokens, got {}",
1123            tokens.len()
1124        );
1125    }
1126
1127    #[test]
1128    fn tokenize_ts_type_alias() {
1129        let tokens = tokenize("type Result = { ok: boolean; error: string; }");
1130        let has_type = tokens
1131            .iter()
1132            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
1133        assert!(has_type, "Should contain type keyword");
1134    }
1135
1136    #[test]
1137    fn tokenize_ts_enum() {
1138        let tokens = tokenize("enum Color { Red, Green, Blue }");
1139        let has_enum = tokens
1140            .iter()
1141            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
1142        assert!(has_enum, "Should contain enum keyword");
1143        let has_red = tokens
1144            .iter()
1145            .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "Red"));
1146        assert!(has_red, "Should contain enum member 'Red'");
1147    }
1148
1149    fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
1150        let path = PathBuf::from("test.tsx");
1151        tokenize_file(&path, code).tokens
1152    }
1153
1154    fn tokenize_cross_language(code: &str) -> Vec<SourceToken> {
1155        let path = PathBuf::from("test.ts");
1156        tokenize_file_cross_language(&path, code, true).tokens
1157    }
1158
1159    #[test]
1160    fn tokenize_jsx_element() {
1161        let tokens =
1162            tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
1163        let has_div = tokens
1164            .iter()
1165            .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "div"));
1166        assert!(has_div, "Should contain JSX element name 'div'");
1167        let has_classname = tokens
1168            .iter()
1169            .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "className"));
1170        assert!(has_classname, "Should contain JSX attribute 'className'");
1171        let brackets = tokens
1172            .iter()
1173            .filter(|t| {
1174                matches!(
1175                    t.kind,
1176                    TokenKind::Punctuation(PunctuationType::OpenBracket)
1177                        | TokenKind::Punctuation(PunctuationType::CloseBracket)
1178                )
1179            })
1180            .count();
1181        assert!(
1182            brackets >= 4,
1183            "Should contain JSX angle brackets, got {brackets}"
1184        );
1185    }
1186
1187    // ── Cross-language type stripping tests ──────────────────────
1188
1189    #[test]
1190    fn strip_types_removes_parameter_type_annotations() {
1191        let ts_tokens = tokenize("function foo(x: string) { return x; }");
1192        let stripped = tokenize_cross_language("function foo(x: string) { return x; }");
1193
1194        // The stripped version should have fewer tokens (no `: string`)
1195        assert!(
1196            stripped.len() < ts_tokens.len(),
1197            "Stripped tokens ({}) should be fewer than full tokens ({})",
1198            stripped.len(),
1199            ts_tokens.len()
1200        );
1201
1202        // Should NOT contain type-annotation colon or the type name
1203        let has_colon_before_string = ts_tokens.windows(2).any(|w| {
1204            matches!(w[0].kind, TokenKind::Punctuation(PunctuationType::Colon))
1205                && matches!(&w[1].kind, TokenKind::Identifier(n) if n == "string")
1206        });
1207        assert!(has_colon_before_string, "Original should have `: string`");
1208
1209        // Stripped version should match JS version
1210        let js_tokens = {
1211            let path = PathBuf::from("test.js");
1212            tokenize_file(&path, "function foo(x) { return x; }").tokens
1213        };
1214        assert_eq!(
1215            stripped.len(),
1216            js_tokens.len(),
1217            "Stripped TS should produce same token count as JS"
1218        );
1219    }
1220
1221    #[test]
1222    fn strip_types_removes_return_type_annotations() {
1223        let stripped = tokenize_cross_language("function foo(): string { return 'hello'; }");
1224        // Should NOT contain the return type annotation
1225        let has_string_type = stripped.iter().enumerate().any(|(i, t)| {
1226            matches!(&t.kind, TokenKind::Identifier(n) if n == "string")
1227                && i > 0
1228                && matches!(
1229                    stripped[i - 1].kind,
1230                    TokenKind::Punctuation(PunctuationType::Colon)
1231                )
1232        });
1233        assert!(
1234            !has_string_type,
1235            "Stripped version should not have return type annotation"
1236        );
1237    }
1238
1239    #[test]
1240    fn strip_types_removes_interface_declarations() {
1241        let stripped = tokenize_cross_language("interface Foo { bar: string; }\nconst x = 42;");
1242        // Should NOT contain interface keyword
1243        let has_interface = stripped
1244            .iter()
1245            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Interface)));
1246        assert!(
1247            !has_interface,
1248            "Stripped version should not contain interface declaration"
1249        );
1250        // Should still contain the const declaration
1251        let has_const = stripped
1252            .iter()
1253            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1254        assert!(has_const, "Should still contain const keyword");
1255    }
1256
1257    #[test]
1258    fn strip_types_removes_type_alias_declarations() {
1259        let stripped = tokenize_cross_language("type Result = string | number;\nconst x = 42;");
1260        let has_type = stripped
1261            .iter()
1262            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
1263        assert!(!has_type, "Stripped version should not contain type alias");
1264        let has_const = stripped
1265            .iter()
1266            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1267        assert!(has_const, "Should still contain const keyword");
1268    }
1269
1270    #[test]
1271    fn strip_types_preserves_runtime_code() {
1272        let stripped =
1273            tokenize_cross_language("const x: number = 42;\nif (x > 0) { console.log(x); }");
1274        // Should have const, x, =, 42, if, x, >, 0, console, log, x, etc.
1275        let has_const = stripped
1276            .iter()
1277            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1278        let has_if = stripped
1279            .iter()
1280            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::If)));
1281        let has_42 = stripped
1282            .iter()
1283            .any(|t| matches!(&t.kind, TokenKind::NumericLiteral(n) if n == "42"));
1284        assert!(has_const, "Should preserve const");
1285        assert!(has_if, "Should preserve if");
1286        assert!(has_42, "Should preserve numeric literal");
1287    }
1288
1289    #[test]
1290    fn strip_types_preserves_enums() {
1291        // Enums have runtime semantics, so they should NOT be stripped
1292        let stripped = tokenize_cross_language("enum Color { Red, Green, Blue }");
1293        let has_enum = stripped
1294            .iter()
1295            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
1296        assert!(
1297            has_enum,
1298            "Enums should be preserved (they have runtime semantics)"
1299        );
1300    }
1301
1302    #[test]
1303    fn strip_types_removes_import_type() {
1304        let stripped = tokenize_cross_language("import type { Foo } from './foo';\nconst x = 42;");
1305        // Should NOT contain import keyword from the type-only import
1306        let import_count = stripped
1307            .iter()
1308            .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)))
1309            .count();
1310        assert_eq!(import_count, 0, "import type should be stripped");
1311        // Should still contain the const declaration
1312        let has_const = stripped
1313            .iter()
1314            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1315        assert!(has_const, "Runtime code should be preserved");
1316    }
1317
1318    #[test]
1319    fn strip_types_preserves_value_imports() {
1320        let stripped = tokenize_cross_language("import { foo } from './foo';\nconst x = foo();");
1321        let has_import = stripped
1322            .iter()
1323            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1324        assert!(has_import, "Value imports should be preserved");
1325    }
1326
1327    #[test]
1328    fn strip_types_removes_export_type() {
1329        let stripped = tokenize_cross_language("export type { Foo };\nconst x = 42;");
1330        // The export type should be stripped
1331        let export_count = stripped
1332            .iter()
1333            .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)))
1334            .count();
1335        assert_eq!(export_count, 0, "export type should be stripped");
1336    }
1337
1338    #[test]
1339    fn strip_types_removes_declare_module() {
1340        let stripped = tokenize_cross_language(
1341            "declare module 'foo' { export function bar(): void; }\nconst x = 42;",
1342        );
1343        // Should not contain function keyword from the declare block
1344        let has_function_keyword = stripped
1345            .iter()
1346            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
1347        assert!(
1348            !has_function_keyword,
1349            "declare module contents should be stripped"
1350        );
1351        let has_const = stripped
1352            .iter()
1353            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1354        assert!(has_const, "Runtime code should be preserved");
1355    }
1356
1357    // ── File type dispatch tests ─────────────────────────────────
1358
1359    #[test]
1360    fn tokenize_vue_sfc_extracts_script_block() {
1361        let vue_source = r#"<template><div>Hello</div></template>
1362<script lang="ts">
1363import { ref } from 'vue';
1364const count = ref(0);
1365</script>"#;
1366        let path = PathBuf::from("Component.vue");
1367        let result = tokenize_file(&path, vue_source);
1368        assert!(!result.tokens.is_empty(), "Vue SFC should produce tokens");
1369        let has_import = result
1370            .tokens
1371            .iter()
1372            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1373        assert!(has_import, "Should tokenize import in <script> block");
1374        let has_const = result
1375            .tokens
1376            .iter()
1377            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1378        assert!(has_const, "Should tokenize const in <script> block");
1379    }
1380
1381    #[test]
1382    fn tokenize_svelte_sfc_extracts_script_block() {
1383        let svelte_source = r"<script>
1384let count = 0;
1385function increment() { count += 1; }
1386</script>
1387<button on:click={increment}>{count}</button>";
1388        let path = PathBuf::from("Component.svelte");
1389        let result = tokenize_file(&path, svelte_source);
1390        assert!(
1391            !result.tokens.is_empty(),
1392            "Svelte SFC should produce tokens"
1393        );
1394        let has_let = result
1395            .tokens
1396            .iter()
1397            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Let)));
1398        assert!(has_let, "Should tokenize let in <script> block");
1399        let has_function = result
1400            .tokens
1401            .iter()
1402            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
1403        assert!(has_function, "Should tokenize function in <script> block");
1404    }
1405
1406    #[test]
1407    fn tokenize_vue_sfc_adjusts_span_offsets() {
1408        let vue_source = "<template><div/></template>\n<script>\nconst x = 1;\n</script>";
1409        let path = PathBuf::from("Test.vue");
1410        let result = tokenize_file(&path, vue_source);
1411        // The script body starts after "<template><div/></template>\n<script>\n"
1412        let script_body_offset = vue_source.find("const x").unwrap() as u32;
1413        // All token spans should reference positions in the full SFC source,
1414        // not positions within the extracted script body.
1415        for token in &result.tokens {
1416            assert!(
1417                token.span.start >= script_body_offset,
1418                "Token span start ({}) should be >= script body offset ({})",
1419                token.span.start,
1420                script_body_offset
1421            );
1422            // Verify span text is recoverable from the full source
1423            let text = &vue_source[token.span.start as usize..token.span.end as usize];
1424            assert!(
1425                !text.is_empty(),
1426                "Token span should recover non-empty text from full SFC source"
1427            );
1428        }
1429    }
1430
1431    #[test]
1432    fn tokenize_astro_extracts_frontmatter() {
1433        let astro_source = "---\nimport { Layout } from '../layouts/Layout.astro';\nconst title = 'Home';\n---\n<Layout title={title}><h1>Hello</h1></Layout>";
1434        let path = PathBuf::from("page.astro");
1435        let result = tokenize_file(&path, astro_source);
1436        assert!(
1437            !result.tokens.is_empty(),
1438            "Astro frontmatter should produce tokens"
1439        );
1440        let has_import = result
1441            .tokens
1442            .iter()
1443            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1444        assert!(has_import, "Should tokenize import in frontmatter");
1445    }
1446
1447    #[test]
1448    fn tokenize_astro_without_frontmatter_returns_empty() {
1449        let astro_source = "<html><body>Hello</body></html>";
1450        let path = PathBuf::from("page.astro");
1451        let result = tokenize_file(&path, astro_source);
1452        assert!(
1453            result.tokens.is_empty(),
1454            "Astro without frontmatter should produce no tokens"
1455        );
1456    }
1457
1458    #[test]
1459    fn tokenize_astro_adjusts_span_offsets() {
1460        let astro_source = "---\nconst x = 1;\n---\n<div/>";
1461        let path = PathBuf::from("page.astro");
1462        let result = tokenize_file(&path, astro_source);
1463        assert!(!result.tokens.is_empty());
1464        // "---\n" is 4 bytes — spans should be offset from there
1465        for token in &result.tokens {
1466            assert!(
1467                token.span.start >= 4,
1468                "Token span start ({}) should be offset into the full astro source",
1469                token.span.start
1470            );
1471        }
1472    }
1473
1474    #[test]
1475    fn tokenize_mdx_extracts_imports_and_exports() {
1476        let mdx_source = "import { Button } from './Button';\nexport const meta = { title: 'Hello' };\n\n# Hello World\n\n<Button>Click me</Button>";
1477        let path = PathBuf::from("page.mdx");
1478        let result = tokenize_file(&path, mdx_source);
1479        assert!(
1480            !result.tokens.is_empty(),
1481            "MDX should produce tokens from imports/exports"
1482        );
1483        let has_import = result
1484            .tokens
1485            .iter()
1486            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1487        assert!(has_import, "Should tokenize import in MDX");
1488        let has_export = result
1489            .tokens
1490            .iter()
1491            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
1492        assert!(has_export, "Should tokenize export in MDX");
1493    }
1494
1495    #[test]
1496    fn tokenize_mdx_without_statements_returns_empty() {
1497        let mdx_source = "# Just Markdown\n\nNo imports or exports here.";
1498        let path = PathBuf::from("page.mdx");
1499        let result = tokenize_file(&path, mdx_source);
1500        assert!(
1501            result.tokens.is_empty(),
1502            "MDX without imports/exports should produce no tokens"
1503        );
1504    }
1505
1506    #[test]
1507    fn tokenize_css_returns_empty() {
1508        let css_source = ".foo { color: red; }\n.bar { font-size: 16px; }";
1509        let path = PathBuf::from("styles.css");
1510        let result = tokenize_file(&path, css_source);
1511        assert!(
1512            result.tokens.is_empty(),
1513            "CSS files should produce no tokens"
1514        );
1515        assert!(result.line_count >= 1);
1516    }
1517
1518    #[test]
1519    fn tokenize_scss_returns_empty() {
1520        let scss_source = "$color: red;\n.foo { color: $color; }";
1521        let path = PathBuf::from("styles.scss");
1522        let result = tokenize_file(&path, scss_source);
1523        assert!(
1524            result.tokens.is_empty(),
1525            "SCSS files should produce no tokens"
1526        );
1527    }
1528
1529    // ── Line count and FileTokens metadata ──────────────────────
1530
1531    #[test]
1532    fn file_tokens_line_count_matches_source() {
1533        let source = "const x = 1;\nconst y = 2;\nconst z = 3;";
1534        let path = PathBuf::from("test.ts");
1535        let result = tokenize_file(&path, source);
1536        assert_eq!(result.line_count, 3);
1537        assert_eq!(result.source, source);
1538    }
1539
1540    #[test]
1541    fn file_tokens_line_count_minimum_is_one() {
1542        let path = PathBuf::from("test.ts");
1543        let result = tokenize_file(&path, "");
1544        assert_eq!(result.line_count, 1, "Empty file should have line_count 1");
1545    }
1546
1547    // ── JSX fallback retry path ─────────────────────────────────
1548
1549    #[test]
1550    fn js_file_with_jsx_retries_as_jsx() {
1551        // A .js file containing JSX should trigger the fallback retry with JSX source type.
1552        // The initial parse as plain JS will fail on JSX, producing few tokens.
1553        // The retry as JSX should succeed and produce more tokens.
1554        let jsx_code = r#"
1555function App() {
1556    return (
1557        <div className="app">
1558            <h1>Hello World</h1>
1559            <p>Welcome to the app</p>
1560        </div>
1561    );
1562}
1563"#;
1564        let path = PathBuf::from("app.js");
1565        let result = tokenize_file(&path, jsx_code);
1566        // If the retry works, we should see JSX angle brackets
1567        let has_brackets = result
1568            .tokens
1569            .iter()
1570            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1571        assert!(
1572            has_brackets,
1573            "JSX fallback retry should produce JSX tokens from .js file"
1574        );
1575    }
1576
1577    // ── Statement tokenization ──────────────────────────────────
1578
1579    #[test]
1580    fn tokenize_for_in_statement() {
1581        let tokens = tokenize("for (const key in obj) { console.log(key); }");
1582        assert!(matches!(
1583            tokens[0].kind,
1584            TokenKind::Keyword(KeywordType::For)
1585        ));
1586        let has_in = tokens
1587            .iter()
1588            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::In)));
1589        assert!(has_in, "Should contain 'in' keyword");
1590    }
1591
1592    #[test]
1593    fn tokenize_for_of_statement() {
1594        let tokens = tokenize("for (const item of items) { process(item); }");
1595        assert!(matches!(
1596            tokens[0].kind,
1597            TokenKind::Keyword(KeywordType::For)
1598        ));
1599        let has_of = tokens
1600            .iter()
1601            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Of)));
1602        assert!(has_of, "Should contain 'of' keyword");
1603    }
1604
1605    #[test]
1606    fn tokenize_while_statement() {
1607        let tokens = tokenize("while (x > 0) { x--; }");
1608        assert!(matches!(
1609            tokens[0].kind,
1610            TokenKind::Keyword(KeywordType::While)
1611        ));
1612        let has_gt = tokens
1613            .iter()
1614            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Gt)));
1615        assert!(has_gt, "Should contain greater-than operator");
1616    }
1617
1618    #[test]
1619    fn tokenize_do_while_statement() {
1620        let tokens = tokenize("do { x++; } while (x < 10);");
1621        assert!(matches!(
1622            tokens[0].kind,
1623            TokenKind::Keyword(KeywordType::Do)
1624        ));
1625        // The visitor only emits `Do` -- the `while` part is implicit in the AST walk.
1626        // Verify the body and condition are tokenized:
1627        let has_increment = tokens
1628            .iter()
1629            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Increment)));
1630        assert!(has_increment, "do-while body should contain increment");
1631        let has_lt = tokens
1632            .iter()
1633            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Lt)));
1634        assert!(has_lt, "do-while condition should contain < operator");
1635    }
1636
1637    #[test]
1638    fn tokenize_switch_case_default() {
1639        let tokens = tokenize("switch (x) { case 1: break; case 2: break; default: return; }");
1640        assert!(matches!(
1641            tokens[0].kind,
1642            TokenKind::Keyword(KeywordType::Switch)
1643        ));
1644        let case_count = tokens
1645            .iter()
1646            .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Case)))
1647            .count();
1648        assert_eq!(case_count, 2, "Should have two case keywords");
1649        let has_default = tokens
1650            .iter()
1651            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Default)));
1652        assert!(has_default, "Should have default keyword");
1653        let has_break = tokens
1654            .iter()
1655            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Break)));
1656        assert!(has_break, "Should have break keyword");
1657        // Colons after case/default
1658        let colon_count = tokens
1659            .iter()
1660            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)))
1661            .count();
1662        assert!(
1663            colon_count >= 3,
1664            "Should have at least 3 colons (case, case, default), got {colon_count}"
1665        );
1666    }
1667
1668    #[test]
1669    fn tokenize_continue_statement() {
1670        let tokens = tokenize("for (let i = 0; i < 10; i++) { if (i === 5) continue; }");
1671        let has_continue = tokens
1672            .iter()
1673            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Continue)));
1674        assert!(has_continue, "Should contain continue keyword");
1675    }
1676
1677    #[test]
1678    fn tokenize_try_catch_finally() {
1679        let tokens = tokenize("try { foo(); } catch (e) { bar(); } finally { baz(); }");
1680        let has_try = tokens
1681            .iter()
1682            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Try)));
1683        let has_catch = tokens
1684            .iter()
1685            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Catch)));
1686        let has_finally = tokens
1687            .iter()
1688            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Finally)));
1689        assert!(has_try, "Should contain try keyword");
1690        assert!(has_catch, "Should contain catch keyword");
1691        // No visit_finally_clause override — finally keyword is not emitted as a token.
1692        // The finally block's braces and contents are still visited via walk.
1693        assert!(
1694            !has_finally,
1695            "Finally keyword is not emitted (no visitor override)"
1696        );
1697    }
1698
1699    #[test]
1700    fn tokenize_throw_statement() {
1701        let tokens = tokenize("throw new Error('fail');");
1702        assert!(matches!(
1703            tokens[0].kind,
1704            TokenKind::Keyword(KeywordType::Throw)
1705        ));
1706        let has_new = tokens
1707            .iter()
1708            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
1709        assert!(has_new, "Should contain new keyword");
1710    }
1711
1712    // ── Expression tokenization ─────────────────────────────────
1713
1714    #[test]
1715    fn tokenize_this_expression() {
1716        let tokens = tokenize("const x = this.foo;");
1717        let has_this = tokens
1718            .iter()
1719            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::This)));
1720        assert!(has_this, "Should contain this keyword");
1721    }
1722
1723    #[test]
1724    fn tokenize_super_expression() {
1725        let tokens = tokenize("class Child extends Parent { constructor() { super(); } }");
1726        let has_super = tokens
1727            .iter()
1728            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Super)));
1729        assert!(has_super, "Should contain super keyword");
1730    }
1731
1732    #[test]
1733    fn tokenize_array_expression() {
1734        let tokens = tokenize("const arr = [1, 2, 3];");
1735        let open_bracket = tokens
1736            .iter()
1737            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1738        let close_bracket = tokens.iter().any(|t| {
1739            matches!(
1740                t.kind,
1741                TokenKind::Punctuation(PunctuationType::CloseBracket)
1742            )
1743        });
1744        assert!(open_bracket, "Should contain open bracket");
1745        assert!(close_bracket, "Should contain close bracket");
1746    }
1747
1748    #[test]
1749    fn tokenize_object_expression() {
1750        let tokens = tokenize("const obj = { a: 1, b: 2 };");
1751        let open_brace = tokens
1752            .iter()
1753            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
1754            .count();
1755        let close_brace = tokens
1756            .iter()
1757            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
1758            .count();
1759        assert!(open_brace >= 1, "Should have open brace for object");
1760        assert!(close_brace >= 1, "Should have close brace for object");
1761    }
1762
1763    #[test]
1764    fn tokenize_computed_member_expression() {
1765        let tokens = tokenize("const x = obj[key];");
1766        // Should have open and close brackets around the computed member
1767        let open_bracket = tokens
1768            .iter()
1769            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1770        let close_bracket = tokens.iter().any(|t| {
1771            matches!(
1772                t.kind,
1773                TokenKind::Punctuation(PunctuationType::CloseBracket)
1774            )
1775        });
1776        assert!(
1777            open_bracket,
1778            "Should contain open bracket for computed member"
1779        );
1780        assert!(
1781            close_bracket,
1782            "Should contain close bracket for computed member"
1783        );
1784    }
1785
1786    #[test]
1787    fn tokenize_static_member_expression() {
1788        let tokens = tokenize("const x = obj.prop;");
1789        let has_dot = tokens
1790            .iter()
1791            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Dot)));
1792        let has_prop = tokens
1793            .iter()
1794            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "prop"));
1795        assert!(has_dot, "Should contain dot for member access");
1796        assert!(has_prop, "Should contain property name 'prop'");
1797    }
1798
1799    #[test]
1800    fn tokenize_new_expression() {
1801        let tokens = tokenize("const d = new Date(2024, 1, 1);");
1802        let has_new = tokens
1803            .iter()
1804            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
1805        assert!(has_new, "Should contain new keyword");
1806        let has_date = tokens
1807            .iter()
1808            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "Date"));
1809        assert!(has_date, "Should contain identifier 'Date'");
1810    }
1811
1812    #[test]
1813    fn tokenize_template_literal() {
1814        let tokens = tokenize("const s = `hello ${name}`;");
1815        let has_template = tokens
1816            .iter()
1817            .any(|t| matches!(t.kind, TokenKind::TemplateLiteral));
1818        assert!(has_template, "Should contain template literal token");
1819    }
1820
1821    #[test]
1822    fn tokenize_regex_literal() {
1823        let tokens = tokenize("const re = /foo[a-z]+/gi;");
1824        let has_regex = tokens
1825            .iter()
1826            .any(|t| matches!(t.kind, TokenKind::RegExpLiteral));
1827        assert!(has_regex, "Should contain regex literal token");
1828    }
1829
1830    #[test]
1831    fn tokenize_conditional_ternary_expression() {
1832        let tokens = tokenize("const x = a ? b : c;");
1833        let has_ternary = tokens
1834            .iter()
1835            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Ternary)));
1836        let has_colon = tokens
1837            .iter()
1838            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)));
1839        assert!(has_ternary, "Should contain ternary operator");
1840        assert!(has_colon, "Should contain colon for ternary");
1841    }
1842
1843    #[test]
1844    fn tokenize_sequence_expression() {
1845        let tokens = tokenize("for (let i = 0, j = 10; i < j; i++, j--) {}");
1846        let comma_count = tokens
1847            .iter()
1848            .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
1849            .count();
1850        assert!(
1851            comma_count >= 1,
1852            "Sequence expression should produce comma operators"
1853        );
1854    }
1855
1856    #[test]
1857    fn tokenize_spread_element() {
1858        let tokens = tokenize("const arr = [...other, 1, 2];");
1859        let has_spread = tokens
1860            .iter()
1861            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Spread)));
1862        assert!(has_spread, "Should contain spread operator");
1863    }
1864
1865    #[test]
1866    fn tokenize_yield_expression() {
1867        let tokens = tokenize("function* gen() { yield 42; }");
1868        let has_yield = tokens
1869            .iter()
1870            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Yield)));
1871        assert!(has_yield, "Should contain yield keyword");
1872    }
1873
1874    #[test]
1875    fn tokenize_await_expression() {
1876        let tokens = tokenize("async function run() { const x = await fetch(); }");
1877        let has_async = tokens
1878            .iter()
1879            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Async)));
1880        let has_await = tokens
1881            .iter()
1882            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Await)));
1883        assert!(has_async, "Should contain async keyword");
1884        assert!(has_await, "Should contain await keyword");
1885    }
1886
1887    #[test]
1888    fn tokenize_async_arrow_function() {
1889        let tokens = tokenize("const f = async () => { await fetch(); };");
1890        let has_async = tokens
1891            .iter()
1892            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Async)));
1893        let has_arrow = tokens
1894            .iter()
1895            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Arrow)));
1896        assert!(has_async, "Should contain async keyword before arrow");
1897        assert!(has_arrow, "Should contain arrow operator");
1898    }
1899
1900    // ── Operator coverage ───────────────────────────────────────
1901
1902    #[test]
1903    fn tokenize_all_binary_operators() {
1904        let code = r"
1905const a = 1 + 2;
1906const b = 3 - 4;
1907const c = 5 * 6;
1908const d = 7 / 8;
1909const e = 9 % 10;
1910const f = 2 ** 3;
1911const g = a == b;
1912const h = a != b;
1913const i = a === b;
1914const j = a !== b;
1915const k = a < b;
1916const l = a > b;
1917const m = a <= b;
1918const n = a >= b;
1919const o = a & b;
1920const p = a | b;
1921const q = a ^ b;
1922const r = a << b;
1923const s = a >> b;
1924const t = a >>> b;
1925const u = a instanceof Object;
1926const v = 'key' in obj;
1927";
1928        let tokens = tokenize(code);
1929        let ops: Vec<&OperatorType> = tokens
1930            .iter()
1931            .filter_map(|t| match &t.kind {
1932                TokenKind::Operator(op) => Some(op),
1933                _ => None,
1934            })
1935            .collect();
1936        assert!(ops.contains(&&OperatorType::Add));
1937        assert!(ops.contains(&&OperatorType::Sub));
1938        assert!(ops.contains(&&OperatorType::Mul));
1939        assert!(ops.contains(&&OperatorType::Div));
1940        assert!(ops.contains(&&OperatorType::Mod));
1941        assert!(ops.contains(&&OperatorType::Exp));
1942        assert!(ops.contains(&&OperatorType::Eq));
1943        assert!(ops.contains(&&OperatorType::NEq));
1944        assert!(ops.contains(&&OperatorType::StrictEq));
1945        assert!(ops.contains(&&OperatorType::StrictNEq));
1946        assert!(ops.contains(&&OperatorType::Lt));
1947        assert!(ops.contains(&&OperatorType::Gt));
1948        assert!(ops.contains(&&OperatorType::LtEq));
1949        assert!(ops.contains(&&OperatorType::GtEq));
1950        assert!(ops.contains(&&OperatorType::BitwiseAnd));
1951        assert!(ops.contains(&&OperatorType::BitwiseOr));
1952        assert!(ops.contains(&&OperatorType::BitwiseXor));
1953        assert!(ops.contains(&&OperatorType::ShiftLeft));
1954        assert!(ops.contains(&&OperatorType::ShiftRight));
1955        assert!(ops.contains(&&OperatorType::UnsignedShiftRight));
1956        assert!(ops.contains(&&OperatorType::Instanceof));
1957        assert!(ops.contains(&&OperatorType::In));
1958    }
1959
1960    #[test]
1961    fn tokenize_logical_operators() {
1962        let tokens = tokenize("const x = a && b || c ?? d;");
1963        let ops: Vec<&OperatorType> = tokens
1964            .iter()
1965            .filter_map(|t| match &t.kind {
1966                TokenKind::Operator(op) => Some(op),
1967                _ => None,
1968            })
1969            .collect();
1970        assert!(ops.contains(&&OperatorType::And));
1971        assert!(ops.contains(&&OperatorType::Or));
1972        assert!(ops.contains(&&OperatorType::NullishCoalescing));
1973    }
1974
1975    #[test]
1976    fn tokenize_assignment_operators() {
1977        let code = r"
1978x = 1;
1979x += 1;
1980x -= 1;
1981x *= 1;
1982x /= 1;
1983x %= 1;
1984x **= 1;
1985x &&= true;
1986x ||= true;
1987x ??= 1;
1988x &= 1;
1989x |= 1;
1990x ^= 1;
1991x <<= 1;
1992x >>= 1;
1993x >>>= 1;
1994";
1995        let tokens = tokenize(code);
1996        let ops: Vec<&OperatorType> = tokens
1997            .iter()
1998            .filter_map(|t| match &t.kind {
1999                TokenKind::Operator(op) => Some(op),
2000                _ => None,
2001            })
2002            .collect();
2003        assert!(ops.contains(&&OperatorType::Assign));
2004        assert!(ops.contains(&&OperatorType::AddAssign));
2005        assert!(ops.contains(&&OperatorType::SubAssign));
2006        assert!(ops.contains(&&OperatorType::MulAssign));
2007        assert!(ops.contains(&&OperatorType::DivAssign));
2008        assert!(ops.contains(&&OperatorType::ModAssign));
2009        assert!(ops.contains(&&OperatorType::ExpAssign));
2010        assert!(ops.contains(&&OperatorType::AndAssign));
2011        assert!(ops.contains(&&OperatorType::OrAssign));
2012        assert!(ops.contains(&&OperatorType::NullishAssign));
2013        assert!(ops.contains(&&OperatorType::BitwiseAndAssign));
2014        assert!(ops.contains(&&OperatorType::BitwiseOrAssign));
2015        assert!(ops.contains(&&OperatorType::BitwiseXorAssign));
2016        assert!(ops.contains(&&OperatorType::ShiftLeftAssign));
2017        assert!(ops.contains(&&OperatorType::ShiftRightAssign));
2018        assert!(ops.contains(&&OperatorType::UnsignedShiftRightAssign));
2019    }
2020
2021    #[test]
2022    fn tokenize_unary_operators() {
2023        let code = "const a = +x; const b = -x; const c = !x; const d = ~x;";
2024        let tokens = tokenize(code);
2025        let ops: Vec<&OperatorType> = tokens
2026            .iter()
2027            .filter_map(|t| match &t.kind {
2028                TokenKind::Operator(op) => Some(op),
2029                _ => None,
2030            })
2031            .collect();
2032        // Unary plus maps to Add, unary minus to Sub
2033        assert!(
2034            ops.contains(&&OperatorType::Add),
2035            "Should have unary plus (mapped to Add)"
2036        );
2037        assert!(
2038            ops.contains(&&OperatorType::Sub),
2039            "Should have unary minus (mapped to Sub)"
2040        );
2041        assert!(ops.contains(&&OperatorType::Not), "Should have logical not");
2042        assert!(
2043            ops.contains(&&OperatorType::BitwiseNot),
2044            "Should have bitwise not"
2045        );
2046    }
2047
2048    #[test]
2049    fn tokenize_typeof_void_delete_as_keywords() {
2050        let tokens = tokenize("typeof x; void 0; delete obj.key;");
2051        let has_typeof = tokens
2052            .iter()
2053            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Typeof)));
2054        let has_void = tokens
2055            .iter()
2056            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Void)));
2057        let has_delete = tokens
2058            .iter()
2059            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Delete)));
2060        assert!(has_typeof, "typeof should be a keyword token");
2061        assert!(has_void, "void should be a keyword token");
2062        assert!(has_delete, "delete should be a keyword token");
2063    }
2064
2065    #[test]
2066    fn tokenize_prefix_and_postfix_update() {
2067        let tokens = tokenize("++x; x--;");
2068        let first_increment_idx = tokens
2069            .iter()
2070            .position(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Increment)));
2071        let has_decrement = tokens
2072            .iter()
2073            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Decrement)));
2074        assert!(
2075            first_increment_idx.is_some(),
2076            "Should have increment operator"
2077        );
2078        assert!(has_decrement, "Should have decrement operator");
2079
2080        // Prefix ++x: the operator appears before the identifier
2081        let first_x_idx = tokens
2082            .iter()
2083            .position(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "x"))
2084            .unwrap();
2085        assert!(
2086            first_increment_idx.unwrap() < first_x_idx,
2087            "Prefix ++ should appear before identifier"
2088        );
2089    }
2090
2091    // ── TypeScript-specific syntax ──────────────────────────────
2092
2093    #[test]
2094    fn tokenize_ts_as_expression() {
2095        let tokens = tokenize("const x = value as string;");
2096        let has_as = tokens
2097            .iter()
2098            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2099        assert!(has_as, "Should contain 'as' keyword");
2100    }
2101
2102    #[test]
2103    fn tokenize_ts_satisfies_expression() {
2104        let tokens = tokenize("const config = {} satisfies Config;");
2105        let has_satisfies = tokens
2106            .iter()
2107            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Satisfies)));
2108        assert!(has_satisfies, "Should contain 'satisfies' keyword");
2109    }
2110
2111    #[test]
2112    fn tokenize_ts_non_null_assertion() {
2113        let ts_tokens = tokenize("const x = value!.toString();");
2114        // The non-null assertion (!) is NOT emitted as a separate token.
2115        // It just visits the inner expression.
2116        let has_value = ts_tokens
2117            .iter()
2118            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "value"));
2119        assert!(has_value, "Should contain 'value' identifier");
2120    }
2121
2122    #[test]
2123    fn tokenize_ts_generic_type_parameters() {
2124        let tokens = tokenize("function identity<T>(x: T): T { return x; }");
2125        // Without stripping types, the generic parameter T should appear
2126        let t_count = tokens
2127            .iter()
2128            .filter(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "T"))
2129            .count();
2130        assert!(
2131            t_count >= 1,
2132            "Generic type parameter T should appear in tokens"
2133        );
2134    }
2135
2136    #[test]
2137    fn tokenize_ts_type_keywords() {
2138        let tokens = tokenize(
2139            "type T = string | number | boolean | any | void | null | undefined | never | unknown;",
2140        );
2141        let idents: Vec<&String> = tokens
2142            .iter()
2143            .filter_map(|t| match &t.kind {
2144                TokenKind::Identifier(name) => Some(name),
2145                _ => None,
2146            })
2147            .collect();
2148        assert!(idents.contains(&&"string".to_string()));
2149        assert!(idents.contains(&&"number".to_string()));
2150        assert!(idents.contains(&&"boolean".to_string()));
2151        assert!(idents.contains(&&"any".to_string()));
2152        assert!(idents.contains(&&"void".to_string()));
2153        assert!(idents.contains(&&"undefined".to_string()));
2154        assert!(idents.contains(&&"never".to_string()));
2155        assert!(idents.contains(&&"unknown".to_string()));
2156        // null is a NullLiteral, not an identifier
2157        let has_null = tokens
2158            .iter()
2159            .any(|t| matches!(t.kind, TokenKind::NullLiteral));
2160        assert!(has_null, "null keyword should produce NullLiteral token");
2161    }
2162
2163    #[test]
2164    fn tokenize_ts_property_signatures_in_interface() {
2165        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
2166        // Property signatures end with semicolons
2167        let semicolons = tokens
2168            .iter()
2169            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Semicolon)))
2170            .count();
2171        assert!(
2172            semicolons >= 2,
2173            "Interface property signatures should produce semicolons, got {semicolons}"
2174        );
2175    }
2176
2177    #[test]
2178    fn tokenize_ts_enum_with_initializers() {
2179        let tokens = tokenize("enum Status { Active = 'ACTIVE', Inactive = 'INACTIVE' }");
2180        let has_enum = tokens
2181            .iter()
2182            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
2183        assert!(has_enum);
2184        let has_active_str = tokens
2185            .iter()
2186            .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "ACTIVE"));
2187        assert!(has_active_str, "Should contain string initializer 'ACTIVE'");
2188    }
2189
2190    // ── Cross-language type stripping (advanced) ────────────────
2191
2192    #[test]
2193    fn strip_types_removes_generic_type_parameters() {
2194        let stripped = tokenize_cross_language("function identity<T>(x: T): T { return x; }");
2195        let js_tokens = {
2196            let path = PathBuf::from("test.js");
2197            tokenize_file(&path, "function identity(x) { return x; }").tokens
2198        };
2199        assert_eq!(
2200            stripped.len(),
2201            js_tokens.len(),
2202            "Stripped TS with generics should match JS token count: stripped={}, js={}",
2203            stripped.len(),
2204            js_tokens.len()
2205        );
2206    }
2207
2208    #[test]
2209    fn strip_types_removes_generic_type_arguments() {
2210        let stripped = tokenize_cross_language("const x = new Map<string, number>();");
2211        // <string, number> should be stripped
2212        let has_string_ident = stripped
2213            .iter()
2214            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "string"));
2215        // "string" as a type argument should be removed, but "Map" should remain
2216        let has_map = stripped
2217            .iter()
2218            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "Map"));
2219        assert!(has_map, "Map identifier should be preserved");
2220        // In strip mode the type args are removed
2221        assert!(
2222            !has_string_ident,
2223            "Type argument 'string' should be stripped"
2224        );
2225    }
2226
2227    #[test]
2228    fn strip_types_removes_as_expression() {
2229        let stripped = tokenize_cross_language("const x = value as string;");
2230        let has_as = stripped
2231            .iter()
2232            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2233        assert!(!has_as, "'as' expression should be stripped");
2234    }
2235
2236    #[test]
2237    fn strip_types_removes_satisfies_expression() {
2238        let stripped = tokenize_cross_language("const config = {} satisfies Config;");
2239        let has_satisfies = stripped
2240            .iter()
2241            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Satisfies)));
2242        assert!(!has_satisfies, "'satisfies' expression should be stripped");
2243    }
2244
2245    #[test]
2246    fn strip_types_ts_and_js_produce_identical_token_kinds() {
2247        let ts_code = r#"
2248function greet(name: string, age: number): string {
2249    const msg: string = `Hello ${name}`;
2250    if (age > 18) {
2251        return msg;
2252    }
2253    return "too young";
2254}
2255"#;
2256        let js_code = r#"
2257function greet(name, age) {
2258    const msg = `Hello ${name}`;
2259    if (age > 18) {
2260        return msg;
2261    }
2262    return "too young";
2263}
2264"#;
2265        let stripped = tokenize_cross_language(ts_code);
2266        let js_tokens = {
2267            let path = PathBuf::from("test.js");
2268            tokenize_file(&path, js_code).tokens
2269        };
2270
2271        assert_eq!(
2272            stripped.len(),
2273            js_tokens.len(),
2274            "Stripped TS and JS should produce same number of tokens"
2275        );
2276
2277        // Verify token kinds match one by one
2278        for (i, (ts_tok, js_tok)) in stripped.iter().zip(js_tokens.iter()).enumerate() {
2279            assert_eq!(
2280                ts_tok.kind, js_tok.kind,
2281                "Token {i} mismatch: TS={:?}, JS={:?}",
2282                ts_tok.kind, js_tok.kind
2283            );
2284        }
2285    }
2286
2287    #[test]
2288    fn strip_types_removes_export_type_but_keeps_export_value() {
2289        let stripped =
2290            tokenize_cross_language("export type { Foo };\nexport { bar };\nexport const x = 1;");
2291        let export_count = stripped
2292            .iter()
2293            .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)))
2294            .count();
2295        // export type is stripped, but export { bar } and export const x = 1 remain
2296        assert_eq!(
2297            export_count, 2,
2298            "Should have 2 value exports, got {export_count}"
2299        );
2300    }
2301
2302    // ── JSX/TSX tokenization ────────────────────────────────────
2303
2304    #[test]
2305    fn tokenize_jsx_fragment() {
2306        let tokens = tokenize_tsx("const x = <><div>Hello</div></>;");
2307        // Fragments produce opening and closing bracket tokens
2308        let bracket_count = tokens
2309            .iter()
2310            .filter(|t| {
2311                matches!(
2312                    t.kind,
2313                    TokenKind::Punctuation(PunctuationType::OpenBracket)
2314                        | TokenKind::Punctuation(PunctuationType::CloseBracket)
2315                )
2316            })
2317            .count();
2318        assert!(
2319            bracket_count >= 4,
2320            "JSX fragment should produce bracket tokens, got {bracket_count}"
2321        );
2322    }
2323
2324    #[test]
2325    fn tokenize_jsx_spread_attribute() {
2326        let tokens = tokenize_tsx("const x = <div {...props}>Hello</div>;");
2327        let has_spread = tokens
2328            .iter()
2329            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Spread)));
2330        assert!(
2331            has_spread,
2332            "JSX spread attribute should produce spread operator"
2333        );
2334    }
2335
2336    #[test]
2337    fn tokenize_jsx_expression_container() {
2338        let tokens = tokenize_tsx("const x = <div>{count > 0 ? 'yes' : 'no'}</div>;");
2339        let has_ternary = tokens
2340            .iter()
2341            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Ternary)));
2342        assert!(
2343            has_ternary,
2344            "Expression in JSX should be tokenized (ternary)"
2345        );
2346    }
2347
2348    // ── ES module patterns ──────────────────────────────────────
2349
2350    #[test]
2351    fn tokenize_import_declaration() {
2352        let tokens = tokenize("import { foo, bar } from './module';");
2353        let has_import = tokens
2354            .iter()
2355            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
2356        let has_from = tokens
2357            .iter()
2358            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::From)));
2359        let has_source = tokens
2360            .iter()
2361            .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "./module"));
2362        assert!(has_import, "Should contain import keyword");
2363        assert!(has_from, "Should contain from keyword");
2364        assert!(has_source, "Should contain module source string");
2365    }
2366
2367    #[test]
2368    fn tokenize_export_default_declaration() {
2369        let tokens = tokenize("export default function() { return 42; }");
2370        let has_export = tokens
2371            .iter()
2372            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
2373        let has_default = tokens
2374            .iter()
2375            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Default)));
2376        assert!(has_export, "Should contain export keyword");
2377        assert!(has_default, "Should contain default keyword");
2378    }
2379
2380    #[test]
2381    fn tokenize_export_all_declaration() {
2382        let tokens = tokenize("export * from './module';");
2383        let has_export = tokens
2384            .iter()
2385            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
2386        let has_from = tokens
2387            .iter()
2388            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::From)));
2389        let has_source = tokens
2390            .iter()
2391            .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "./module"));
2392        assert!(has_export, "export * should have export keyword");
2393        assert!(has_from, "export * should have from keyword");
2394        assert!(has_source, "export * should have source string");
2395    }
2396
2397    #[test]
2398    fn tokenize_dynamic_import() {
2399        let tokens = tokenize("const mod = await import('./module');");
2400        let has_import = tokens
2401            .iter()
2402            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
2403        let has_await = tokens
2404            .iter()
2405            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Await)));
2406        // Dynamic import() is an expression — no visit_import_expression override,
2407        // so no Import keyword is emitted (only static import declarations emit it).
2408        assert!(
2409            !has_import,
2410            "Dynamic import() should not produce Import keyword"
2411        );
2412        assert!(has_await, "Should contain await keyword");
2413    }
2414
2415    // ── Edge cases ──────────────────────────────────────────────
2416
2417    #[test]
2418    fn tokenize_only_comments() {
2419        let tokens = tokenize("// This is a comment\n/* block comment */\n");
2420        assert!(
2421            tokens.is_empty(),
2422            "File with only comments should produce no tokens"
2423        );
2424    }
2425
2426    #[test]
2427    fn tokenize_deeply_nested_structure() {
2428        let code = "const x = { a: { b: { c: { d: { e: 1 } } } } };";
2429        let tokens = tokenize(code);
2430        let open_braces = tokens
2431            .iter()
2432            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2433            .count();
2434        let close_braces = tokens
2435            .iter()
2436            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
2437            .count();
2438        assert_eq!(
2439            open_braces, close_braces,
2440            "Nested structure should have balanced braces"
2441        );
2442        assert!(
2443            open_braces >= 5,
2444            "Should have at least 5 levels of braces, got {open_braces}"
2445        );
2446    }
2447
2448    #[test]
2449    fn tokenize_chained_method_calls_uses_point_spans() {
2450        let tokens = tokenize("arr.filter(x => x > 0).map(x => x * 2).reduce((a, b) => a + b, 0);");
2451        // Verify that call expression parentheses use point spans (not the full chain span).
2452        // The dots should be at point spans just after each object expression ends.
2453        let dots: Vec<&SourceToken> = tokens
2454            .iter()
2455            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Dot)))
2456            .collect();
2457        assert!(
2458            dots.len() >= 3,
2459            "Chained calls should produce dots, got {}",
2460            dots.len()
2461        );
2462        // Point spans should be small (1 byte)
2463        for dot in &dots {
2464            assert_eq!(
2465                dot.span.end - dot.span.start,
2466                1,
2467                "Dot should use point span"
2468            );
2469        }
2470    }
2471
2472    #[test]
2473    fn tokenize_expression_statement_appends_semicolon() {
2474        let tokens = tokenize("foo();");
2475        let last = tokens.last().unwrap();
2476        assert!(
2477            matches!(
2478                last.kind,
2479                TokenKind::Punctuation(PunctuationType::Semicolon)
2480                    | TokenKind::Punctuation(PunctuationType::CloseParen)
2481                    | TokenKind::Operator(OperatorType::Comma)
2482            ),
2483            "Expression statement should end with semicolon or related punctuation"
2484        );
2485        let has_semicolon = tokens
2486            .iter()
2487            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Semicolon)));
2488        assert!(
2489            has_semicolon,
2490            "Expression statement should produce a semicolon"
2491        );
2492    }
2493
2494    #[test]
2495    fn tokenize_variable_declarator_with_no_initializer() {
2496        let tokens = tokenize("let x;");
2497        let has_let = tokens
2498            .iter()
2499            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Let)));
2500        let has_x = tokens
2501            .iter()
2502            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "x"));
2503        // Should NOT have an assign operator since there's no initializer
2504        let has_assign = tokens
2505            .iter()
2506            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Assign)));
2507        assert!(has_let, "Should have let keyword");
2508        assert!(has_x, "Should have identifier x");
2509        assert!(
2510            !has_assign,
2511            "Uninitialized declarator should not have assign operator"
2512        );
2513    }
2514
2515    #[test]
2516    fn tokenize_using_declaration_maps_to_const() {
2517        // TC39 `using` declaration should map to Const keyword
2518        let tokens = tokenize("{ using resource = getResource(); }");
2519        let has_const = tokens
2520            .iter()
2521            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2522        assert!(
2523            has_const,
2524            "`using` declaration should be mapped to Const keyword"
2525        );
2526    }
2527
2528    #[test]
2529    fn tokenize_block_statement_produces_braces() {
2530        let tokens = tokenize("{ const x = 1; }");
2531        assert!(matches!(
2532            tokens[0].kind,
2533            TokenKind::Punctuation(PunctuationType::OpenBrace)
2534        ));
2535        let last = tokens.last().unwrap();
2536        assert!(
2537            matches!(
2538                last.kind,
2539                TokenKind::Punctuation(PunctuationType::CloseBrace)
2540            ),
2541            "Block should end with close brace"
2542        );
2543    }
2544
2545    #[test]
2546    fn tokenize_class_without_name_and_no_extends() {
2547        let tokens = tokenize("const C = class { };");
2548        let has_class = tokens
2549            .iter()
2550            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Class)));
2551        let has_extends = tokens
2552            .iter()
2553            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Extends)));
2554        assert!(has_class, "Should have class keyword");
2555        assert!(
2556            !has_extends,
2557            "Anonymous class without extends should not have extends keyword"
2558        );
2559    }
2560
2561    #[test]
2562    fn tokenize_function_without_name() {
2563        let tokens = tokenize("const f = function() { return 1; };");
2564        let has_function = tokens
2565            .iter()
2566            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
2567        assert!(has_function, "Should have function keyword");
2568    }
2569
2570    #[test]
2571    fn tokenize_ts_interface_body_has_braces() {
2572        let tokens = tokenize("interface I { x: number; }");
2573        let open_braces = tokens
2574            .iter()
2575            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2576            .count();
2577        let close_braces = tokens
2578            .iter()
2579            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
2580            .count();
2581        assert!(open_braces >= 1, "Interface body should have open brace");
2582        assert_eq!(
2583            open_braces, close_braces,
2584            "Interface body braces should be balanced"
2585        );
2586    }
2587
2588    #[test]
2589    fn tokenize_ts_enum_body_has_braces() {
2590        let tokens = tokenize("enum E { A, B }");
2591        let open_braces = tokens
2592            .iter()
2593            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2594            .count();
2595        assert!(open_braces >= 1, "Enum body should have open brace");
2596    }
2597
2598    #[test]
2599    fn tokenize_ts_module_declaration_not_stripped_when_not_declare() {
2600        // A non-declare namespace should not be stripped even when strip_types is true
2601        let tokens = tokenize("namespace Foo { export const x = 1; }");
2602        let has_const = tokens
2603            .iter()
2604            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2605        assert!(
2606            has_const,
2607            "Non-declare namespace contents should be preserved"
2608        );
2609    }
2610
2611    #[test]
2612    fn cross_language_preserves_non_declare_namespace() {
2613        let stripped = tokenize_cross_language("namespace Foo { export const x = 1; }");
2614        let has_const = stripped
2615            .iter()
2616            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2617        assert!(
2618            has_const,
2619            "Non-declare namespace contents should be preserved in cross-language mode"
2620        );
2621    }
2622
2623    #[test]
2624    fn tokenize_for_statement_with_all_clauses() {
2625        let tokens = tokenize("for (let i = 0; i < 10; i++) { console.log(i); }");
2626        assert!(matches!(
2627            tokens[0].kind,
2628            TokenKind::Keyword(KeywordType::For)
2629        ));
2630        let has_open_paren = tokens
2631            .iter()
2632            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2633        let has_close_paren = tokens
2634            .iter()
2635            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2636        assert!(has_open_paren, "For statement should have open paren");
2637        assert!(has_close_paren, "For statement should have close paren");
2638    }
2639
2640    #[test]
2641    fn tokenize_cross_language_produces_correct_metadata() {
2642        let path = PathBuf::from("test.ts");
2643        let source = "const x: number = 1;\nconst y: string = 'hello';";
2644        let result = tokenize_file_cross_language(&path, source, true);
2645        assert_eq!(result.line_count, 2);
2646        assert_eq!(result.source, source);
2647        assert!(!result.tokens.is_empty());
2648    }
2649
2650    #[test]
2651    fn strip_types_removes_complex_generics() {
2652        let stripped = tokenize_cross_language(
2653            "function merge<T extends object, U extends object>(a: T, b: U): T & U { return Object.assign(a, b); }",
2654        );
2655        let js_tokens = {
2656            let path = PathBuf::from("test.js");
2657            tokenize_file(
2658                &path,
2659                "function merge(a, b) { return Object.assign(a, b); }",
2660            )
2661            .tokens
2662        };
2663        assert_eq!(
2664            stripped.len(),
2665            js_tokens.len(),
2666            "Complex generics should be fully stripped: stripped={}, js={}",
2667            stripped.len(),
2668            js_tokens.len()
2669        );
2670    }
2671
2672    #[test]
2673    fn tokenize_ts_conditional_type_without_strip() {
2674        let tokens = tokenize("type IsString<T> = T extends string ? true : false;");
2675        let has_type = tokens
2676            .iter()
2677            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
2678        assert!(has_type, "Should contain type keyword");
2679        // The 'extends' in a conditional type is part of TSConditionalType AST,
2680        // not a class extends clause. The tokenizer walks the type which produces
2681        // identifiers (T, string) and the ternary operator/colon from the conditional.
2682        let has_true_bool = tokens
2683            .iter()
2684            .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(true)));
2685        let has_false_bool = tokens
2686            .iter()
2687            .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(false)));
2688        assert!(
2689            has_true_bool,
2690            "Conditional type should contain true literal"
2691        );
2692        assert!(
2693            has_false_bool,
2694            "Conditional type should contain false literal"
2695        );
2696    }
2697
2698    #[test]
2699    fn strip_types_removes_conditional_type() {
2700        let stripped = tokenize_cross_language(
2701            "type IsString<T> = T extends string ? true : false;\nconst x = 1;",
2702        );
2703        let has_type = stripped
2704            .iter()
2705            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
2706        assert!(!has_type, "Conditional type alias should be fully stripped");
2707    }
2708
2709    #[test]
2710    fn tokenize_vue_sfc_with_cross_language_stripping() {
2711        let vue_source = r#"<template><div/></template>
2712<script lang="ts">
2713import type { Ref } from 'vue';
2714import { ref } from 'vue';
2715const count: Ref<number> = ref(0);
2716</script>"#;
2717        let path = PathBuf::from("Component.vue");
2718        let result = tokenize_file_cross_language(&path, vue_source, true);
2719        // import type should be stripped
2720        let import_count = result
2721            .tokens
2722            .iter()
2723            .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)))
2724            .count();
2725        assert_eq!(
2726            import_count, 1,
2727            "import type should be stripped, leaving only 1 value import, got {import_count}"
2728        );
2729    }
2730
2731    #[test]
2732    fn tokenize_no_extension_uses_default_source_type() {
2733        let path = PathBuf::from("Makefile");
2734        // Files without a recognized extension should still not panic
2735        let result = tokenize_file(&path, "const x = 1;");
2736        // May or may not produce tokens depending on how SourceType handles unknown extensions
2737        // The important thing is no panic
2738        assert!(result.line_count >= 1);
2739    }
2740
2741    #[test]
2742    fn point_span_is_one_byte() {
2743        let span = point_span(42);
2744        assert_eq!(span.start, 42);
2745        assert_eq!(span.end, 43);
2746    }
2747
2748    #[test]
2749    fn tokenize_call_expression_with_arguments() {
2750        let tokens = tokenize("foo(1, 'hello', true);");
2751        let has_open_paren = tokens
2752            .iter()
2753            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2754        let has_close_paren = tokens
2755            .iter()
2756            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2757        let comma_count = tokens
2758            .iter()
2759            .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2760            .count();
2761        assert!(has_open_paren, "Call should have open paren");
2762        assert!(has_close_paren, "Call should have close paren");
2763        assert!(
2764            comma_count >= 3,
2765            "3 arguments should produce at least 3 commas (one per arg), got {comma_count}"
2766        );
2767    }
2768
2769    #[test]
2770    fn tokenize_new_expression_with_arguments() {
2771        let tokens = tokenize("new Foo(1, 2);");
2772        let has_new = tokens
2773            .iter()
2774            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
2775        let comma_count = tokens
2776            .iter()
2777            .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2778            .count();
2779        assert!(has_new);
2780        assert!(
2781            comma_count >= 2,
2782            "2 arguments should produce at least 2 commas, got {comma_count}"
2783        );
2784    }
2785
2786    #[test]
2787    fn tokenize_arrow_function_params_produce_commas() {
2788        let tokens = tokenize("const f = (a, b, c) => a;");
2789        let comma_count = tokens
2790            .iter()
2791            .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2792            .count();
2793        assert!(
2794            comma_count >= 3,
2795            "Arrow function with 3 params should produce at least 3 commas, got {comma_count}"
2796        );
2797    }
2798
2799    #[test]
2800    fn tokenize_function_params_produce_commas() {
2801        let tokens = tokenize("function f(a, b) { return a + b; }");
2802        let comma_count = tokens
2803            .iter()
2804            .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2805            .count();
2806        assert!(
2807            comma_count >= 2,
2808            "Function with 2 params should produce at least 2 commas, got {comma_count}"
2809        );
2810    }
2811
2812    #[test]
2813    fn tokenize_switch_with_open_close_parens() {
2814        let tokens = tokenize("switch (x) { case 1: break; }");
2815        let has_open_paren = tokens
2816            .iter()
2817            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2818        let has_close_paren = tokens
2819            .iter()
2820            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2821        assert!(
2822            has_open_paren,
2823            "Switch should have open paren for discriminant"
2824        );
2825        assert!(
2826            has_close_paren,
2827            "Switch should have close paren for discriminant"
2828        );
2829    }
2830
2831    #[test]
2832    fn tokenize_while_has_parens_around_condition() {
2833        let tokens = tokenize("while (true) { break; }");
2834        let has_open_paren = tokens
2835            .iter()
2836            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2837        let has_close_paren = tokens
2838            .iter()
2839            .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2840        assert!(has_open_paren, "While should have open paren");
2841        assert!(has_close_paren, "While should have close paren");
2842    }
2843
2844    #[test]
2845    fn tokenize_for_in_has_parens() {
2846        let tokens = tokenize("for (const k in obj) {}");
2847        let open_parens = tokens
2848            .iter()
2849            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)))
2850            .count();
2851        let close_parens = tokens
2852            .iter()
2853            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)))
2854            .count();
2855        assert!(open_parens >= 1, "for-in should have open paren");
2856        assert!(close_parens >= 1, "for-in should have close paren");
2857    }
2858
2859    #[test]
2860    fn tokenize_for_of_has_parens() {
2861        let tokens = tokenize("for (const v of arr) {}");
2862        let open_parens = tokens
2863            .iter()
2864            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)))
2865            .count();
2866        let close_parens = tokens
2867            .iter()
2868            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)))
2869            .count();
2870        assert!(open_parens >= 1, "for-of should have open paren");
2871        assert!(close_parens >= 1, "for-of should have close paren");
2872    }
2873
2874    #[test]
2875    fn strip_types_removes_ts_type_annotation_colon() {
2876        // Verify that the colon from type annotations is also stripped
2877        let stripped = tokenize_cross_language("const x: number = 1;");
2878        let colon_count = stripped
2879            .iter()
2880            .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)))
2881            .count();
2882        assert_eq!(
2883            colon_count, 0,
2884            "Type annotation colons should be stripped, got {colon_count}"
2885        );
2886    }
2887
2888    #[test]
2889    fn tokenize_ts_as_const() {
2890        let tokens = tokenize("const colors = ['red', 'green', 'blue'] as const;");
2891        let has_as = tokens
2892            .iter()
2893            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2894        assert!(has_as, "as const should produce 'as' keyword");
2895        // The declaration 'const' is emitted as a keyword; the 'const' in 'as const'
2896        // is visited as a TS type (TSTypeOperator), not as a keyword.
2897        let has_const_decl = tokens
2898            .iter()
2899            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2900        assert!(
2901            has_const_decl,
2902            "Should have Const keyword for the declaration"
2903        );
2904    }
2905
2906    #[test]
2907    fn strip_types_removes_as_const() {
2908        let stripped = tokenize_cross_language("const colors = ['red', 'green', 'blue'] as const;");
2909        let has_as = stripped
2910            .iter()
2911            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2912        assert!(
2913            !has_as,
2914            "'as const' should be stripped in cross-language mode"
2915        );
2916    }
2917}