Skip to main content

fallow_core/duplicates/
tokenize.rs

1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
11/// A single token extracted from the AST with its source location.
12#[derive(Debug, Clone)]
13pub struct SourceToken {
14    /// The kind of token.
15    pub kind: TokenKind,
16    /// Byte offset into the source file.
17    pub span: Span,
18}
19
20/// Normalized token types for clone detection.
21#[derive(Debug, Clone, PartialEq, Eq, Hash)]
22pub enum TokenKind {
23    // Keywords
24    Keyword(KeywordType),
25    // Identifiers -- value is the actual name (blinded in semantic mode)
26    Identifier(String),
27    // Literals
28    StringLiteral(String),
29    NumericLiteral(String),
30    BooleanLiteral(bool),
31    NullLiteral,
32    TemplateLiteral,
33    RegExpLiteral,
34    // Operators
35    Operator(OperatorType),
36    // Punctuation / delimiters
37    Punctuation(PunctuationType),
38}
39
/// Keywords of JavaScript and TypeScript that the tokenizer can emit.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum KeywordType {
    // Bindings and functions
    Var,
    Let,
    Const,
    Function,
    Return,
    // Branching and loops
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Break,
    Continue,
    Default,
    // Exceptions
    Throw,
    Try,
    Catch,
    Finally,
    // Expression-level keywords
    New,
    Delete,
    Typeof,
    Instanceof,
    In,
    Of,
    Void,
    This,
    Super,
    // Classes
    Class,
    Extends,
    // Modules
    Import,
    Export,
    From,
    As,
    // Async and generators
    Async,
    Await,
    Yield,
    // Member modifiers
    Static,
    Get,
    Set,
    // TypeScript
    Type,
    Interface,
    Enum,
    Implements,
    Abstract,
    Declare,
    Readonly,
    Keyof,
    Satisfies,
}
93
/// Operators the tokenizer can emit, one variant per distinct operator.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum OperatorType {
    // Plain assignment
    Assign,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Exp,
    // Comparison
    Eq,
    NEq,
    StrictEq,
    StrictNEq,
    Lt,
    Gt,
    LtEq,
    GtEq,
    // Logical
    And,
    Or,
    Not,
    // Bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,
    ShiftLeft,
    ShiftRight,
    UnsignedShiftRight,
    // Miscellaneous
    NullishCoalescing,
    OptionalChaining,
    Spread,
    Ternary,
    Arrow,
    Comma,
    // Compound assignment
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    ExpAssign,
    AndAssign,
    OrAssign,
    NullishAssign,
    BitwiseAndAssign,
    BitwiseOrAssign,
    BitwiseXorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    UnsignedShiftRightAssign,
    // Update
    Increment,
    Decrement,
    // Relational keywords used in binary-operator position
    Instanceof,
    In,
}
148
/// Delimiters and punctuation marks the tokenizer can emit.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum PunctuationType {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `.`
    Dot,
}
162
163/// Result of tokenizing a source file.
164#[derive(Debug, Clone)]
165pub struct FileTokens {
166    /// The extracted token sequence.
167    pub tokens: Vec<SourceToken>,
168    /// Source text (needed for extracting fragments).
169    pub source: String,
170    /// Total number of lines in the source.
171    pub line_count: usize,
172}
173
174/// Create a 1-byte span at the given byte position.
175///
176/// Used for synthetic punctuation tokens (`(`, `)`, `,`, `.`) that don't
177/// have their own AST span. Using the parent expression's full span would
178/// inflate clone line ranges, especially in chained method calls.
179const fn point_span(pos: u32) -> Span {
180    Span::new(pos, pos + 1)
181}
182
183/// Tokenize a source file into a sequence of normalized tokens.
184///
185/// For Vue/Svelte SFC files, extracts `<script>` blocks first and tokenizes
186/// their content, mirroring the main analysis pipeline's SFC handling.
187/// For Astro files, extracts frontmatter. For MDX files, extracts import/export statements.
188///
189/// When `strip_types` is true, TypeScript type annotations, interfaces, and type
190/// aliases are stripped from the token stream. This enables cross-language clone
191/// detection between `.ts` and `.js` files.
192pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
193    tokenize_file_inner(path, source, false)
194}
195
196/// Tokenize a source file with optional type stripping for cross-language detection.
197pub fn tokenize_file_cross_language(path: &Path, source: &str, strip_types: bool) -> FileTokens {
198    tokenize_file_inner(path, source, strip_types)
199}
200
201fn tokenize_file_inner(path: &Path, source: &str, strip_types: bool) -> FileTokens {
202    use crate::extract::{
203        extract_astro_frontmatter, extract_mdx_statements, extract_sfc_scripts, is_sfc_file,
204    };
205
206    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
207
208    // For Vue/Svelte SFCs, extract and tokenize `<script>` blocks.
209    if is_sfc_file(path) {
210        let scripts = extract_sfc_scripts(source);
211        let mut all_tokens = Vec::new();
212
213        for script in &scripts {
214            let source_type = match (script.is_typescript, script.is_jsx) {
215                (true, true) => SourceType::tsx(),
216                (true, false) => SourceType::ts(),
217                (false, true) => SourceType::jsx(),
218                (false, false) => SourceType::mjs(),
219            };
220            let allocator = Allocator::default();
221            let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
222
223            let mut extractor = TokenExtractor::with_strip_types(strip_types);
224            extractor.visit_program(&parser_return.program);
225
226            // Adjust token spans to reference positions in the full SFC source
227            // rather than the extracted script block.
228            let offset = script.byte_offset as u32;
229            for token in &mut extractor.tokens {
230                token.span = Span::new(token.span.start + offset, token.span.end + offset);
231            }
232            all_tokens.extend(extractor.tokens);
233        }
234
235        let line_count = source.lines().count().max(1);
236        return FileTokens {
237            tokens: all_tokens,
238            source: source.to_string(),
239            line_count,
240        };
241    }
242
243    // For Astro files, extract and tokenize frontmatter.
244    if ext == "astro" {
245        if let Some(script) = extract_astro_frontmatter(source) {
246            let allocator = Allocator::default();
247            let parser_return = Parser::new(&allocator, &script.body, SourceType::ts()).parse();
248
249            let mut extractor = TokenExtractor::with_strip_types(strip_types);
250            extractor.visit_program(&parser_return.program);
251
252            let offset = script.byte_offset as u32;
253            for token in &mut extractor.tokens {
254                token.span = Span::new(token.span.start + offset, token.span.end + offset);
255            }
256
257            let line_count = source.lines().count().max(1);
258            return FileTokens {
259                tokens: extractor.tokens,
260                source: source.to_string(),
261                line_count,
262            };
263        }
264        // No frontmatter — return empty tokens.
265        let line_count = source.lines().count().max(1);
266        return FileTokens {
267            tokens: Vec::new(),
268            source: source.to_string(),
269            line_count,
270        };
271    }
272
273    // For MDX files, extract and tokenize import/export statements.
274    if ext == "mdx" {
275        let statements = extract_mdx_statements(source);
276        if !statements.is_empty() {
277            let allocator = Allocator::default();
278            let parser_return = Parser::new(&allocator, &statements, SourceType::jsx()).parse();
279
280            let mut extractor = TokenExtractor::with_strip_types(strip_types);
281            extractor.visit_program(&parser_return.program);
282
283            let line_count = source.lines().count().max(1);
284            return FileTokens {
285                tokens: extractor.tokens,
286                source: source.to_string(),
287                line_count,
288            };
289        }
290        let line_count = source.lines().count().max(1);
291        return FileTokens {
292            tokens: Vec::new(),
293            source: source.to_string(),
294            line_count,
295        };
296    }
297
298    // CSS/SCSS files are not JS/TS — skip tokenization for duplication detection.
299    if ext == "css" || ext == "scss" {
300        let line_count = source.lines().count().max(1);
301        return FileTokens {
302            tokens: Vec::new(),
303            source: source.to_string(),
304            line_count,
305        };
306    }
307
308    let source_type = SourceType::from_path(path).unwrap_or_default();
309    let allocator = Allocator::default();
310    let parser_return = Parser::new(&allocator, source, source_type).parse();
311
312    let mut extractor = TokenExtractor::with_strip_types(strip_types);
313    extractor.visit_program(&parser_return.program);
314
315    // If parsing produced very few tokens relative to source size (likely parse errors
316    // from Flow types or JSX in .js files), retry with JSX/TSX source type as a fallback.
317    if extractor.tokens.len() < 5 && source.len() > 100 && !source_type.is_jsx() {
318        let jsx_type = if source_type.is_typescript() {
319            SourceType::tsx()
320        } else {
321            SourceType::jsx()
322        };
323        let allocator2 = Allocator::default();
324        let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
325        let mut retry_extractor = TokenExtractor::with_strip_types(strip_types);
326        retry_extractor.visit_program(&retry_return.program);
327        if retry_extractor.tokens.len() > extractor.tokens.len() {
328            extractor = retry_extractor;
329        }
330    }
331
332    let line_count = source.lines().count().max(1);
333
334    FileTokens {
335        tokens: extractor.tokens,
336        source: source.to_string(),
337        line_count,
338    }
339}
340
341/// AST visitor that extracts a flat sequence of normalized tokens.
342struct TokenExtractor {
343    tokens: Vec<SourceToken>,
344    /// When true, skip TypeScript type annotations, interfaces, and type aliases
345    /// to enable cross-language clone detection between .ts and .js files.
346    strip_types: bool,
347}
348
349impl TokenExtractor {
350    const fn with_strip_types(strip_types: bool) -> Self {
351        Self {
352            tokens: Vec::new(),
353            strip_types,
354        }
355    }
356
357    fn push(&mut self, kind: TokenKind, span: Span) {
358        self.tokens.push(SourceToken { kind, span });
359    }
360
361    fn push_keyword(&mut self, kw: KeywordType, span: Span) {
362        self.push(TokenKind::Keyword(kw), span);
363    }
364
365    fn push_op(&mut self, op: OperatorType, span: Span) {
366        self.push(TokenKind::Operator(op), span);
367    }
368
369    fn push_punc(&mut self, p: PunctuationType, span: Span) {
370        self.push(TokenKind::Punctuation(p), span);
371    }
372}
373
374impl<'a> Visit<'a> for TokenExtractor {
375    // ── Statements ──────────────────────────────────────────
376
377    fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
378        let kw = match decl.kind {
379            VariableDeclarationKind::Var => KeywordType::Var,
380            VariableDeclarationKind::Let => KeywordType::Let,
381            VariableDeclarationKind::Const => KeywordType::Const,
382            VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
383                KeywordType::Const
384            }
385        };
386        self.push_keyword(kw, decl.span);
387        walk::walk_variable_declaration(self, decl);
388    }
389
390    fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
391        self.push_keyword(KeywordType::Return, stmt.span);
392        walk::walk_return_statement(self, stmt);
393    }
394
395    fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
396        self.push_keyword(KeywordType::If, stmt.span);
397        self.push_punc(PunctuationType::OpenParen, stmt.span);
398        self.visit_expression(&stmt.test);
399        self.push_punc(PunctuationType::CloseParen, stmt.span);
400        self.visit_statement(&stmt.consequent);
401        if let Some(alt) = &stmt.alternate {
402            self.push_keyword(KeywordType::Else, stmt.span);
403            self.visit_statement(alt);
404        }
405    }
406
407    fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
408        self.push_keyword(KeywordType::For, stmt.span);
409        self.push_punc(PunctuationType::OpenParen, stmt.span);
410        walk::walk_for_statement(self, stmt);
411        self.push_punc(PunctuationType::CloseParen, stmt.span);
412    }
413
414    fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
415        self.push_keyword(KeywordType::For, stmt.span);
416        self.push_punc(PunctuationType::OpenParen, stmt.span);
417        self.visit_for_statement_left(&stmt.left);
418        self.push_keyword(KeywordType::In, stmt.span);
419        self.visit_expression(&stmt.right);
420        self.push_punc(PunctuationType::CloseParen, stmt.span);
421        self.visit_statement(&stmt.body);
422    }
423
424    fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
425        self.push_keyword(KeywordType::For, stmt.span);
426        self.push_punc(PunctuationType::OpenParen, stmt.span);
427        self.visit_for_statement_left(&stmt.left);
428        self.push_keyword(KeywordType::Of, stmt.span);
429        self.visit_expression(&stmt.right);
430        self.push_punc(PunctuationType::CloseParen, stmt.span);
431        self.visit_statement(&stmt.body);
432    }
433
434    fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
435        self.push_keyword(KeywordType::While, stmt.span);
436        self.push_punc(PunctuationType::OpenParen, stmt.span);
437        walk::walk_while_statement(self, stmt);
438        self.push_punc(PunctuationType::CloseParen, stmt.span);
439    }
440
441    fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
442        self.push_keyword(KeywordType::Do, stmt.span);
443        walk::walk_do_while_statement(self, stmt);
444    }
445
446    fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
447        self.push_keyword(KeywordType::Switch, stmt.span);
448        self.push_punc(PunctuationType::OpenParen, stmt.span);
449        walk::walk_switch_statement(self, stmt);
450        self.push_punc(PunctuationType::CloseParen, stmt.span);
451    }
452
453    fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
454        if case.test.is_some() {
455            self.push_keyword(KeywordType::Case, case.span);
456        } else {
457            self.push_keyword(KeywordType::Default, case.span);
458        }
459        self.push_punc(PunctuationType::Colon, case.span);
460        walk::walk_switch_case(self, case);
461    }
462
463    fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
464        self.push_keyword(KeywordType::Break, stmt.span);
465    }
466
467    fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
468        self.push_keyword(KeywordType::Continue, stmt.span);
469    }
470
471    fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
472        self.push_keyword(KeywordType::Throw, stmt.span);
473        walk::walk_throw_statement(self, stmt);
474    }
475
476    fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
477        self.push_keyword(KeywordType::Try, stmt.span);
478        walk::walk_try_statement(self, stmt);
479    }
480
481    fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
482        self.push_keyword(KeywordType::Catch, clause.span);
483        walk::walk_catch_clause(self, clause);
484    }
485
486    fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
487        self.push_punc(PunctuationType::OpenBrace, block.span);
488        walk::walk_block_statement(self, block);
489        self.push_punc(PunctuationType::CloseBrace, block.span);
490    }
491
492    // ── Expressions ─────────────────────────────────────────
493
494    fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
495        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
496    }
497
498    fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
499        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
500    }
501
502    fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
503        self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
504    }
505
506    fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
507        let raw_str = lit
508            .raw
509            .as_ref()
510            .map_or_else(|| lit.value.to_string(), |r| r.to_string());
511        self.push(TokenKind::NumericLiteral(raw_str), lit.span);
512    }
513
514    fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
515        self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
516    }
517
518    fn visit_null_literal(&mut self, lit: &NullLiteral) {
519        self.push(TokenKind::NullLiteral, lit.span);
520    }
521
522    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
523        self.push(TokenKind::TemplateLiteral, lit.span);
524        walk::walk_template_literal(self, lit);
525    }
526
527    fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
528        self.push(TokenKind::RegExpLiteral, lit.span);
529    }
530
531    fn visit_this_expression(&mut self, expr: &ThisExpression) {
532        self.push_keyword(KeywordType::This, expr.span);
533    }
534
535    fn visit_super(&mut self, expr: &Super) {
536        self.push_keyword(KeywordType::Super, expr.span);
537    }
538
539    fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
540        self.push_punc(PunctuationType::OpenBracket, expr.span);
541        walk::walk_array_expression(self, expr);
542        self.push_punc(PunctuationType::CloseBracket, expr.span);
543    }
544
545    fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
546        self.push_punc(PunctuationType::OpenBrace, expr.span);
547        walk::walk_object_expression(self, expr);
548        self.push_punc(PunctuationType::CloseBrace, expr.span);
549    }
550
551    fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
552        self.visit_expression(&expr.callee);
553        // Use point spans for synthetic punctuation to avoid inflating clone
554        // ranges when call expressions are chained (expr.span covers the
555        // entire chain, not just this call's parentheses).
556        let open = point_span(expr.callee.span().end);
557        self.push_punc(PunctuationType::OpenParen, open);
558        for arg in &expr.arguments {
559            self.visit_argument(arg);
560            let comma = point_span(arg.span().end);
561            self.push_op(OperatorType::Comma, comma);
562        }
563        let close = point_span(expr.span.end.saturating_sub(1));
564        self.push_punc(PunctuationType::CloseParen, close);
565    }
566
567    fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
568        self.push_keyword(KeywordType::New, expr.span);
569        self.visit_expression(&expr.callee);
570        let open = point_span(expr.callee.span().end);
571        self.push_punc(PunctuationType::OpenParen, open);
572        for arg in &expr.arguments {
573            self.visit_argument(arg);
574            let comma = point_span(arg.span().end);
575            self.push_op(OperatorType::Comma, comma);
576        }
577        let close = point_span(expr.span.end.saturating_sub(1));
578        self.push_punc(PunctuationType::CloseParen, close);
579    }
580
581    fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
582        self.visit_expression(&expr.object);
583        // Use point span at the dot position (right after the object).
584        let dot = point_span(expr.object.span().end);
585        self.push_punc(PunctuationType::Dot, dot);
586        self.push(
587            TokenKind::Identifier(expr.property.name.to_string()),
588            expr.property.span,
589        );
590    }
591
592    fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
593        self.visit_expression(&expr.object);
594        let open = point_span(expr.object.span().end);
595        self.push_punc(PunctuationType::OpenBracket, open);
596        self.visit_expression(&expr.expression);
597        let close = point_span(expr.span.end.saturating_sub(1));
598        self.push_punc(PunctuationType::CloseBracket, close);
599    }
600
601    fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
602        self.visit_assignment_target(&expr.left);
603        let op = match expr.operator {
604            AssignmentOperator::Assign => OperatorType::Assign,
605            AssignmentOperator::Addition => OperatorType::AddAssign,
606            AssignmentOperator::Subtraction => OperatorType::SubAssign,
607            AssignmentOperator::Multiplication => OperatorType::MulAssign,
608            AssignmentOperator::Division => OperatorType::DivAssign,
609            AssignmentOperator::Remainder => OperatorType::ModAssign,
610            AssignmentOperator::Exponential => OperatorType::ExpAssign,
611            AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
612            AssignmentOperator::LogicalOr => OperatorType::OrAssign,
613            AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
614            AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
615            AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
616            AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
617            AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
618            AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
619            AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
620        };
621        self.push_op(op, expr.span);
622        self.visit_expression(&expr.right);
623    }
624
625    fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
626        self.visit_expression(&expr.left);
627        let op = match expr.operator {
628            BinaryOperator::Addition => OperatorType::Add,
629            BinaryOperator::Subtraction => OperatorType::Sub,
630            BinaryOperator::Multiplication => OperatorType::Mul,
631            BinaryOperator::Division => OperatorType::Div,
632            BinaryOperator::Remainder => OperatorType::Mod,
633            BinaryOperator::Exponential => OperatorType::Exp,
634            BinaryOperator::Equality => OperatorType::Eq,
635            BinaryOperator::Inequality => OperatorType::NEq,
636            BinaryOperator::StrictEquality => OperatorType::StrictEq,
637            BinaryOperator::StrictInequality => OperatorType::StrictNEq,
638            BinaryOperator::LessThan => OperatorType::Lt,
639            BinaryOperator::GreaterThan => OperatorType::Gt,
640            BinaryOperator::LessEqualThan => OperatorType::LtEq,
641            BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
642            BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
643            BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
644            BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
645            BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
646            BinaryOperator::ShiftRight => OperatorType::ShiftRight,
647            BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
648            BinaryOperator::Instanceof => OperatorType::Instanceof,
649            BinaryOperator::In => OperatorType::In,
650        };
651        self.push_op(op, expr.span);
652        self.visit_expression(&expr.right);
653    }
654
655    fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
656        self.visit_expression(&expr.left);
657        let op = match expr.operator {
658            LogicalOperator::And => OperatorType::And,
659            LogicalOperator::Or => OperatorType::Or,
660            LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
661        };
662        self.push_op(op, expr.span);
663        self.visit_expression(&expr.right);
664    }
665
666    fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
667        let op = match expr.operator {
668            UnaryOperator::UnaryPlus => OperatorType::Add,
669            UnaryOperator::UnaryNegation => OperatorType::Sub,
670            UnaryOperator::LogicalNot => OperatorType::Not,
671            UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
672            UnaryOperator::Typeof => {
673                self.push_keyword(KeywordType::Typeof, expr.span);
674                walk::walk_unary_expression(self, expr);
675                return;
676            }
677            UnaryOperator::Void => {
678                self.push_keyword(KeywordType::Void, expr.span);
679                walk::walk_unary_expression(self, expr);
680                return;
681            }
682            UnaryOperator::Delete => {
683                self.push_keyword(KeywordType::Delete, expr.span);
684                walk::walk_unary_expression(self, expr);
685                return;
686            }
687        };
688        self.push_op(op, expr.span);
689        walk::walk_unary_expression(self, expr);
690    }
691
692    fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
693        let op = match expr.operator {
694            UpdateOperator::Increment => OperatorType::Increment,
695            UpdateOperator::Decrement => OperatorType::Decrement,
696        };
697        if expr.prefix {
698            self.push_op(op, expr.span);
699        }
700        walk::walk_update_expression(self, expr);
701        if !expr.prefix {
702            self.push_op(op, expr.span);
703        }
704    }
705
706    fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
707        self.visit_expression(&expr.test);
708        self.push_op(OperatorType::Ternary, expr.span);
709        self.visit_expression(&expr.consequent);
710        self.push_punc(PunctuationType::Colon, expr.span);
711        self.visit_expression(&expr.alternate);
712    }
713
714    fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
715        if expr.r#async {
716            self.push_keyword(KeywordType::Async, expr.span);
717        }
718        let params_span = expr.params.span;
719        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
720        for param in &expr.params.items {
721            self.visit_binding_pattern(&param.pattern);
722            self.push_op(OperatorType::Comma, point_span(param.span.end));
723        }
724        self.push_punc(
725            PunctuationType::CloseParen,
726            point_span(params_span.end.saturating_sub(1)),
727        );
728        self.push_op(OperatorType::Arrow, point_span(params_span.end));
729        walk::walk_arrow_function_expression(self, expr);
730    }
731
732    fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
733        self.push_keyword(KeywordType::Yield, expr.span);
734        walk::walk_yield_expression(self, expr);
735    }
736
737    fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
738        self.push_keyword(KeywordType::Await, expr.span);
739        walk::walk_await_expression(self, expr);
740    }
741
742    fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
743        self.push_op(OperatorType::Spread, elem.span);
744        walk::walk_spread_element(self, elem);
745    }
746
747    fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
748        for (i, sub_expr) in expr.expressions.iter().enumerate() {
749            if i > 0 {
750                self.push_op(OperatorType::Comma, expr.span);
751            }
752            self.visit_expression(sub_expr);
753        }
754    }
755
756    // ── Functions ──────────────────────────────────────────
757
758    fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
759        if func.r#async {
760            self.push_keyword(KeywordType::Async, func.span);
761        }
762        self.push_keyword(KeywordType::Function, func.span);
763        if let Some(id) = &func.id {
764            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
765        }
766        let params_span = func.params.span;
767        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
768        for param in &func.params.items {
769            self.visit_binding_pattern(&param.pattern);
770            self.push_op(OperatorType::Comma, point_span(param.span.end));
771        }
772        self.push_punc(
773            PunctuationType::CloseParen,
774            point_span(params_span.end.saturating_sub(1)),
775        );
776        walk::walk_function(self, func, flags);
777    }
778
779    // ── Classes ─────────────────────────────────────────────
780
781    fn visit_class(&mut self, class: &Class<'a>) {
782        self.push_keyword(KeywordType::Class, class.span);
783        if let Some(id) = &class.id {
784            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
785        }
786        if class.super_class.is_some() {
787            self.push_keyword(KeywordType::Extends, class.span);
788        }
789        walk::walk_class(self, class);
790    }
791
792    // ── Import/Export ───────────────────────────────────────
793
794    fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
795        // Skip `import type { ... } from '...'` when stripping types
796        if self.strip_types && decl.import_kind.is_type() {
797            return;
798        }
799        self.push_keyword(KeywordType::Import, decl.span);
800        walk::walk_import_declaration(self, decl);
801        self.push_keyword(KeywordType::From, decl.span);
802        self.push(
803            TokenKind::StringLiteral(decl.source.value.to_string()),
804            decl.source.span,
805        );
806    }
807
808    fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
809        // Skip `export type { ... }` when stripping types
810        if self.strip_types && decl.export_kind.is_type() {
811            return;
812        }
813        self.push_keyword(KeywordType::Export, decl.span);
814        walk::walk_export_named_declaration(self, decl);
815    }
816
817    fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
818        self.push_keyword(KeywordType::Export, decl.span);
819        self.push_keyword(KeywordType::Default, decl.span);
820        walk::walk_export_default_declaration(self, decl);
821    }
822
823    fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
824        self.push_keyword(KeywordType::Export, decl.span);
825        self.push_keyword(KeywordType::From, decl.span);
826        self.push(
827            TokenKind::StringLiteral(decl.source.value.to_string()),
828            decl.source.span,
829        );
830    }
831
832    // ── TypeScript declarations ────────────────────────────
833
834    fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
835        if self.strip_types {
836            return; // Skip entire interface when stripping types
837        }
838        self.push_keyword(KeywordType::Interface, decl.span);
839        walk::walk_ts_interface_declaration(self, decl);
840    }
841
842    fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
843        self.push_punc(PunctuationType::OpenBrace, body.span);
844        walk::walk_ts_interface_body(self, body);
845        self.push_punc(PunctuationType::CloseBrace, body.span);
846    }
847
848    fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
849        if self.strip_types {
850            return; // Skip entire type alias when stripping types
851        }
852        self.push_keyword(KeywordType::Type, decl.span);
853        walk::walk_ts_type_alias_declaration(self, decl);
854    }
855
856    fn visit_ts_module_declaration(&mut self, decl: &TSModuleDeclaration<'a>) {
857        if self.strip_types && decl.declare {
858            return; // Skip `declare module` / `declare namespace` when stripping types
859        }
860        walk::walk_ts_module_declaration(self, decl);
861    }
862
863    fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
864        self.push_keyword(KeywordType::Enum, decl.span);
865        walk::walk_ts_enum_declaration(self, decl);
866    }
867
868    fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
869        self.push_punc(PunctuationType::OpenBrace, body.span);
870        walk::walk_ts_enum_body(self, body);
871        self.push_punc(PunctuationType::CloseBrace, body.span);
872    }
873
874    fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
875        walk::walk_ts_property_signature(self, sig);
876        self.push_punc(PunctuationType::Semicolon, sig.span);
877    }
878
879    fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
880        if self.strip_types {
881            return; // Skip parameter/return type annotations when stripping types
882        }
883        self.push_punc(PunctuationType::Colon, ann.span);
884        walk::walk_ts_type_annotation(self, ann);
885    }
886
887    fn visit_ts_type_parameter_declaration(&mut self, decl: &TSTypeParameterDeclaration<'a>) {
888        if self.strip_types {
889            return; // Skip generic type parameters when stripping types
890        }
891        walk::walk_ts_type_parameter_declaration(self, decl);
892    }
893
894    fn visit_ts_type_parameter_instantiation(&mut self, inst: &TSTypeParameterInstantiation<'a>) {
895        if self.strip_types {
896            return; // Skip generic type arguments when stripping types
897        }
898        walk::walk_ts_type_parameter_instantiation(self, inst);
899    }
900
901    fn visit_ts_as_expression(&mut self, expr: &TSAsExpression<'a>) {
902        self.visit_expression(&expr.expression);
903        if !self.strip_types {
904            self.push_keyword(KeywordType::As, expr.span);
905            self.visit_ts_type(&expr.type_annotation);
906        }
907    }
908
909    fn visit_ts_satisfies_expression(&mut self, expr: &TSSatisfiesExpression<'a>) {
910        self.visit_expression(&expr.expression);
911        if !self.strip_types {
912            self.push_keyword(KeywordType::Satisfies, expr.span);
913            self.visit_ts_type(&expr.type_annotation);
914        }
915    }
916
917    fn visit_ts_non_null_expression(&mut self, expr: &TSNonNullExpression<'a>) {
918        self.visit_expression(&expr.expression);
919        // The `!` postfix is stripped when stripping types (it's a type assertion)
920    }
921
922    fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
923        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
924    }
925
926    fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
927        self.push(TokenKind::Identifier("string".to_string()), it.span);
928    }
929
930    fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
931        self.push(TokenKind::Identifier("number".to_string()), it.span);
932    }
933
934    fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
935        self.push(TokenKind::Identifier("boolean".to_string()), it.span);
936    }
937
938    fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
939        self.push(TokenKind::Identifier("any".to_string()), it.span);
940    }
941
942    fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
943        self.push(TokenKind::Identifier("void".to_string()), it.span);
944    }
945
946    fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
947        self.push(TokenKind::NullLiteral, it.span);
948    }
949
950    fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
951        self.push(TokenKind::Identifier("undefined".to_string()), it.span);
952    }
953
954    fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
955        self.push(TokenKind::Identifier("never".to_string()), it.span);
956    }
957
958    fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
959        self.push(TokenKind::Identifier("unknown".to_string()), it.span);
960    }
961
962    // ── JSX ─────────────────────────────────────────────────
963
964    fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
965        self.push_punc(PunctuationType::OpenBracket, elem.span);
966        walk::walk_jsx_opening_element(self, elem);
967        self.push_punc(PunctuationType::CloseBracket, elem.span);
968    }
969
970    fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
971        self.push_punc(PunctuationType::OpenBracket, elem.span);
972        walk::walk_jsx_closing_element(self, elem);
973        self.push_punc(PunctuationType::CloseBracket, elem.span);
974    }
975
976    fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
977        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
978    }
979
980    fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
981        self.push_op(OperatorType::Spread, attr.span);
982        walk::walk_jsx_spread_attribute(self, attr);
983    }
984
985    // ── Misc ────────────────────────────────────────────────
986
987    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
988        self.visit_binding_pattern(&decl.id);
989        if let Some(init) = &decl.init {
990            self.push_op(OperatorType::Assign, decl.span);
991            self.visit_expression(init);
992        }
993        self.push_punc(PunctuationType::Semicolon, decl.span);
994    }
995
996    fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
997        walk::walk_expression_statement(self, stmt);
998        self.push_punc(PunctuationType::Semicolon, stmt.span);
999    }
1000}
1001
#[cfg(test)]
mod tests {
    //! Unit tests for the tokenizer: basic token extraction for JS/TS/JSX
    //! constructs, and type stripping via `tokenize_file_cross_language`.

    use super::*;
    use std::path::PathBuf;

    /// Tokenizes `code` through `tokenize_file` under the file name `test.ts`.
    fn tokenize(code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from("test.ts");
        tokenize_file(&path, code).tokens
    }

    /// Tokenizes `code` through `tokenize_file` under the file name `test.tsx`.
    fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from("test.tsx");
        tokenize_file(&path, code).tokens
    }

    /// Tokenizes `code` through `tokenize_file_cross_language` (third argument
    /// `true`) under the file name `test.ts`.
    fn tokenize_cross_language(code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from("test.ts");
        tokenize_file_cross_language(&path, code, true).tokens
    }

    /// Returns `true` if any token is the keyword `kw`.
    fn has_keyword(tokens: &[SourceToken], kw: KeywordType) -> bool {
        tokens.iter().any(|t| t.kind == TokenKind::Keyword(kw))
    }

    /// Counts the tokens that are the keyword `kw`.
    fn count_keyword(tokens: &[SourceToken], kw: KeywordType) -> usize {
        tokens
            .iter()
            .filter(|t| t.kind == TokenKind::Keyword(kw))
            .count()
    }

    /// Returns `true` if any token is an identifier named `name`.
    fn has_identifier(tokens: &[SourceToken], name: &str) -> bool {
        tokens
            .iter()
            .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == name))
    }

    /// Returns `true` if a `:` token is immediately followed by the identifier
    /// `string`, i.e. the token shape of a `: string` type annotation.
    fn has_string_annotation(tokens: &[SourceToken]) -> bool {
        tokens.windows(2).any(|w| {
            matches!(w[0].kind, TokenKind::Punctuation(PunctuationType::Colon))
                && matches!(&w[1].kind, TokenKind::Identifier(n) if n == "string")
        })
    }

    #[test]
    fn tokenize_variable_declaration() {
        let tokens = tokenize("const x = 42;");
        assert!(!tokens.is_empty());
        // Should have: const, x (identifier), = (assign), 42 (numeric), ;
        assert_eq!(tokens[0].kind, TokenKind::Keyword(KeywordType::Const));
    }

    #[test]
    fn tokenize_function_declaration() {
        let tokens = tokenize("function foo() { return 1; }");
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].kind, TokenKind::Keyword(KeywordType::Function));
    }

    #[test]
    fn tokenize_arrow_function() {
        let tokens = tokenize("const f = (a, b) => a + b;");
        assert!(!tokens.is_empty());
        let has_arrow = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Arrow)));
        assert!(has_arrow, "Should contain arrow operator");
    }

    #[test]
    fn tokenize_if_else() {
        let tokens = tokenize("if (x) { y; } else { z; }");
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].kind, TokenKind::Keyword(KeywordType::If));
        assert!(
            has_keyword(&tokens, KeywordType::Else),
            "Should contain else keyword"
        );
    }

    #[test]
    fn tokenize_class() {
        let tokens = tokenize("class Foo extends Bar { }");
        assert!(!tokens.is_empty());
        assert_eq!(tokens[0].kind, TokenKind::Keyword(KeywordType::Class));
        assert!(
            has_keyword(&tokens, KeywordType::Extends),
            "Should contain extends keyword"
        );
    }

    #[test]
    fn tokenize_string_literal() {
        let tokens = tokenize("const s = \"hello\";");
        let has_string = tokens
            .iter()
            .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "hello"));
        assert!(has_string, "Should contain string literal");
    }

    #[test]
    fn tokenize_boolean_literal() {
        let tokens = tokenize("const b = true;");
        let has_bool = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(true)));
        assert!(has_bool, "Should contain boolean literal");
    }

    #[test]
    fn tokenize_null_literal() {
        let tokens = tokenize("const n = null;");
        let has_null = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::NullLiteral));
        assert!(has_null, "Should contain null literal");
    }

    #[test]
    fn tokenize_empty_file() {
        let tokens = tokenize("");
        assert!(tokens.is_empty());
    }

    #[test]
    fn tokenize_ts_interface() {
        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
        assert!(
            has_keyword(&tokens, KeywordType::Interface),
            "Should contain interface keyword"
        );
        assert!(
            has_identifier(&tokens, "bar"),
            "Should contain property name 'bar'"
        );
        assert!(
            has_identifier(&tokens, "string"),
            "Should contain type 'string'"
        );
        // Should have enough tokens for clone detection
        assert!(
            tokens.len() >= 10,
            "Interface should produce sufficient tokens, got {}",
            tokens.len()
        );
    }

    #[test]
    fn tokenize_ts_type_alias() {
        let tokens = tokenize("type Result = { ok: boolean; error: string; }");
        assert!(
            has_keyword(&tokens, KeywordType::Type),
            "Should contain type keyword"
        );
    }

    #[test]
    fn tokenize_ts_enum() {
        let tokens = tokenize("enum Color { Red, Green, Blue }");
        assert!(
            has_keyword(&tokens, KeywordType::Enum),
            "Should contain enum keyword"
        );
        assert!(
            has_identifier(&tokens, "Red"),
            "Should contain enum member 'Red'"
        );
    }

    #[test]
    fn tokenize_jsx_element() {
        let tokens =
            tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
        assert!(
            has_identifier(&tokens, "div"),
            "Should contain JSX element name 'div'"
        );
        assert!(
            has_identifier(&tokens, "className"),
            "Should contain JSX attribute 'className'"
        );
        let brackets = tokens
            .iter()
            .filter(|t| {
                matches!(
                    t.kind,
                    TokenKind::Punctuation(PunctuationType::OpenBracket)
                        | TokenKind::Punctuation(PunctuationType::CloseBracket)
                )
            })
            .count();
        assert!(
            brackets >= 4,
            "Should contain JSX angle brackets, got {brackets}"
        );
    }

    // ── Cross-language type stripping tests ──────────────────────

    #[test]
    fn strip_types_removes_parameter_type_annotations() {
        let source = "function foo(x: string) { return x; }";
        let ts_tokens = tokenize(source);
        let stripped = tokenize_cross_language(source);

        // The stripped version should have fewer tokens (no `: string`)
        assert!(
            stripped.len() < ts_tokens.len(),
            "Stripped tokens ({}) should be fewer than full tokens ({})",
            stripped.len(),
            ts_tokens.len()
        );

        // Sanity check: the unstripped stream really does contain `: string`
        assert!(
            has_string_annotation(&ts_tokens),
            "Original should have `: string`"
        );

        // Should NOT contain type-annotation colon or the type name
        assert!(
            !has_string_annotation(&stripped),
            "Stripped version should not have `: string`"
        );

        // Stripped version should match JS version
        let js_tokens = {
            let path = PathBuf::from("test.js");
            tokenize_file(&path, "function foo(x) { return x; }").tokens
        };
        assert_eq!(
            stripped.len(),
            js_tokens.len(),
            "Stripped TS should produce same token count as JS"
        );
    }

    #[test]
    fn strip_types_removes_return_type_annotations() {
        let stripped = tokenize_cross_language("function foo(): string { return 'hello'; }");
        // Should NOT contain the return type annotation
        assert!(
            !has_string_annotation(&stripped),
            "Stripped version should not have return type annotation"
        );
    }

    #[test]
    fn strip_types_removes_interface_declarations() {
        let stripped = tokenize_cross_language("interface Foo { bar: string; }\nconst x = 42;");
        // Should NOT contain interface keyword
        assert!(
            !has_keyword(&stripped, KeywordType::Interface),
            "Stripped version should not contain interface declaration"
        );
        // Should still contain the const declaration
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should still contain const keyword"
        );
    }

    #[test]
    fn strip_types_removes_type_alias_declarations() {
        let stripped = tokenize_cross_language("type Result = string | number;\nconst x = 42;");
        assert!(
            !has_keyword(&stripped, KeywordType::Type),
            "Stripped version should not contain type alias"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should still contain const keyword"
        );
    }

    #[test]
    fn strip_types_preserves_runtime_code() {
        let stripped =
            tokenize_cross_language("const x: number = 42;\nif (x > 0) { console.log(x); }");
        // Should have const, x, =, 42, if, x, >, 0, console, log, x, etc.
        let has_42 = stripped
            .iter()
            .any(|t| matches!(&t.kind, TokenKind::NumericLiteral(n) if n == "42"));
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should preserve const"
        );
        assert!(has_keyword(&stripped, KeywordType::If), "Should preserve if");
        assert!(has_42, "Should preserve numeric literal");
    }

    #[test]
    fn strip_types_preserves_enums() {
        // Enums have runtime semantics, so they should NOT be stripped
        let stripped = tokenize_cross_language("enum Color { Red, Green, Blue }");
        assert!(
            has_keyword(&stripped, KeywordType::Enum),
            "Enums should be preserved (they have runtime semantics)"
        );
    }

    #[test]
    fn strip_types_removes_import_type() {
        let stripped = tokenize_cross_language("import type { Foo } from './foo';\nconst x = 42;");
        // Should NOT contain import keyword from the type-only import
        assert_eq!(
            count_keyword(&stripped, KeywordType::Import),
            0,
            "import type should be stripped"
        );
        // Should still contain the const declaration
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Runtime code should be preserved"
        );
    }

    #[test]
    fn strip_types_preserves_value_imports() {
        let stripped = tokenize_cross_language("import { foo } from './foo';\nconst x = foo();");
        assert!(
            has_keyword(&stripped, KeywordType::Import),
            "Value imports should be preserved"
        );
    }

    #[test]
    fn strip_types_removes_export_type() {
        let stripped = tokenize_cross_language("export type { Foo };\nconst x = 42;");
        // The export type should be stripped
        assert_eq!(
            count_keyword(&stripped, KeywordType::Export),
            0,
            "export type should be stripped"
        );
    }

    #[test]
    fn strip_types_removes_declare_module() {
        let stripped = tokenize_cross_language(
            "declare module 'foo' { export function bar(): void; }\nconst x = 42;",
        );
        // Should not contain function keyword from the declare block
        assert!(
            !has_keyword(&stripped, KeywordType::Function),
            "declare module contents should be stripped"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Runtime code should be preserved"
        );
    }
}