Skip to main content

fallow_core/duplicates/
tokenize.rs

1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
11/// A single token extracted from the AST with its source location.
12#[derive(Debug, Clone)]
13pub struct SourceToken {
14    /// The kind of token.
15    pub kind: TokenKind,
16    /// Byte offset into the source file.
17    pub span: Span,
18}
19
20/// Normalized token types for clone detection.
21#[derive(Debug, Clone, PartialEq, Eq, Hash)]
22pub enum TokenKind {
23    // Keywords
24    Keyword(KeywordType),
25    // Identifiers -- value is the actual name (blinded in semantic mode)
26    Identifier(String),
27    // Literals
28    StringLiteral(String),
29    NumericLiteral(String),
30    BooleanLiteral(bool),
31    NullLiteral,
32    TemplateLiteral,
33    RegExpLiteral,
34    // Operators
35    Operator(OperatorType),
36    // Punctuation / delimiters
37    Punctuation(PunctuationType),
38}
39
/// Keywords of JavaScript and TypeScript that the tokenizer can emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordType {
    // Declarations
    Var,
    Let,
    Const,
    Function,
    // Control flow
    Return,
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Break,
    Continue,
    Default,
    // Exceptions
    Throw,
    Try,
    Catch,
    Finally,
    // Keyword-style operators and special references
    New,
    Delete,
    Typeof,
    Instanceof,
    In,
    Of,
    Void,
    This,
    Super,
    // Classes
    Class,
    Extends,
    // Modules
    Import,
    Export,
    From,
    As,
    // Async / generators
    Async,
    Await,
    Yield,
    // Member modifiers
    Static,
    Get,
    Set,
    // TypeScript
    Type,
    Interface,
    Enum,
    Implements,
    Abstract,
    Declare,
    Readonly,
    Keyof,
    Satisfies,
}
93
/// Operators the tokenizer can emit, one variant per operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatorType {
    // Plain assignment
    Assign,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Exp,
    // Comparison
    Eq,
    NEq,
    StrictEq,
    StrictNEq,
    Lt,
    Gt,
    LtEq,
    GtEq,
    // Logical
    And,
    Or,
    Not,
    // Bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,
    ShiftLeft,
    ShiftRight,
    UnsignedShiftRight,
    // Miscellaneous
    NullishCoalescing,
    OptionalChaining,
    Spread,
    Ternary,
    Arrow,
    Comma,
    // Compound assignment
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    ExpAssign,
    AndAssign,
    OrAssign,
    NullishAssign,
    BitwiseAndAssign,
    BitwiseOrAssign,
    BitwiseXorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    UnsignedShiftRightAssign,
    // Update
    Increment,
    Decrement,
    // Relational keywords used as binary operators
    Instanceof,
    In,
}
148
/// Delimiters and punctuation marks the tokenizer can emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PunctuationType {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `.`
    Dot,
}
162
163/// Result of tokenizing a source file.
164#[derive(Debug, Clone)]
165pub struct FileTokens {
166    /// The extracted token sequence.
167    pub tokens: Vec<SourceToken>,
168    /// Source text (needed for extracting fragments).
169    pub source: String,
170    /// Total number of lines in the source.
171    pub line_count: usize,
172}
173
174/// Create a 1-byte span at the given byte position.
175///
176/// Used for synthetic punctuation tokens (`(`, `)`, `,`, `.`) that don't
177/// have their own AST span. Using the parent expression's full span would
178/// inflate clone line ranges, especially in chained method calls.
179fn point_span(pos: u32) -> Span {
180    Span::new(pos, pos + 1)
181}
182
183/// Tokenize a source file into a sequence of normalized tokens.
184///
185/// For Vue/Svelte SFC files, extracts `<script>` blocks first and tokenizes
186/// their content, mirroring the main analysis pipeline's SFC handling.
187pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
188    use crate::extract::{extract_sfc_scripts, is_sfc_file};
189
190    // For Vue/Svelte SFCs, extract and tokenize `<script>` blocks.
191    if is_sfc_file(path) {
192        let scripts = extract_sfc_scripts(source);
193        let mut all_tokens = Vec::new();
194
195        for script in &scripts {
196            let source_type = if script.is_typescript {
197                SourceType::tsx()
198            } else {
199                SourceType::jsx()
200            };
201            let allocator = Allocator::default();
202            let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
203
204            let mut extractor = TokenExtractor::new();
205            extractor.visit_program(&parser_return.program);
206
207            // Adjust token spans to reference positions in the full SFC source
208            // rather than the extracted script block.
209            let offset = script.byte_offset as u32;
210            for token in &mut extractor.tokens {
211                token.span = Span::new(token.span.start + offset, token.span.end + offset);
212            }
213            all_tokens.extend(extractor.tokens);
214        }
215
216        let line_count = source.lines().count().max(1);
217        return FileTokens {
218            tokens: all_tokens,
219            source: source.to_string(),
220            line_count,
221        };
222    }
223
224    let source_type = SourceType::from_path(path).unwrap_or_default();
225    let allocator = Allocator::default();
226    let parser_return = Parser::new(&allocator, source, source_type).parse();
227
228    let mut extractor = TokenExtractor::new();
229    extractor.visit_program(&parser_return.program);
230
231    let line_count = source.lines().count().max(1);
232
233    FileTokens {
234        tokens: extractor.tokens,
235        source: source.to_string(),
236        line_count,
237    }
238}
239
240/// AST visitor that extracts a flat sequence of normalized tokens.
241struct TokenExtractor {
242    tokens: Vec<SourceToken>,
243}
244
245impl TokenExtractor {
246    fn new() -> Self {
247        Self { tokens: Vec::new() }
248    }
249
250    fn push(&mut self, kind: TokenKind, span: Span) {
251        self.tokens.push(SourceToken { kind, span });
252    }
253
254    fn push_keyword(&mut self, kw: KeywordType, span: Span) {
255        self.push(TokenKind::Keyword(kw), span);
256    }
257
258    fn push_op(&mut self, op: OperatorType, span: Span) {
259        self.push(TokenKind::Operator(op), span);
260    }
261
262    fn push_punc(&mut self, p: PunctuationType, span: Span) {
263        self.push(TokenKind::Punctuation(p), span);
264    }
265}
266
267impl<'a> Visit<'a> for TokenExtractor {
268    // ── Statements ──────────────────────────────────────────
269
270    fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
271        let kw = match decl.kind {
272            VariableDeclarationKind::Var => KeywordType::Var,
273            VariableDeclarationKind::Let => KeywordType::Let,
274            VariableDeclarationKind::Const => KeywordType::Const,
275            VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
276                KeywordType::Const
277            }
278        };
279        self.push_keyword(kw, decl.span);
280        walk::walk_variable_declaration(self, decl);
281    }
282
283    fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
284        self.push_keyword(KeywordType::Return, stmt.span);
285        walk::walk_return_statement(self, stmt);
286    }
287
288    fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
289        self.push_keyword(KeywordType::If, stmt.span);
290        self.push_punc(PunctuationType::OpenParen, stmt.span);
291        self.visit_expression(&stmt.test);
292        self.push_punc(PunctuationType::CloseParen, stmt.span);
293        self.visit_statement(&stmt.consequent);
294        if let Some(alt) = &stmt.alternate {
295            self.push_keyword(KeywordType::Else, stmt.span);
296            self.visit_statement(alt);
297        }
298    }
299
300    fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
301        self.push_keyword(KeywordType::For, stmt.span);
302        self.push_punc(PunctuationType::OpenParen, stmt.span);
303        walk::walk_for_statement(self, stmt);
304        self.push_punc(PunctuationType::CloseParen, stmt.span);
305    }
306
307    fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
308        self.push_keyword(KeywordType::For, stmt.span);
309        self.push_punc(PunctuationType::OpenParen, stmt.span);
310        self.visit_for_statement_left(&stmt.left);
311        self.push_keyword(KeywordType::In, stmt.span);
312        self.visit_expression(&stmt.right);
313        self.push_punc(PunctuationType::CloseParen, stmt.span);
314        self.visit_statement(&stmt.body);
315    }
316
317    fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
318        self.push_keyword(KeywordType::For, stmt.span);
319        self.push_punc(PunctuationType::OpenParen, stmt.span);
320        self.visit_for_statement_left(&stmt.left);
321        self.push_keyword(KeywordType::Of, stmt.span);
322        self.visit_expression(&stmt.right);
323        self.push_punc(PunctuationType::CloseParen, stmt.span);
324        self.visit_statement(&stmt.body);
325    }
326
327    fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
328        self.push_keyword(KeywordType::While, stmt.span);
329        self.push_punc(PunctuationType::OpenParen, stmt.span);
330        walk::walk_while_statement(self, stmt);
331        self.push_punc(PunctuationType::CloseParen, stmt.span);
332    }
333
334    fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
335        self.push_keyword(KeywordType::Do, stmt.span);
336        walk::walk_do_while_statement(self, stmt);
337    }
338
339    fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
340        self.push_keyword(KeywordType::Switch, stmt.span);
341        self.push_punc(PunctuationType::OpenParen, stmt.span);
342        walk::walk_switch_statement(self, stmt);
343        self.push_punc(PunctuationType::CloseParen, stmt.span);
344    }
345
346    fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
347        if case.test.is_some() {
348            self.push_keyword(KeywordType::Case, case.span);
349        } else {
350            self.push_keyword(KeywordType::Default, case.span);
351        }
352        self.push_punc(PunctuationType::Colon, case.span);
353        walk::walk_switch_case(self, case);
354    }
355
356    fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
357        self.push_keyword(KeywordType::Break, stmt.span);
358    }
359
360    fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
361        self.push_keyword(KeywordType::Continue, stmt.span);
362    }
363
364    fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
365        self.push_keyword(KeywordType::Throw, stmt.span);
366        walk::walk_throw_statement(self, stmt);
367    }
368
369    fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
370        self.push_keyword(KeywordType::Try, stmt.span);
371        walk::walk_try_statement(self, stmt);
372    }
373
374    fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
375        self.push_keyword(KeywordType::Catch, clause.span);
376        walk::walk_catch_clause(self, clause);
377    }
378
379    fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
380        self.push_punc(PunctuationType::OpenBrace, block.span);
381        walk::walk_block_statement(self, block);
382        self.push_punc(PunctuationType::CloseBrace, block.span);
383    }
384
385    // ── Expressions ─────────────────────────────────────────
386
387    fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
388        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
389    }
390
391    fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
392        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
393    }
394
395    fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
396        self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
397    }
398
399    fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
400        let raw_str = lit
401            .raw
402            .as_ref()
403            .map_or_else(|| lit.value.to_string(), |r| r.to_string());
404        self.push(TokenKind::NumericLiteral(raw_str), lit.span);
405    }
406
407    fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
408        self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
409    }
410
411    fn visit_null_literal(&mut self, lit: &NullLiteral) {
412        self.push(TokenKind::NullLiteral, lit.span);
413    }
414
415    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
416        self.push(TokenKind::TemplateLiteral, lit.span);
417        walk::walk_template_literal(self, lit);
418    }
419
420    fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
421        self.push(TokenKind::RegExpLiteral, lit.span);
422    }
423
424    fn visit_this_expression(&mut self, expr: &ThisExpression) {
425        self.push_keyword(KeywordType::This, expr.span);
426    }
427
428    fn visit_super(&mut self, expr: &Super) {
429        self.push_keyword(KeywordType::Super, expr.span);
430    }
431
432    fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
433        self.push_punc(PunctuationType::OpenBracket, expr.span);
434        walk::walk_array_expression(self, expr);
435        self.push_punc(PunctuationType::CloseBracket, expr.span);
436    }
437
438    fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
439        self.push_punc(PunctuationType::OpenBrace, expr.span);
440        walk::walk_object_expression(self, expr);
441        self.push_punc(PunctuationType::CloseBrace, expr.span);
442    }
443
444    fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
445        self.visit_expression(&expr.callee);
446        // Use point spans for synthetic punctuation to avoid inflating clone
447        // ranges when call expressions are chained (expr.span covers the
448        // entire chain, not just this call's parentheses).
449        let open = point_span(expr.callee.span().end);
450        self.push_punc(PunctuationType::OpenParen, open);
451        for arg in &expr.arguments {
452            self.visit_argument(arg);
453            let comma = point_span(arg.span().end);
454            self.push_op(OperatorType::Comma, comma);
455        }
456        let close = point_span(expr.span.end.saturating_sub(1));
457        self.push_punc(PunctuationType::CloseParen, close);
458    }
459
460    fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
461        self.push_keyword(KeywordType::New, expr.span);
462        self.visit_expression(&expr.callee);
463        let open = point_span(expr.callee.span().end);
464        self.push_punc(PunctuationType::OpenParen, open);
465        for arg in &expr.arguments {
466            self.visit_argument(arg);
467            let comma = point_span(arg.span().end);
468            self.push_op(OperatorType::Comma, comma);
469        }
470        let close = point_span(expr.span.end.saturating_sub(1));
471        self.push_punc(PunctuationType::CloseParen, close);
472    }
473
474    fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
475        self.visit_expression(&expr.object);
476        // Use point span at the dot position (right after the object).
477        let dot = point_span(expr.object.span().end);
478        self.push_punc(PunctuationType::Dot, dot);
479        self.push(
480            TokenKind::Identifier(expr.property.name.to_string()),
481            expr.property.span,
482        );
483    }
484
485    fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
486        self.visit_expression(&expr.object);
487        let open = point_span(expr.object.span().end);
488        self.push_punc(PunctuationType::OpenBracket, open);
489        self.visit_expression(&expr.expression);
490        let close = point_span(expr.span.end.saturating_sub(1));
491        self.push_punc(PunctuationType::CloseBracket, close);
492    }
493
494    fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
495        self.visit_assignment_target(&expr.left);
496        let op = match expr.operator {
497            AssignmentOperator::Assign => OperatorType::Assign,
498            AssignmentOperator::Addition => OperatorType::AddAssign,
499            AssignmentOperator::Subtraction => OperatorType::SubAssign,
500            AssignmentOperator::Multiplication => OperatorType::MulAssign,
501            AssignmentOperator::Division => OperatorType::DivAssign,
502            AssignmentOperator::Remainder => OperatorType::ModAssign,
503            AssignmentOperator::Exponential => OperatorType::ExpAssign,
504            AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
505            AssignmentOperator::LogicalOr => OperatorType::OrAssign,
506            AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
507            AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
508            AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
509            AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
510            AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
511            AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
512            AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
513        };
514        self.push_op(op, expr.span);
515        self.visit_expression(&expr.right);
516    }
517
518    fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
519        self.visit_expression(&expr.left);
520        let op = match expr.operator {
521            BinaryOperator::Addition => OperatorType::Add,
522            BinaryOperator::Subtraction => OperatorType::Sub,
523            BinaryOperator::Multiplication => OperatorType::Mul,
524            BinaryOperator::Division => OperatorType::Div,
525            BinaryOperator::Remainder => OperatorType::Mod,
526            BinaryOperator::Exponential => OperatorType::Exp,
527            BinaryOperator::Equality => OperatorType::Eq,
528            BinaryOperator::Inequality => OperatorType::NEq,
529            BinaryOperator::StrictEquality => OperatorType::StrictEq,
530            BinaryOperator::StrictInequality => OperatorType::StrictNEq,
531            BinaryOperator::LessThan => OperatorType::Lt,
532            BinaryOperator::GreaterThan => OperatorType::Gt,
533            BinaryOperator::LessEqualThan => OperatorType::LtEq,
534            BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
535            BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
536            BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
537            BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
538            BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
539            BinaryOperator::ShiftRight => OperatorType::ShiftRight,
540            BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
541            BinaryOperator::Instanceof => OperatorType::Instanceof,
542            BinaryOperator::In => OperatorType::In,
543        };
544        self.push_op(op, expr.span);
545        self.visit_expression(&expr.right);
546    }
547
548    fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
549        self.visit_expression(&expr.left);
550        let op = match expr.operator {
551            LogicalOperator::And => OperatorType::And,
552            LogicalOperator::Or => OperatorType::Or,
553            LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
554        };
555        self.push_op(op, expr.span);
556        self.visit_expression(&expr.right);
557    }
558
559    fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
560        let op = match expr.operator {
561            UnaryOperator::UnaryPlus => OperatorType::Add,
562            UnaryOperator::UnaryNegation => OperatorType::Sub,
563            UnaryOperator::LogicalNot => OperatorType::Not,
564            UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
565            UnaryOperator::Typeof => {
566                self.push_keyword(KeywordType::Typeof, expr.span);
567                walk::walk_unary_expression(self, expr);
568                return;
569            }
570            UnaryOperator::Void => {
571                self.push_keyword(KeywordType::Void, expr.span);
572                walk::walk_unary_expression(self, expr);
573                return;
574            }
575            UnaryOperator::Delete => {
576                self.push_keyword(KeywordType::Delete, expr.span);
577                walk::walk_unary_expression(self, expr);
578                return;
579            }
580        };
581        self.push_op(op, expr.span);
582        walk::walk_unary_expression(self, expr);
583    }
584
585    fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
586        let op = match expr.operator {
587            UpdateOperator::Increment => OperatorType::Increment,
588            UpdateOperator::Decrement => OperatorType::Decrement,
589        };
590        if expr.prefix {
591            self.push_op(op, expr.span);
592        }
593        walk::walk_update_expression(self, expr);
594        if !expr.prefix {
595            self.push_op(op, expr.span);
596        }
597    }
598
599    fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
600        self.visit_expression(&expr.test);
601        self.push_op(OperatorType::Ternary, expr.span);
602        self.visit_expression(&expr.consequent);
603        self.push_punc(PunctuationType::Colon, expr.span);
604        self.visit_expression(&expr.alternate);
605    }
606
607    fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
608        if expr.r#async {
609            self.push_keyword(KeywordType::Async, expr.span);
610        }
611        let params_span = expr.params.span;
612        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
613        for param in &expr.params.items {
614            self.visit_binding_pattern(&param.pattern);
615            self.push_op(OperatorType::Comma, point_span(param.span.end));
616        }
617        self.push_punc(
618            PunctuationType::CloseParen,
619            point_span(params_span.end.saturating_sub(1)),
620        );
621        self.push_op(OperatorType::Arrow, point_span(params_span.end));
622        walk::walk_arrow_function_expression(self, expr);
623    }
624
625    fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
626        self.push_keyword(KeywordType::Yield, expr.span);
627        walk::walk_yield_expression(self, expr);
628    }
629
630    fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
631        self.push_keyword(KeywordType::Await, expr.span);
632        walk::walk_await_expression(self, expr);
633    }
634
635    fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
636        self.push_op(OperatorType::Spread, elem.span);
637        walk::walk_spread_element(self, elem);
638    }
639
640    fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
641        for (i, sub_expr) in expr.expressions.iter().enumerate() {
642            if i > 0 {
643                self.push_op(OperatorType::Comma, expr.span);
644            }
645            self.visit_expression(sub_expr);
646        }
647    }
648
649    // ── Functions ──────────────────────────────────────────
650
651    fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
652        if func.r#async {
653            self.push_keyword(KeywordType::Async, func.span);
654        }
655        self.push_keyword(KeywordType::Function, func.span);
656        if let Some(id) = &func.id {
657            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
658        }
659        let params_span = func.params.span;
660        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
661        for param in &func.params.items {
662            self.visit_binding_pattern(&param.pattern);
663            self.push_op(OperatorType::Comma, point_span(param.span.end));
664        }
665        self.push_punc(
666            PunctuationType::CloseParen,
667            point_span(params_span.end.saturating_sub(1)),
668        );
669        walk::walk_function(self, func, flags);
670    }
671
672    // ── Classes ─────────────────────────────────────────────
673
674    fn visit_class(&mut self, class: &Class<'a>) {
675        self.push_keyword(KeywordType::Class, class.span);
676        if let Some(id) = &class.id {
677            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
678        }
679        if class.super_class.is_some() {
680            self.push_keyword(KeywordType::Extends, class.span);
681        }
682        walk::walk_class(self, class);
683    }
684
685    // ── Import/Export ───────────────────────────────────────
686
687    fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
688        self.push_keyword(KeywordType::Import, decl.span);
689        walk::walk_import_declaration(self, decl);
690        self.push_keyword(KeywordType::From, decl.span);
691        self.push(
692            TokenKind::StringLiteral(decl.source.value.to_string()),
693            decl.source.span,
694        );
695    }
696
697    fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
698        self.push_keyword(KeywordType::Export, decl.span);
699        walk::walk_export_named_declaration(self, decl);
700    }
701
702    fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
703        self.push_keyword(KeywordType::Export, decl.span);
704        self.push_keyword(KeywordType::Default, decl.span);
705        walk::walk_export_default_declaration(self, decl);
706    }
707
708    fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
709        self.push_keyword(KeywordType::Export, decl.span);
710        self.push_keyword(KeywordType::From, decl.span);
711        self.push(
712            TokenKind::StringLiteral(decl.source.value.to_string()),
713            decl.source.span,
714        );
715    }
716
717    // ── TypeScript declarations ────────────────────────────
718
719    fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
720        self.push_keyword(KeywordType::Interface, decl.span);
721        walk::walk_ts_interface_declaration(self, decl);
722    }
723
724    fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
725        self.push_punc(PunctuationType::OpenBrace, body.span);
726        walk::walk_ts_interface_body(self, body);
727        self.push_punc(PunctuationType::CloseBrace, body.span);
728    }
729
730    fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
731        self.push_keyword(KeywordType::Type, decl.span);
732        walk::walk_ts_type_alias_declaration(self, decl);
733    }
734
735    fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
736        self.push_keyword(KeywordType::Enum, decl.span);
737        walk::walk_ts_enum_declaration(self, decl);
738    }
739
740    fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
741        self.push_punc(PunctuationType::OpenBrace, body.span);
742        walk::walk_ts_enum_body(self, body);
743        self.push_punc(PunctuationType::CloseBrace, body.span);
744    }
745
746    fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
747        walk::walk_ts_property_signature(self, sig);
748        self.push_punc(PunctuationType::Semicolon, sig.span);
749    }
750
751    fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
752        self.push_punc(PunctuationType::Colon, ann.span);
753        walk::walk_ts_type_annotation(self, ann);
754    }
755
756    fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
757        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
758    }
759
760    fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
761        self.push(TokenKind::Identifier("string".to_string()), it.span);
762    }
763
764    fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
765        self.push(TokenKind::Identifier("number".to_string()), it.span);
766    }
767
768    fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
769        self.push(TokenKind::Identifier("boolean".to_string()), it.span);
770    }
771
772    fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
773        self.push(TokenKind::Identifier("any".to_string()), it.span);
774    }
775
776    fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
777        self.push(TokenKind::Identifier("void".to_string()), it.span);
778    }
779
780    fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
781        self.push(TokenKind::NullLiteral, it.span);
782    }
783
784    fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
785        self.push(TokenKind::Identifier("undefined".to_string()), it.span);
786    }
787
788    fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
789        self.push(TokenKind::Identifier("never".to_string()), it.span);
790    }
791
792    fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
793        self.push(TokenKind::Identifier("unknown".to_string()), it.span);
794    }
795
796    // ── JSX ─────────────────────────────────────────────────
797
798    fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
799        self.push_punc(PunctuationType::OpenBracket, elem.span);
800        walk::walk_jsx_opening_element(self, elem);
801        self.push_punc(PunctuationType::CloseBracket, elem.span);
802    }
803
804    fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
805        self.push_punc(PunctuationType::OpenBracket, elem.span);
806        walk::walk_jsx_closing_element(self, elem);
807        self.push_punc(PunctuationType::CloseBracket, elem.span);
808    }
809
810    fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
811        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
812    }
813
814    fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
815        self.push_op(OperatorType::Spread, attr.span);
816        walk::walk_jsx_spread_attribute(self, attr);
817    }
818
819    // ── Misc ────────────────────────────────────────────────
820
821    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
822        self.visit_binding_pattern(&decl.id);
823        if let Some(init) = &decl.init {
824            self.push_op(OperatorType::Assign, decl.span);
825            self.visit_expression(init);
826        }
827        self.push_punc(PunctuationType::Semicolon, decl.span);
828    }
829
830    fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
831        walk::walk_expression_statement(self, stmt);
832        self.push_punc(PunctuationType::Semicolon, stmt.span);
833    }
834}
835
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `tokenize_file` on `code`, presenting it under the given file
    /// name, and returns just the resulting token list.
    fn tokenize_as(file_name: &str, code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from(file_name);
        tokenize_file(&path, code).tokens
    }

    /// Tokenizes `code` under the file name `test.ts`.
    fn tokenize(code: &str) -> Vec<SourceToken> {
        tokenize_as("test.ts", code)
    }

    /// Tokenizes `code` under the file name `test.tsx`.
    fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
        tokenize_as("test.tsx", code)
    }

    /// Returns `true` when at least one token's kind equals `wanted`.
    fn contains_kind(tokens: &[SourceToken], wanted: &TokenKind) -> bool {
        tokens.iter().any(|token| &token.kind == wanted)
    }

    /// Returns `true` when the token stream contains the given keyword.
    fn contains_keyword(tokens: &[SourceToken], keyword: KeywordType) -> bool {
        contains_kind(tokens, &TokenKind::Keyword(keyword))
    }

    /// Returns `true` when the token stream contains an identifier token
    /// with exactly the given name.
    fn contains_identifier(tokens: &[SourceToken], name: &str) -> bool {
        contains_kind(tokens, &TokenKind::Identifier(name.to_string()))
    }

    /// Returns `true` when the first token is the given keyword.
    ///
    /// Panics on an empty slice; callers assert non-emptiness first.
    fn starts_with_keyword(tokens: &[SourceToken], keyword: KeywordType) -> bool {
        tokens[0].kind == TokenKind::Keyword(keyword)
    }

    #[test]
    fn tokenize_variable_declaration() {
        let tokens = tokenize("const x = 42;");
        assert!(!tokens.is_empty());
        // Only the leading `const` keyword is checked here.
        assert!(starts_with_keyword(&tokens, KeywordType::Const));
    }

    #[test]
    fn tokenize_function_declaration() {
        let tokens = tokenize("function foo() { return 1; }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::Function));
    }

    #[test]
    fn tokenize_arrow_function() {
        let tokens = tokenize("const f = (a, b) => a + b;");
        assert!(!tokens.is_empty());
        let has_arrow = contains_kind(&tokens, &TokenKind::Operator(OperatorType::Arrow));
        assert!(has_arrow, "Should contain arrow operator");
    }

    #[test]
    fn tokenize_if_else() {
        let tokens = tokenize("if (x) { y; } else { z; }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::If));
        let has_else = contains_keyword(&tokens, KeywordType::Else);
        assert!(has_else, "Should contain else keyword");
    }

    #[test]
    fn tokenize_class() {
        let tokens = tokenize("class Foo extends Bar { }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::Class));
        let has_extends = contains_keyword(&tokens, KeywordType::Extends);
        assert!(has_extends, "Should contain extends keyword");
    }

    #[test]
    fn tokenize_string_literal() {
        let tokens = tokenize("const s = \"hello\";");
        let has_string = contains_kind(&tokens, &TokenKind::StringLiteral("hello".to_string()));
        assert!(has_string, "Should contain string literal");
    }

    #[test]
    fn tokenize_boolean_literal() {
        let tokens = tokenize("const b = true;");
        let has_bool = contains_kind(&tokens, &TokenKind::BooleanLiteral(true));
        assert!(has_bool, "Should contain boolean literal");
    }

    #[test]
    fn tokenize_null_literal() {
        let tokens = tokenize("const n = null;");
        let has_null = contains_kind(&tokens, &TokenKind::NullLiteral);
        assert!(has_null, "Should contain null literal");
    }

    #[test]
    fn tokenize_empty_file() {
        let tokens = tokenize("");
        assert!(tokens.is_empty());
    }

    #[test]
    fn tokenize_ts_interface() {
        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
        let has_interface = contains_keyword(&tokens, KeywordType::Interface);
        assert!(has_interface, "Should contain interface keyword");
        let has_bar = contains_identifier(&tokens, "bar");
        assert!(has_bar, "Should contain property name 'bar'");
        let has_string = contains_identifier(&tokens, "string");
        assert!(has_string, "Should contain type 'string'");
        // The stream must be long enough to be usable for clone detection.
        let produced = tokens.len();
        assert!(
            produced >= 10,
            "Interface should produce sufficient tokens, got {}",
            produced
        );
    }

    #[test]
    fn tokenize_ts_type_alias() {
        let tokens = tokenize("type Result = { ok: boolean; error: string; }");
        let has_type = contains_keyword(&tokens, KeywordType::Type);
        assert!(has_type, "Should contain type keyword");
    }

    #[test]
    fn tokenize_ts_enum() {
        let tokens = tokenize("enum Color { Red, Green, Blue }");
        let has_enum = contains_keyword(&tokens, KeywordType::Enum);
        assert!(has_enum, "Should contain enum keyword");
        let has_red = contains_identifier(&tokens, "Red");
        assert!(has_red, "Should contain enum member 'Red'");
    }

    #[test]
    fn tokenize_jsx_element() {
        let tokens =
            tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
        let has_div = contains_identifier(&tokens, "div");
        assert!(has_div, "Should contain JSX element name 'div'");
        let has_classname = contains_identifier(&tokens, "className");
        assert!(has_classname, "Should contain JSX attribute 'className'");
        // Count every open/close bracket punctuation token in the stream.
        let brackets = tokens
            .iter()
            .filter(|token| {
                matches!(
                    token.kind,
                    TokenKind::Punctuation(
                        PunctuationType::OpenBracket | PunctuationType::CloseBracket
                    )
                )
            })
            .count();
        assert!(
            brackets >= 4,
            "Should contain JSX angle brackets, got {brackets}"
        );
    }
}