Skip to main content

fallow_core/duplicates/
tokenize.rs

1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
11/// A single token extracted from the AST with its source location.
12#[derive(Debug, Clone)]
13pub struct SourceToken {
14    /// The kind of token.
15    pub kind: TokenKind,
16    /// Byte offset into the source file.
17    pub span: Span,
18}
19
20/// Normalized token types for clone detection.
21#[derive(Debug, Clone, PartialEq, Eq, Hash)]
22pub enum TokenKind {
23    // Keywords
24    Keyword(KeywordType),
25    // Identifiers -- value is the actual name (blinded in semantic mode)
26    Identifier(String),
27    // Literals
28    StringLiteral(String),
29    NumericLiteral(String),
30    BooleanLiteral(bool),
31    NullLiteral,
32    TemplateLiteral,
33    RegExpLiteral,
34    // Operators
35    Operator(OperatorType),
36    // Punctuation / delimiters
37    Punctuation(PunctuationType),
38}
39
/// Keywords of JavaScript and TypeScript that the tokenizer can represent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordType {
    // Variable declarations
    Var,
    Let,
    Const,
    // Functions and control flow
    Function,
    Return,
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Break,
    Continue,
    Default,
    // Exceptions
    Throw,
    Try,
    Catch,
    Finally,
    // Keyword operators
    New,
    Delete,
    Typeof,
    Instanceof,
    In,
    Of,
    Void,
    // Object model
    This,
    Super,
    Class,
    Extends,
    // Modules
    Import,
    Export,
    From,
    As,
    // Async / generators
    Async,
    Await,
    Yield,
    // Class member modifiers
    Static,
    Get,
    Set,
    // TypeScript
    Type,
    Interface,
    Enum,
    Implements,
    Abstract,
    Declare,
    Readonly,
    Keyof,
    Satisfies,
}
93
/// Operators the tokenizer can represent, one variant per operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatorType {
    // Plain assignment
    Assign,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Exp,
    // Equality
    Eq,
    NEq,
    StrictEq,
    StrictNEq,
    // Relational
    Lt,
    Gt,
    LtEq,
    GtEq,
    // Logical
    And,
    Or,
    Not,
    // Bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,
    ShiftLeft,
    ShiftRight,
    UnsignedShiftRight,
    // Miscellaneous
    NullishCoalescing,
    OptionalChaining,
    Spread,
    Ternary,
    Arrow,
    Comma,
    // Compound assignment
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    ExpAssign,
    AndAssign,
    OrAssign,
    NullishAssign,
    BitwiseAndAssign,
    BitwiseOrAssign,
    BitwiseXorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    UnsignedShiftRightAssign,
    // Update
    Increment,
    Decrement,
    // Keyword operators used in binary position
    Instanceof,
    In,
}
148
/// Delimiters and punctuation marks the tokenizer can represent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PunctuationType {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `.`
    Dot,
}
162
163/// Result of tokenizing a source file.
164#[derive(Debug, Clone)]
165pub struct FileTokens {
166    /// The extracted token sequence.
167    pub tokens: Vec<SourceToken>,
168    /// Source text (needed for extracting fragments).
169    pub source: String,
170    /// Total number of lines in the source.
171    pub line_count: usize,
172}
173
174/// Create a 1-byte span at the given byte position.
175///
176/// Used for synthetic punctuation tokens (`(`, `)`, `,`, `.`) that don't
177/// have their own AST span. Using the parent expression's full span would
178/// inflate clone line ranges, especially in chained method calls.
179fn point_span(pos: u32) -> Span {
180    Span::new(pos, pos + 1)
181}
182
183/// Tokenize a source file into a sequence of normalized tokens.
184///
185/// For Vue/Svelte SFC files, extracts `<script>` blocks first and tokenizes
186/// their content, mirroring the main analysis pipeline's SFC handling.
187pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
188    use crate::extract::{extract_sfc_scripts, is_sfc_file};
189
190    // For Vue/Svelte SFCs, extract and tokenize `<script>` blocks.
191    if is_sfc_file(path) {
192        let scripts = extract_sfc_scripts(source);
193        let mut all_tokens = Vec::new();
194
195        for script in &scripts {
196            let source_type = if script.is_typescript {
197                SourceType::tsx()
198            } else {
199                SourceType::jsx()
200            };
201            let allocator = Allocator::default();
202            let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
203
204            let mut extractor = TokenExtractor::new();
205            extractor.visit_program(&parser_return.program);
206
207            // Adjust token spans to reference positions in the full SFC source
208            // rather than the extracted script block.
209            let offset = script.byte_offset as u32;
210            for token in &mut extractor.tokens {
211                token.span = Span::new(token.span.start + offset, token.span.end + offset);
212            }
213            all_tokens.extend(extractor.tokens);
214        }
215
216        let line_count = source.lines().count().max(1);
217        return FileTokens {
218            tokens: all_tokens,
219            source: source.to_string(),
220            line_count,
221        };
222    }
223
224    let source_type = SourceType::from_path(path).unwrap_or_default();
225    let allocator = Allocator::default();
226    let parser_return = Parser::new(&allocator, source, source_type).parse();
227
228    let mut extractor = TokenExtractor::new();
229    extractor.visit_program(&parser_return.program);
230
231    // If parsing produced very few tokens relative to source size (likely parse errors
232    // from Flow types or JSX in .js files), retry with JSX/TSX source type as a fallback.
233    if extractor.tokens.len() < 5 && source.len() > 100 && !source_type.is_jsx() {
234        let jsx_type = if source_type.is_typescript() {
235            SourceType::tsx()
236        } else {
237            SourceType::jsx()
238        };
239        let allocator2 = Allocator::default();
240        let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
241        let mut retry_extractor = TokenExtractor::new();
242        retry_extractor.visit_program(&retry_return.program);
243        if retry_extractor.tokens.len() > extractor.tokens.len() {
244            extractor = retry_extractor;
245        }
246    }
247
248    let line_count = source.lines().count().max(1);
249
250    FileTokens {
251        tokens: extractor.tokens,
252        source: source.to_string(),
253        line_count,
254    }
255}
256
257/// AST visitor that extracts a flat sequence of normalized tokens.
258struct TokenExtractor {
259    tokens: Vec<SourceToken>,
260}
261
262impl TokenExtractor {
263    fn new() -> Self {
264        Self { tokens: Vec::new() }
265    }
266
267    fn push(&mut self, kind: TokenKind, span: Span) {
268        self.tokens.push(SourceToken { kind, span });
269    }
270
271    fn push_keyword(&mut self, kw: KeywordType, span: Span) {
272        self.push(TokenKind::Keyword(kw), span);
273    }
274
275    fn push_op(&mut self, op: OperatorType, span: Span) {
276        self.push(TokenKind::Operator(op), span);
277    }
278
279    fn push_punc(&mut self, p: PunctuationType, span: Span) {
280        self.push(TokenKind::Punctuation(p), span);
281    }
282}
283
284impl<'a> Visit<'a> for TokenExtractor {
285    // ── Statements ──────────────────────────────────────────
286
287    fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
288        let kw = match decl.kind {
289            VariableDeclarationKind::Var => KeywordType::Var,
290            VariableDeclarationKind::Let => KeywordType::Let,
291            VariableDeclarationKind::Const => KeywordType::Const,
292            VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
293                KeywordType::Const
294            }
295        };
296        self.push_keyword(kw, decl.span);
297        walk::walk_variable_declaration(self, decl);
298    }
299
300    fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
301        self.push_keyword(KeywordType::Return, stmt.span);
302        walk::walk_return_statement(self, stmt);
303    }
304
305    fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
306        self.push_keyword(KeywordType::If, stmt.span);
307        self.push_punc(PunctuationType::OpenParen, stmt.span);
308        self.visit_expression(&stmt.test);
309        self.push_punc(PunctuationType::CloseParen, stmt.span);
310        self.visit_statement(&stmt.consequent);
311        if let Some(alt) = &stmt.alternate {
312            self.push_keyword(KeywordType::Else, stmt.span);
313            self.visit_statement(alt);
314        }
315    }
316
317    fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
318        self.push_keyword(KeywordType::For, stmt.span);
319        self.push_punc(PunctuationType::OpenParen, stmt.span);
320        walk::walk_for_statement(self, stmt);
321        self.push_punc(PunctuationType::CloseParen, stmt.span);
322    }
323
324    fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
325        self.push_keyword(KeywordType::For, stmt.span);
326        self.push_punc(PunctuationType::OpenParen, stmt.span);
327        self.visit_for_statement_left(&stmt.left);
328        self.push_keyword(KeywordType::In, stmt.span);
329        self.visit_expression(&stmt.right);
330        self.push_punc(PunctuationType::CloseParen, stmt.span);
331        self.visit_statement(&stmt.body);
332    }
333
334    fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
335        self.push_keyword(KeywordType::For, stmt.span);
336        self.push_punc(PunctuationType::OpenParen, stmt.span);
337        self.visit_for_statement_left(&stmt.left);
338        self.push_keyword(KeywordType::Of, stmt.span);
339        self.visit_expression(&stmt.right);
340        self.push_punc(PunctuationType::CloseParen, stmt.span);
341        self.visit_statement(&stmt.body);
342    }
343
344    fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
345        self.push_keyword(KeywordType::While, stmt.span);
346        self.push_punc(PunctuationType::OpenParen, stmt.span);
347        walk::walk_while_statement(self, stmt);
348        self.push_punc(PunctuationType::CloseParen, stmt.span);
349    }
350
351    fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
352        self.push_keyword(KeywordType::Do, stmt.span);
353        walk::walk_do_while_statement(self, stmt);
354    }
355
356    fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
357        self.push_keyword(KeywordType::Switch, stmt.span);
358        self.push_punc(PunctuationType::OpenParen, stmt.span);
359        walk::walk_switch_statement(self, stmt);
360        self.push_punc(PunctuationType::CloseParen, stmt.span);
361    }
362
363    fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
364        if case.test.is_some() {
365            self.push_keyword(KeywordType::Case, case.span);
366        } else {
367            self.push_keyword(KeywordType::Default, case.span);
368        }
369        self.push_punc(PunctuationType::Colon, case.span);
370        walk::walk_switch_case(self, case);
371    }
372
373    fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
374        self.push_keyword(KeywordType::Break, stmt.span);
375    }
376
377    fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
378        self.push_keyword(KeywordType::Continue, stmt.span);
379    }
380
381    fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
382        self.push_keyword(KeywordType::Throw, stmt.span);
383        walk::walk_throw_statement(self, stmt);
384    }
385
386    fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
387        self.push_keyword(KeywordType::Try, stmt.span);
388        walk::walk_try_statement(self, stmt);
389    }
390
391    fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
392        self.push_keyword(KeywordType::Catch, clause.span);
393        walk::walk_catch_clause(self, clause);
394    }
395
396    fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
397        self.push_punc(PunctuationType::OpenBrace, block.span);
398        walk::walk_block_statement(self, block);
399        self.push_punc(PunctuationType::CloseBrace, block.span);
400    }
401
402    // ── Expressions ─────────────────────────────────────────
403
404    fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
405        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
406    }
407
408    fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
409        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
410    }
411
412    fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
413        self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
414    }
415
416    fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
417        let raw_str = lit
418            .raw
419            .as_ref()
420            .map_or_else(|| lit.value.to_string(), |r| r.to_string());
421        self.push(TokenKind::NumericLiteral(raw_str), lit.span);
422    }
423
424    fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
425        self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
426    }
427
428    fn visit_null_literal(&mut self, lit: &NullLiteral) {
429        self.push(TokenKind::NullLiteral, lit.span);
430    }
431
432    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
433        self.push(TokenKind::TemplateLiteral, lit.span);
434        walk::walk_template_literal(self, lit);
435    }
436
437    fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
438        self.push(TokenKind::RegExpLiteral, lit.span);
439    }
440
441    fn visit_this_expression(&mut self, expr: &ThisExpression) {
442        self.push_keyword(KeywordType::This, expr.span);
443    }
444
445    fn visit_super(&mut self, expr: &Super) {
446        self.push_keyword(KeywordType::Super, expr.span);
447    }
448
449    fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
450        self.push_punc(PunctuationType::OpenBracket, expr.span);
451        walk::walk_array_expression(self, expr);
452        self.push_punc(PunctuationType::CloseBracket, expr.span);
453    }
454
455    fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
456        self.push_punc(PunctuationType::OpenBrace, expr.span);
457        walk::walk_object_expression(self, expr);
458        self.push_punc(PunctuationType::CloseBrace, expr.span);
459    }
460
461    fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
462        self.visit_expression(&expr.callee);
463        // Use point spans for synthetic punctuation to avoid inflating clone
464        // ranges when call expressions are chained (expr.span covers the
465        // entire chain, not just this call's parentheses).
466        let open = point_span(expr.callee.span().end);
467        self.push_punc(PunctuationType::OpenParen, open);
468        for arg in &expr.arguments {
469            self.visit_argument(arg);
470            let comma = point_span(arg.span().end);
471            self.push_op(OperatorType::Comma, comma);
472        }
473        let close = point_span(expr.span.end.saturating_sub(1));
474        self.push_punc(PunctuationType::CloseParen, close);
475    }
476
477    fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
478        self.push_keyword(KeywordType::New, expr.span);
479        self.visit_expression(&expr.callee);
480        let open = point_span(expr.callee.span().end);
481        self.push_punc(PunctuationType::OpenParen, open);
482        for arg in &expr.arguments {
483            self.visit_argument(arg);
484            let comma = point_span(arg.span().end);
485            self.push_op(OperatorType::Comma, comma);
486        }
487        let close = point_span(expr.span.end.saturating_sub(1));
488        self.push_punc(PunctuationType::CloseParen, close);
489    }
490
491    fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
492        self.visit_expression(&expr.object);
493        // Use point span at the dot position (right after the object).
494        let dot = point_span(expr.object.span().end);
495        self.push_punc(PunctuationType::Dot, dot);
496        self.push(
497            TokenKind::Identifier(expr.property.name.to_string()),
498            expr.property.span,
499        );
500    }
501
502    fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
503        self.visit_expression(&expr.object);
504        let open = point_span(expr.object.span().end);
505        self.push_punc(PunctuationType::OpenBracket, open);
506        self.visit_expression(&expr.expression);
507        let close = point_span(expr.span.end.saturating_sub(1));
508        self.push_punc(PunctuationType::CloseBracket, close);
509    }
510
511    fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
512        self.visit_assignment_target(&expr.left);
513        let op = match expr.operator {
514            AssignmentOperator::Assign => OperatorType::Assign,
515            AssignmentOperator::Addition => OperatorType::AddAssign,
516            AssignmentOperator::Subtraction => OperatorType::SubAssign,
517            AssignmentOperator::Multiplication => OperatorType::MulAssign,
518            AssignmentOperator::Division => OperatorType::DivAssign,
519            AssignmentOperator::Remainder => OperatorType::ModAssign,
520            AssignmentOperator::Exponential => OperatorType::ExpAssign,
521            AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
522            AssignmentOperator::LogicalOr => OperatorType::OrAssign,
523            AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
524            AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
525            AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
526            AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
527            AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
528            AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
529            AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
530        };
531        self.push_op(op, expr.span);
532        self.visit_expression(&expr.right);
533    }
534
535    fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
536        self.visit_expression(&expr.left);
537        let op = match expr.operator {
538            BinaryOperator::Addition => OperatorType::Add,
539            BinaryOperator::Subtraction => OperatorType::Sub,
540            BinaryOperator::Multiplication => OperatorType::Mul,
541            BinaryOperator::Division => OperatorType::Div,
542            BinaryOperator::Remainder => OperatorType::Mod,
543            BinaryOperator::Exponential => OperatorType::Exp,
544            BinaryOperator::Equality => OperatorType::Eq,
545            BinaryOperator::Inequality => OperatorType::NEq,
546            BinaryOperator::StrictEquality => OperatorType::StrictEq,
547            BinaryOperator::StrictInequality => OperatorType::StrictNEq,
548            BinaryOperator::LessThan => OperatorType::Lt,
549            BinaryOperator::GreaterThan => OperatorType::Gt,
550            BinaryOperator::LessEqualThan => OperatorType::LtEq,
551            BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
552            BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
553            BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
554            BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
555            BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
556            BinaryOperator::ShiftRight => OperatorType::ShiftRight,
557            BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
558            BinaryOperator::Instanceof => OperatorType::Instanceof,
559            BinaryOperator::In => OperatorType::In,
560        };
561        self.push_op(op, expr.span);
562        self.visit_expression(&expr.right);
563    }
564
565    fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
566        self.visit_expression(&expr.left);
567        let op = match expr.operator {
568            LogicalOperator::And => OperatorType::And,
569            LogicalOperator::Or => OperatorType::Or,
570            LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
571        };
572        self.push_op(op, expr.span);
573        self.visit_expression(&expr.right);
574    }
575
576    fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
577        let op = match expr.operator {
578            UnaryOperator::UnaryPlus => OperatorType::Add,
579            UnaryOperator::UnaryNegation => OperatorType::Sub,
580            UnaryOperator::LogicalNot => OperatorType::Not,
581            UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
582            UnaryOperator::Typeof => {
583                self.push_keyword(KeywordType::Typeof, expr.span);
584                walk::walk_unary_expression(self, expr);
585                return;
586            }
587            UnaryOperator::Void => {
588                self.push_keyword(KeywordType::Void, expr.span);
589                walk::walk_unary_expression(self, expr);
590                return;
591            }
592            UnaryOperator::Delete => {
593                self.push_keyword(KeywordType::Delete, expr.span);
594                walk::walk_unary_expression(self, expr);
595                return;
596            }
597        };
598        self.push_op(op, expr.span);
599        walk::walk_unary_expression(self, expr);
600    }
601
602    fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
603        let op = match expr.operator {
604            UpdateOperator::Increment => OperatorType::Increment,
605            UpdateOperator::Decrement => OperatorType::Decrement,
606        };
607        if expr.prefix {
608            self.push_op(op, expr.span);
609        }
610        walk::walk_update_expression(self, expr);
611        if !expr.prefix {
612            self.push_op(op, expr.span);
613        }
614    }
615
616    fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
617        self.visit_expression(&expr.test);
618        self.push_op(OperatorType::Ternary, expr.span);
619        self.visit_expression(&expr.consequent);
620        self.push_punc(PunctuationType::Colon, expr.span);
621        self.visit_expression(&expr.alternate);
622    }
623
624    fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
625        if expr.r#async {
626            self.push_keyword(KeywordType::Async, expr.span);
627        }
628        let params_span = expr.params.span;
629        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
630        for param in &expr.params.items {
631            self.visit_binding_pattern(&param.pattern);
632            self.push_op(OperatorType::Comma, point_span(param.span.end));
633        }
634        self.push_punc(
635            PunctuationType::CloseParen,
636            point_span(params_span.end.saturating_sub(1)),
637        );
638        self.push_op(OperatorType::Arrow, point_span(params_span.end));
639        walk::walk_arrow_function_expression(self, expr);
640    }
641
642    fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
643        self.push_keyword(KeywordType::Yield, expr.span);
644        walk::walk_yield_expression(self, expr);
645    }
646
647    fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
648        self.push_keyword(KeywordType::Await, expr.span);
649        walk::walk_await_expression(self, expr);
650    }
651
652    fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
653        self.push_op(OperatorType::Spread, elem.span);
654        walk::walk_spread_element(self, elem);
655    }
656
657    fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
658        for (i, sub_expr) in expr.expressions.iter().enumerate() {
659            if i > 0 {
660                self.push_op(OperatorType::Comma, expr.span);
661            }
662            self.visit_expression(sub_expr);
663        }
664    }
665
666    // ── Functions ──────────────────────────────────────────
667
668    fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
669        if func.r#async {
670            self.push_keyword(KeywordType::Async, func.span);
671        }
672        self.push_keyword(KeywordType::Function, func.span);
673        if let Some(id) = &func.id {
674            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
675        }
676        let params_span = func.params.span;
677        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
678        for param in &func.params.items {
679            self.visit_binding_pattern(&param.pattern);
680            self.push_op(OperatorType::Comma, point_span(param.span.end));
681        }
682        self.push_punc(
683            PunctuationType::CloseParen,
684            point_span(params_span.end.saturating_sub(1)),
685        );
686        walk::walk_function(self, func, flags);
687    }
688
689    // ── Classes ─────────────────────────────────────────────
690
691    fn visit_class(&mut self, class: &Class<'a>) {
692        self.push_keyword(KeywordType::Class, class.span);
693        if let Some(id) = &class.id {
694            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
695        }
696        if class.super_class.is_some() {
697            self.push_keyword(KeywordType::Extends, class.span);
698        }
699        walk::walk_class(self, class);
700    }
701
702    // ── Import/Export ───────────────────────────────────────
703
704    fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
705        self.push_keyword(KeywordType::Import, decl.span);
706        walk::walk_import_declaration(self, decl);
707        self.push_keyword(KeywordType::From, decl.span);
708        self.push(
709            TokenKind::StringLiteral(decl.source.value.to_string()),
710            decl.source.span,
711        );
712    }
713
714    fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
715        self.push_keyword(KeywordType::Export, decl.span);
716        walk::walk_export_named_declaration(self, decl);
717    }
718
719    fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
720        self.push_keyword(KeywordType::Export, decl.span);
721        self.push_keyword(KeywordType::Default, decl.span);
722        walk::walk_export_default_declaration(self, decl);
723    }
724
725    fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
726        self.push_keyword(KeywordType::Export, decl.span);
727        self.push_keyword(KeywordType::From, decl.span);
728        self.push(
729            TokenKind::StringLiteral(decl.source.value.to_string()),
730            decl.source.span,
731        );
732    }
733
734    // ── TypeScript declarations ────────────────────────────
735
736    fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
737        self.push_keyword(KeywordType::Interface, decl.span);
738        walk::walk_ts_interface_declaration(self, decl);
739    }
740
741    fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
742        self.push_punc(PunctuationType::OpenBrace, body.span);
743        walk::walk_ts_interface_body(self, body);
744        self.push_punc(PunctuationType::CloseBrace, body.span);
745    }
746
747    fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
748        self.push_keyword(KeywordType::Type, decl.span);
749        walk::walk_ts_type_alias_declaration(self, decl);
750    }
751
752    fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
753        self.push_keyword(KeywordType::Enum, decl.span);
754        walk::walk_ts_enum_declaration(self, decl);
755    }
756
757    fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
758        self.push_punc(PunctuationType::OpenBrace, body.span);
759        walk::walk_ts_enum_body(self, body);
760        self.push_punc(PunctuationType::CloseBrace, body.span);
761    }
762
763    fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
764        walk::walk_ts_property_signature(self, sig);
765        self.push_punc(PunctuationType::Semicolon, sig.span);
766    }
767
768    fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
769        self.push_punc(PunctuationType::Colon, ann.span);
770        walk::walk_ts_type_annotation(self, ann);
771    }
772
773    fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
774        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
775    }
776
777    fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
778        self.push(TokenKind::Identifier("string".to_string()), it.span);
779    }
780
781    fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
782        self.push(TokenKind::Identifier("number".to_string()), it.span);
783    }
784
785    fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
786        self.push(TokenKind::Identifier("boolean".to_string()), it.span);
787    }
788
789    fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
790        self.push(TokenKind::Identifier("any".to_string()), it.span);
791    }
792
793    fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
794        self.push(TokenKind::Identifier("void".to_string()), it.span);
795    }
796
797    fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
798        self.push(TokenKind::NullLiteral, it.span);
799    }
800
801    fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
802        self.push(TokenKind::Identifier("undefined".to_string()), it.span);
803    }
804
805    fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
806        self.push(TokenKind::Identifier("never".to_string()), it.span);
807    }
808
809    fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
810        self.push(TokenKind::Identifier("unknown".to_string()), it.span);
811    }
812
813    // ── JSX ─────────────────────────────────────────────────
814
815    fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
816        self.push_punc(PunctuationType::OpenBracket, elem.span);
817        walk::walk_jsx_opening_element(self, elem);
818        self.push_punc(PunctuationType::CloseBracket, elem.span);
819    }
820
821    fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
822        self.push_punc(PunctuationType::OpenBracket, elem.span);
823        walk::walk_jsx_closing_element(self, elem);
824        self.push_punc(PunctuationType::CloseBracket, elem.span);
825    }
826
827    fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
828        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
829    }
830
831    fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
832        self.push_op(OperatorType::Spread, attr.span);
833        walk::walk_jsx_spread_attribute(self, attr);
834    }
835
836    // ── Misc ────────────────────────────────────────────────
837
838    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
839        self.visit_binding_pattern(&decl.id);
840        if let Some(init) = &decl.init {
841            self.push_op(OperatorType::Assign, decl.span);
842            self.visit_expression(init);
843        }
844        self.push_punc(PunctuationType::Semicolon, decl.span);
845    }
846
847    fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
848        walk::walk_expression_statement(self, stmt);
849        self.push_punc(PunctuationType::Semicolon, stmt.span);
850    }
851}
852
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `tokenize_file` on `code` with `file_name` as the path and
    /// returns the `tokens` field of the result.
    fn tokenize_named(file_name: &str, code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from(file_name);
        tokenize_file(&path, code).tokens
    }

    /// Tokenizes `code` under the path `test.ts`.
    fn tokenize(code: &str) -> Vec<SourceToken> {
        tokenize_named("test.ts", code)
    }

    /// Tokenizes `code` under the path `test.tsx`.
    fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
        tokenize_named("test.tsx", code)
    }

    /// Returns `true` when any token's kind equals `wanted`.
    fn has_kind(tokens: &[SourceToken], wanted: &TokenKind) -> bool {
        tokens.iter().any(|token| token.kind == *wanted)
    }

    /// Returns `true` when any token is the given keyword.
    fn has_keyword(tokens: &[SourceToken], keyword: KeywordType) -> bool {
        has_kind(tokens, &TokenKind::Keyword(keyword))
    }

    /// Returns `true` when any token is an identifier with exactly this name.
    fn has_identifier(tokens: &[SourceToken], name: &str) -> bool {
        has_kind(tokens, &TokenKind::Identifier(name.to_string()))
    }

    /// Returns `true` when the first token exists and is the given keyword.
    fn starts_with_keyword(tokens: &[SourceToken], keyword: KeywordType) -> bool {
        matches!(
            tokens.first(),
            Some(token) if token.kind == TokenKind::Keyword(keyword)
        )
    }

    #[test]
    fn tokenize_variable_declaration() {
        let tokens = tokenize("const x = 42;");
        assert!(!tokens.is_empty());
        // Only the leading `const` keyword is checked here.
        assert!(starts_with_keyword(&tokens, KeywordType::Const));
    }

    #[test]
    fn tokenize_function_declaration() {
        let tokens = tokenize("function foo() { return 1; }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::Function));
    }

    #[test]
    fn tokenize_arrow_function() {
        let tokens = tokenize("const f = (a, b) => a + b;");
        assert!(!tokens.is_empty());
        let arrow = TokenKind::Operator(OperatorType::Arrow);
        assert!(has_kind(&tokens, &arrow), "Should contain arrow operator");
    }

    #[test]
    fn tokenize_if_else() {
        let tokens = tokenize("if (x) { y; } else { z; }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::If));
        assert!(
            has_keyword(&tokens, KeywordType::Else),
            "Should contain else keyword"
        );
    }

    #[test]
    fn tokenize_class() {
        let tokens = tokenize("class Foo extends Bar { }");
        assert!(!tokens.is_empty());
        assert!(starts_with_keyword(&tokens, KeywordType::Class));
        assert!(
            has_keyword(&tokens, KeywordType::Extends),
            "Should contain extends keyword"
        );
    }

    #[test]
    fn tokenize_string_literal() {
        let tokens = tokenize("const s = \"hello\";");
        let hello = TokenKind::StringLiteral("hello".to_string());
        assert!(has_kind(&tokens, &hello), "Should contain string literal");
    }

    #[test]
    fn tokenize_boolean_literal() {
        let tokens = tokenize("const b = true;");
        let literal_true = TokenKind::BooleanLiteral(true);
        assert!(
            has_kind(&tokens, &literal_true),
            "Should contain boolean literal"
        );
    }

    #[test]
    fn tokenize_null_literal() {
        let tokens = tokenize("const n = null;");
        assert!(
            has_kind(&tokens, &TokenKind::NullLiteral),
            "Should contain null literal"
        );
    }

    #[test]
    fn tokenize_empty_file() {
        let tokens = tokenize("");
        assert!(tokens.is_empty());
    }

    #[test]
    fn tokenize_ts_interface() {
        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
        assert!(
            has_keyword(&tokens, KeywordType::Interface),
            "Should contain interface keyword"
        );
        assert!(
            has_identifier(&tokens, "bar"),
            "Should contain property name 'bar'"
        );
        assert!(
            has_identifier(&tokens, "string"),
            "Should contain type 'string'"
        );
        // The interface must yield at least ten tokens.
        assert!(
            tokens.len() >= 10,
            "Interface should produce sufficient tokens, got {}",
            tokens.len()
        );
    }

    #[test]
    fn tokenize_ts_type_alias() {
        let tokens = tokenize("type Result = { ok: boolean; error: string; }");
        assert!(
            has_keyword(&tokens, KeywordType::Type),
            "Should contain type keyword"
        );
    }

    #[test]
    fn tokenize_ts_enum() {
        let tokens = tokenize("enum Color { Red, Green, Blue }");
        assert!(
            has_keyword(&tokens, KeywordType::Enum),
            "Should contain enum keyword"
        );
        assert!(
            has_identifier(&tokens, "Red"),
            "Should contain enum member 'Red'"
        );
    }

    #[test]
    fn tokenize_jsx_element() {
        let tokens =
            tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
        assert!(
            has_identifier(&tokens, "div"),
            "Should contain JSX element name 'div'"
        );
        assert!(
            has_identifier(&tokens, "className"),
            "Should contain JSX attribute 'className'"
        );
        let open = TokenKind::Punctuation(PunctuationType::OpenBracket);
        let close = TokenKind::Punctuation(PunctuationType::CloseBracket);
        let brackets = tokens
            .iter()
            .filter(|token| token.kind == open || token.kind == close)
            .count();
        assert!(
            brackets >= 4,
            "Should contain JSX angle brackets, got {brackets}"
        );
    }
}