1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
/// A single token recorded while walking a parsed program.
#[derive(Debug, Clone)]
pub struct SourceToken {
    /// What the token is (keyword, identifier, literal, operator or punctuation).
    pub kind: TokenKind,
    /// Byte span attached to the token. Many structural tokens (keywords,
    /// braces, operators) reuse the span of the AST node that produced them
    /// rather than the exact position of the token text.
    pub span: Span,
}
19
/// Classification of a [`SourceToken`], with the payload needed to compare tokens.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// A language keyword.
    Keyword(KeywordType),
    /// An identifier, carrying its name.
    Identifier(String),
    /// A string literal, carrying its value.
    StringLiteral(String),
    /// A numeric literal, carrying its raw source text when available and the
    /// stringified value otherwise.
    NumericLiteral(String),
    /// `true` or `false`.
    BooleanLiteral(bool),
    /// `null`.
    NullLiteral,
    /// A template literal; its contents are not stored on the token.
    TemplateLiteral,
    /// A regular-expression literal; its pattern is not stored on the token.
    RegExpLiteral,
    /// An operator.
    Operator(OperatorType),
    /// A punctuation mark.
    Punctuation(PunctuationType),
}
39
/// Keywords that can be carried by [`TokenKind::Keyword`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordType {
    // Declarations.
    Var,
    Let,
    Const,
    Function,
    // Statements and control flow.
    Return,
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Break,
    Continue,
    Default,
    Throw,
    Try,
    Catch,
    Finally,
    // Expression keywords.
    New,
    Delete,
    Typeof,
    Instanceof,
    In,
    Of,
    Void,
    This,
    Super,
    // Classes and modules.
    Class,
    Extends,
    Import,
    Export,
    From,
    As,
    // Async / generators.
    Async,
    Await,
    Yield,
    // Class member modifiers.
    Static,
    Get,
    Set,
    // TypeScript keywords.
    Type,
    Interface,
    Enum,
    Implements,
    Abstract,
    Declare,
    Readonly,
    Keyof,
    Satisfies,
}
93
/// Operators that can be carried by [`TokenKind::Operator`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperatorType {
    Assign,
    // Arithmetic. `Add` and `Sub` are also used for unary plus and negation.
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Exp,
    // Comparison.
    Eq,
    NEq,
    StrictEq,
    StrictNEq,
    Lt,
    Gt,
    LtEq,
    GtEq,
    // Logical.
    And,
    Or,
    Not,
    // Bitwise.
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,
    ShiftLeft,
    ShiftRight,
    UnsignedShiftRight,
    // Miscellaneous.
    NullishCoalescing,
    OptionalChaining,
    Spread,
    Ternary,
    Arrow,
    // Emitted between sequence-expression items and after each call argument
    // or function parameter.
    Comma,
    // Compound assignment.
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    ExpAssign,
    AndAssign,
    OrAssign,
    NullishAssign,
    BitwiseAndAssign,
    BitwiseOrAssign,
    BitwiseXorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    UnsignedShiftRightAssign,
    // Update operators.
    Increment,
    Decrement,
    // Binary-operator forms of `instanceof` / `in` (the `in` of a for-in loop
    // is recorded as `KeywordType::In` instead).
    Instanceof,
    In,
}
148
/// Punctuation marks that can be carried by [`TokenKind::Punctuation`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PunctuationType {
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    // The bracket pair is also used to delimit JSX opening/closing elements.
    OpenBracket,
    CloseBracket,
    Semicolon,
    Colon,
    Dot,
}
162
/// Result of tokenizing one file.
#[derive(Debug, Clone)]
pub struct FileTokens {
    /// Tokens in the order they were emitted by the AST walk.
    pub tokens: Vec<SourceToken>,
    /// Owned copy of the complete source text that was passed in.
    pub source: String,
    /// Number of lines in `source`, never less than 1.
    pub line_count: usize,
}
173
174const fn point_span(pos: u32) -> Span {
180 Span::new(pos, pos + 1)
181}
182
/// Tokenizes `source`, keeping TypeScript type syntax in the token stream.
///
/// `path` is not read from disk; it is only used to decide how `source`
/// should be parsed.
pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
    tokenize_file_inner(path, source, false)
}
195
/// Tokenizes `source`, optionally dropping type-only syntax.
///
/// When `strip_types` is `true`, the extractor skips type-only constructs
/// (type annotations, interfaces, type aliases, type-only imports/exports, ...).
/// `path` is only used to decide how `source` should be parsed.
pub fn tokenize_file_cross_language(path: &Path, source: &str, strip_types: bool) -> FileTokens {
    tokenize_file_inner(path, source, strip_types)
}
200
201fn tokenize_file_inner(path: &Path, source: &str, strip_types: bool) -> FileTokens {
202 use crate::extract::{
203 extract_astro_frontmatter, extract_mdx_statements, extract_sfc_scripts, is_sfc_file,
204 };
205
206 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
207
208 if is_sfc_file(path) {
210 let scripts = extract_sfc_scripts(source);
211 let mut all_tokens = Vec::new();
212
213 for script in &scripts {
214 let source_type = match (script.is_typescript, script.is_jsx) {
215 (true, true) => SourceType::tsx(),
216 (true, false) => SourceType::ts(),
217 (false, true) => SourceType::jsx(),
218 (false, false) => SourceType::mjs(),
219 };
220 let allocator = Allocator::default();
221 let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
222
223 let mut extractor = TokenExtractor::with_strip_types(strip_types);
224 extractor.visit_program(&parser_return.program);
225
226 let offset = script.byte_offset as u32;
229 for token in &mut extractor.tokens {
230 token.span = Span::new(token.span.start + offset, token.span.end + offset);
231 }
232 all_tokens.extend(extractor.tokens);
233 }
234
235 let line_count = source.lines().count().max(1);
236 return FileTokens {
237 tokens: all_tokens,
238 source: source.to_string(),
239 line_count,
240 };
241 }
242
243 if ext == "astro" {
245 if let Some(script) = extract_astro_frontmatter(source) {
246 let allocator = Allocator::default();
247 let parser_return = Parser::new(&allocator, &script.body, SourceType::ts()).parse();
248
249 let mut extractor = TokenExtractor::with_strip_types(strip_types);
250 extractor.visit_program(&parser_return.program);
251
252 let offset = script.byte_offset as u32;
253 for token in &mut extractor.tokens {
254 token.span = Span::new(token.span.start + offset, token.span.end + offset);
255 }
256
257 let line_count = source.lines().count().max(1);
258 return FileTokens {
259 tokens: extractor.tokens,
260 source: source.to_string(),
261 line_count,
262 };
263 }
264 let line_count = source.lines().count().max(1);
266 return FileTokens {
267 tokens: Vec::new(),
268 source: source.to_string(),
269 line_count,
270 };
271 }
272
273 if ext == "mdx" {
275 let statements = extract_mdx_statements(source);
276 if !statements.is_empty() {
277 let allocator = Allocator::default();
278 let parser_return = Parser::new(&allocator, &statements, SourceType::jsx()).parse();
279
280 let mut extractor = TokenExtractor::with_strip_types(strip_types);
281 extractor.visit_program(&parser_return.program);
282
283 let line_count = source.lines().count().max(1);
284 return FileTokens {
285 tokens: extractor.tokens,
286 source: source.to_string(),
287 line_count,
288 };
289 }
290 let line_count = source.lines().count().max(1);
291 return FileTokens {
292 tokens: Vec::new(),
293 source: source.to_string(),
294 line_count,
295 };
296 }
297
298 if ext == "css" || ext == "scss" {
300 let line_count = source.lines().count().max(1);
301 return FileTokens {
302 tokens: Vec::new(),
303 source: source.to_string(),
304 line_count,
305 };
306 }
307
308 let source_type = SourceType::from_path(path).unwrap_or_default();
309 let allocator = Allocator::default();
310 let parser_return = Parser::new(&allocator, source, source_type).parse();
311
312 let mut extractor = TokenExtractor::with_strip_types(strip_types);
313 extractor.visit_program(&parser_return.program);
314
315 if extractor.tokens.len() < 5 && source.len() > 100 && !source_type.is_jsx() {
318 let jsx_type = if source_type.is_typescript() {
319 SourceType::tsx()
320 } else {
321 SourceType::jsx()
322 };
323 let allocator2 = Allocator::default();
324 let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
325 let mut retry_extractor = TokenExtractor::with_strip_types(strip_types);
326 retry_extractor.visit_program(&retry_return.program);
327 if retry_extractor.tokens.len() > extractor.tokens.len() {
328 extractor = retry_extractor;
329 }
330 }
331
332 let line_count = source.lines().count().max(1);
333
334 FileTokens {
335 tokens: extractor.tokens,
336 source: source.to_string(),
337 line_count,
338 }
339}
340
/// AST visitor that flattens a parsed program into a list of [`SourceToken`]s.
struct TokenExtractor {
    /// Tokens collected so far, in emission order.
    tokens: Vec<SourceToken>,
    /// When `true`, type-only constructs are skipped instead of tokenized.
    strip_types: bool,
}
348
349impl TokenExtractor {
350 const fn with_strip_types(strip_types: bool) -> Self {
351 Self {
352 tokens: Vec::new(),
353 strip_types,
354 }
355 }
356
357 fn push(&mut self, kind: TokenKind, span: Span) {
358 self.tokens.push(SourceToken { kind, span });
359 }
360
361 fn push_keyword(&mut self, kw: KeywordType, span: Span) {
362 self.push(TokenKind::Keyword(kw), span);
363 }
364
365 fn push_op(&mut self, op: OperatorType, span: Span) {
366 self.push(TokenKind::Operator(op), span);
367 }
368
369 fn push_punc(&mut self, p: PunctuationType, span: Span) {
370 self.push(TokenKind::Punctuation(p), span);
371 }
372}
373
374impl<'a> Visit<'a> for TokenExtractor {
375 fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
378 let kw = match decl.kind {
379 VariableDeclarationKind::Var => KeywordType::Var,
380 VariableDeclarationKind::Let => KeywordType::Let,
381 VariableDeclarationKind::Const => KeywordType::Const,
382 VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
383 KeywordType::Const
384 }
385 };
386 self.push_keyword(kw, decl.span);
387 walk::walk_variable_declaration(self, decl);
388 }
389
390 fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
391 self.push_keyword(KeywordType::Return, stmt.span);
392 walk::walk_return_statement(self, stmt);
393 }
394
395 fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
396 self.push_keyword(KeywordType::If, stmt.span);
397 self.push_punc(PunctuationType::OpenParen, stmt.span);
398 self.visit_expression(&stmt.test);
399 self.push_punc(PunctuationType::CloseParen, stmt.span);
400 self.visit_statement(&stmt.consequent);
401 if let Some(alt) = &stmt.alternate {
402 self.push_keyword(KeywordType::Else, stmt.span);
403 self.visit_statement(alt);
404 }
405 }
406
407 fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
408 self.push_keyword(KeywordType::For, stmt.span);
409 self.push_punc(PunctuationType::OpenParen, stmt.span);
410 walk::walk_for_statement(self, stmt);
411 self.push_punc(PunctuationType::CloseParen, stmt.span);
412 }
413
414 fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
415 self.push_keyword(KeywordType::For, stmt.span);
416 self.push_punc(PunctuationType::OpenParen, stmt.span);
417 self.visit_for_statement_left(&stmt.left);
418 self.push_keyword(KeywordType::In, stmt.span);
419 self.visit_expression(&stmt.right);
420 self.push_punc(PunctuationType::CloseParen, stmt.span);
421 self.visit_statement(&stmt.body);
422 }
423
424 fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
425 self.push_keyword(KeywordType::For, stmt.span);
426 self.push_punc(PunctuationType::OpenParen, stmt.span);
427 self.visit_for_statement_left(&stmt.left);
428 self.push_keyword(KeywordType::Of, stmt.span);
429 self.visit_expression(&stmt.right);
430 self.push_punc(PunctuationType::CloseParen, stmt.span);
431 self.visit_statement(&stmt.body);
432 }
433
434 fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
435 self.push_keyword(KeywordType::While, stmt.span);
436 self.push_punc(PunctuationType::OpenParen, stmt.span);
437 walk::walk_while_statement(self, stmt);
438 self.push_punc(PunctuationType::CloseParen, stmt.span);
439 }
440
441 fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
442 self.push_keyword(KeywordType::Do, stmt.span);
443 walk::walk_do_while_statement(self, stmt);
444 }
445
446 fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
447 self.push_keyword(KeywordType::Switch, stmt.span);
448 self.push_punc(PunctuationType::OpenParen, stmt.span);
449 walk::walk_switch_statement(self, stmt);
450 self.push_punc(PunctuationType::CloseParen, stmt.span);
451 }
452
453 fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
454 if case.test.is_some() {
455 self.push_keyword(KeywordType::Case, case.span);
456 } else {
457 self.push_keyword(KeywordType::Default, case.span);
458 }
459 self.push_punc(PunctuationType::Colon, case.span);
460 walk::walk_switch_case(self, case);
461 }
462
463 fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
464 self.push_keyword(KeywordType::Break, stmt.span);
465 }
466
467 fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
468 self.push_keyword(KeywordType::Continue, stmt.span);
469 }
470
471 fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
472 self.push_keyword(KeywordType::Throw, stmt.span);
473 walk::walk_throw_statement(self, stmt);
474 }
475
476 fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
477 self.push_keyword(KeywordType::Try, stmt.span);
478 walk::walk_try_statement(self, stmt);
479 }
480
481 fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
482 self.push_keyword(KeywordType::Catch, clause.span);
483 walk::walk_catch_clause(self, clause);
484 }
485
486 fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
487 self.push_punc(PunctuationType::OpenBrace, block.span);
488 walk::walk_block_statement(self, block);
489 self.push_punc(PunctuationType::CloseBrace, block.span);
490 }
491
492 fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
495 self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
496 }
497
498 fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
499 self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
500 }
501
502 fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
503 self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
504 }
505
506 fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
507 let raw_str = lit
508 .raw
509 .as_ref()
510 .map_or_else(|| lit.value.to_string(), |r| r.to_string());
511 self.push(TokenKind::NumericLiteral(raw_str), lit.span);
512 }
513
514 fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
515 self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
516 }
517
518 fn visit_null_literal(&mut self, lit: &NullLiteral) {
519 self.push(TokenKind::NullLiteral, lit.span);
520 }
521
522 fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
523 self.push(TokenKind::TemplateLiteral, lit.span);
524 walk::walk_template_literal(self, lit);
525 }
526
527 fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
528 self.push(TokenKind::RegExpLiteral, lit.span);
529 }
530
531 fn visit_this_expression(&mut self, expr: &ThisExpression) {
532 self.push_keyword(KeywordType::This, expr.span);
533 }
534
535 fn visit_super(&mut self, expr: &Super) {
536 self.push_keyword(KeywordType::Super, expr.span);
537 }
538
539 fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
540 self.push_punc(PunctuationType::OpenBracket, expr.span);
541 walk::walk_array_expression(self, expr);
542 self.push_punc(PunctuationType::CloseBracket, expr.span);
543 }
544
545 fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
546 self.push_punc(PunctuationType::OpenBrace, expr.span);
547 walk::walk_object_expression(self, expr);
548 self.push_punc(PunctuationType::CloseBrace, expr.span);
549 }
550
551 fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
552 self.visit_expression(&expr.callee);
553 let open = point_span(expr.callee.span().end);
557 self.push_punc(PunctuationType::OpenParen, open);
558 for arg in &expr.arguments {
559 self.visit_argument(arg);
560 let comma = point_span(arg.span().end);
561 self.push_op(OperatorType::Comma, comma);
562 }
563 let close = point_span(expr.span.end.saturating_sub(1));
564 self.push_punc(PunctuationType::CloseParen, close);
565 }
566
567 fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
568 self.push_keyword(KeywordType::New, expr.span);
569 self.visit_expression(&expr.callee);
570 let open = point_span(expr.callee.span().end);
571 self.push_punc(PunctuationType::OpenParen, open);
572 for arg in &expr.arguments {
573 self.visit_argument(arg);
574 let comma = point_span(arg.span().end);
575 self.push_op(OperatorType::Comma, comma);
576 }
577 let close = point_span(expr.span.end.saturating_sub(1));
578 self.push_punc(PunctuationType::CloseParen, close);
579 }
580
581 fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
582 self.visit_expression(&expr.object);
583 let dot = point_span(expr.object.span().end);
585 self.push_punc(PunctuationType::Dot, dot);
586 self.push(
587 TokenKind::Identifier(expr.property.name.to_string()),
588 expr.property.span,
589 );
590 }
591
592 fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
593 self.visit_expression(&expr.object);
594 let open = point_span(expr.object.span().end);
595 self.push_punc(PunctuationType::OpenBracket, open);
596 self.visit_expression(&expr.expression);
597 let close = point_span(expr.span.end.saturating_sub(1));
598 self.push_punc(PunctuationType::CloseBracket, close);
599 }
600
601 fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
602 self.visit_assignment_target(&expr.left);
603 let op = match expr.operator {
604 AssignmentOperator::Assign => OperatorType::Assign,
605 AssignmentOperator::Addition => OperatorType::AddAssign,
606 AssignmentOperator::Subtraction => OperatorType::SubAssign,
607 AssignmentOperator::Multiplication => OperatorType::MulAssign,
608 AssignmentOperator::Division => OperatorType::DivAssign,
609 AssignmentOperator::Remainder => OperatorType::ModAssign,
610 AssignmentOperator::Exponential => OperatorType::ExpAssign,
611 AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
612 AssignmentOperator::LogicalOr => OperatorType::OrAssign,
613 AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
614 AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
615 AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
616 AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
617 AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
618 AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
619 AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
620 };
621 self.push_op(op, expr.span);
622 self.visit_expression(&expr.right);
623 }
624
625 fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
626 self.visit_expression(&expr.left);
627 let op = match expr.operator {
628 BinaryOperator::Addition => OperatorType::Add,
629 BinaryOperator::Subtraction => OperatorType::Sub,
630 BinaryOperator::Multiplication => OperatorType::Mul,
631 BinaryOperator::Division => OperatorType::Div,
632 BinaryOperator::Remainder => OperatorType::Mod,
633 BinaryOperator::Exponential => OperatorType::Exp,
634 BinaryOperator::Equality => OperatorType::Eq,
635 BinaryOperator::Inequality => OperatorType::NEq,
636 BinaryOperator::StrictEquality => OperatorType::StrictEq,
637 BinaryOperator::StrictInequality => OperatorType::StrictNEq,
638 BinaryOperator::LessThan => OperatorType::Lt,
639 BinaryOperator::GreaterThan => OperatorType::Gt,
640 BinaryOperator::LessEqualThan => OperatorType::LtEq,
641 BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
642 BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
643 BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
644 BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
645 BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
646 BinaryOperator::ShiftRight => OperatorType::ShiftRight,
647 BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
648 BinaryOperator::Instanceof => OperatorType::Instanceof,
649 BinaryOperator::In => OperatorType::In,
650 };
651 self.push_op(op, expr.span);
652 self.visit_expression(&expr.right);
653 }
654
655 fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
656 self.visit_expression(&expr.left);
657 let op = match expr.operator {
658 LogicalOperator::And => OperatorType::And,
659 LogicalOperator::Or => OperatorType::Or,
660 LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
661 };
662 self.push_op(op, expr.span);
663 self.visit_expression(&expr.right);
664 }
665
666 fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
667 let op = match expr.operator {
668 UnaryOperator::UnaryPlus => OperatorType::Add,
669 UnaryOperator::UnaryNegation => OperatorType::Sub,
670 UnaryOperator::LogicalNot => OperatorType::Not,
671 UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
672 UnaryOperator::Typeof => {
673 self.push_keyword(KeywordType::Typeof, expr.span);
674 walk::walk_unary_expression(self, expr);
675 return;
676 }
677 UnaryOperator::Void => {
678 self.push_keyword(KeywordType::Void, expr.span);
679 walk::walk_unary_expression(self, expr);
680 return;
681 }
682 UnaryOperator::Delete => {
683 self.push_keyword(KeywordType::Delete, expr.span);
684 walk::walk_unary_expression(self, expr);
685 return;
686 }
687 };
688 self.push_op(op, expr.span);
689 walk::walk_unary_expression(self, expr);
690 }
691
692 fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
693 let op = match expr.operator {
694 UpdateOperator::Increment => OperatorType::Increment,
695 UpdateOperator::Decrement => OperatorType::Decrement,
696 };
697 if expr.prefix {
698 self.push_op(op, expr.span);
699 }
700 walk::walk_update_expression(self, expr);
701 if !expr.prefix {
702 self.push_op(op, expr.span);
703 }
704 }
705
706 fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
707 self.visit_expression(&expr.test);
708 self.push_op(OperatorType::Ternary, expr.span);
709 self.visit_expression(&expr.consequent);
710 self.push_punc(PunctuationType::Colon, expr.span);
711 self.visit_expression(&expr.alternate);
712 }
713
714 fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
715 if expr.r#async {
716 self.push_keyword(KeywordType::Async, expr.span);
717 }
718 let params_span = expr.params.span;
719 self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
720 for param in &expr.params.items {
721 self.visit_binding_pattern(¶m.pattern);
722 self.push_op(OperatorType::Comma, point_span(param.span.end));
723 }
724 self.push_punc(
725 PunctuationType::CloseParen,
726 point_span(params_span.end.saturating_sub(1)),
727 );
728 self.push_op(OperatorType::Arrow, point_span(params_span.end));
729 walk::walk_arrow_function_expression(self, expr);
730 }
731
732 fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
733 self.push_keyword(KeywordType::Yield, expr.span);
734 walk::walk_yield_expression(self, expr);
735 }
736
737 fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
738 self.push_keyword(KeywordType::Await, expr.span);
739 walk::walk_await_expression(self, expr);
740 }
741
742 fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
743 self.push_op(OperatorType::Spread, elem.span);
744 walk::walk_spread_element(self, elem);
745 }
746
747 fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
748 for (i, sub_expr) in expr.expressions.iter().enumerate() {
749 if i > 0 {
750 self.push_op(OperatorType::Comma, expr.span);
751 }
752 self.visit_expression(sub_expr);
753 }
754 }
755
756 fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
759 if func.r#async {
760 self.push_keyword(KeywordType::Async, func.span);
761 }
762 self.push_keyword(KeywordType::Function, func.span);
763 if let Some(id) = &func.id {
764 self.push(TokenKind::Identifier(id.name.to_string()), id.span);
765 }
766 let params_span = func.params.span;
767 self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
768 for param in &func.params.items {
769 self.visit_binding_pattern(¶m.pattern);
770 self.push_op(OperatorType::Comma, point_span(param.span.end));
771 }
772 self.push_punc(
773 PunctuationType::CloseParen,
774 point_span(params_span.end.saturating_sub(1)),
775 );
776 walk::walk_function(self, func, flags);
777 }
778
779 fn visit_class(&mut self, class: &Class<'a>) {
782 self.push_keyword(KeywordType::Class, class.span);
783 if let Some(id) = &class.id {
784 self.push(TokenKind::Identifier(id.name.to_string()), id.span);
785 }
786 if class.super_class.is_some() {
787 self.push_keyword(KeywordType::Extends, class.span);
788 }
789 walk::walk_class(self, class);
790 }
791
792 fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
795 if self.strip_types && decl.import_kind.is_type() {
797 return;
798 }
799 self.push_keyword(KeywordType::Import, decl.span);
800 walk::walk_import_declaration(self, decl);
801 self.push_keyword(KeywordType::From, decl.span);
802 self.push(
803 TokenKind::StringLiteral(decl.source.value.to_string()),
804 decl.source.span,
805 );
806 }
807
808 fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
809 if self.strip_types && decl.export_kind.is_type() {
811 return;
812 }
813 self.push_keyword(KeywordType::Export, decl.span);
814 walk::walk_export_named_declaration(self, decl);
815 }
816
817 fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
818 self.push_keyword(KeywordType::Export, decl.span);
819 self.push_keyword(KeywordType::Default, decl.span);
820 walk::walk_export_default_declaration(self, decl);
821 }
822
823 fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
824 self.push_keyword(KeywordType::Export, decl.span);
825 self.push_keyword(KeywordType::From, decl.span);
826 self.push(
827 TokenKind::StringLiteral(decl.source.value.to_string()),
828 decl.source.span,
829 );
830 }
831
832 fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
835 if self.strip_types {
836 return; }
838 self.push_keyword(KeywordType::Interface, decl.span);
839 walk::walk_ts_interface_declaration(self, decl);
840 }
841
842 fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
843 self.push_punc(PunctuationType::OpenBrace, body.span);
844 walk::walk_ts_interface_body(self, body);
845 self.push_punc(PunctuationType::CloseBrace, body.span);
846 }
847
848 fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
849 if self.strip_types {
850 return; }
852 self.push_keyword(KeywordType::Type, decl.span);
853 walk::walk_ts_type_alias_declaration(self, decl);
854 }
855
856 fn visit_ts_module_declaration(&mut self, decl: &TSModuleDeclaration<'a>) {
857 if self.strip_types && decl.declare {
858 return; }
860 walk::walk_ts_module_declaration(self, decl);
861 }
862
863 fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
864 self.push_keyword(KeywordType::Enum, decl.span);
865 walk::walk_ts_enum_declaration(self, decl);
866 }
867
868 fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
869 self.push_punc(PunctuationType::OpenBrace, body.span);
870 walk::walk_ts_enum_body(self, body);
871 self.push_punc(PunctuationType::CloseBrace, body.span);
872 }
873
874 fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
875 walk::walk_ts_property_signature(self, sig);
876 self.push_punc(PunctuationType::Semicolon, sig.span);
877 }
878
879 fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
880 if self.strip_types {
881 return; }
883 self.push_punc(PunctuationType::Colon, ann.span);
884 walk::walk_ts_type_annotation(self, ann);
885 }
886
887 fn visit_ts_type_parameter_declaration(&mut self, decl: &TSTypeParameterDeclaration<'a>) {
888 if self.strip_types {
889 return; }
891 walk::walk_ts_type_parameter_declaration(self, decl);
892 }
893
894 fn visit_ts_type_parameter_instantiation(&mut self, inst: &TSTypeParameterInstantiation<'a>) {
895 if self.strip_types {
896 return; }
898 walk::walk_ts_type_parameter_instantiation(self, inst);
899 }
900
901 fn visit_ts_as_expression(&mut self, expr: &TSAsExpression<'a>) {
902 self.visit_expression(&expr.expression);
903 if !self.strip_types {
904 self.push_keyword(KeywordType::As, expr.span);
905 self.visit_ts_type(&expr.type_annotation);
906 }
907 }
908
909 fn visit_ts_satisfies_expression(&mut self, expr: &TSSatisfiesExpression<'a>) {
910 self.visit_expression(&expr.expression);
911 if !self.strip_types {
912 self.push_keyword(KeywordType::Satisfies, expr.span);
913 self.visit_ts_type(&expr.type_annotation);
914 }
915 }
916
917 fn visit_ts_non_null_expression(&mut self, expr: &TSNonNullExpression<'a>) {
918 self.visit_expression(&expr.expression);
919 }
921
922 fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
923 self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
924 }
925
926 fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
927 self.push(TokenKind::Identifier("string".to_string()), it.span);
928 }
929
930 fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
931 self.push(TokenKind::Identifier("number".to_string()), it.span);
932 }
933
934 fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
935 self.push(TokenKind::Identifier("boolean".to_string()), it.span);
936 }
937
938 fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
939 self.push(TokenKind::Identifier("any".to_string()), it.span);
940 }
941
942 fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
943 self.push(TokenKind::Identifier("void".to_string()), it.span);
944 }
945
946 fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
947 self.push(TokenKind::NullLiteral, it.span);
948 }
949
950 fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
951 self.push(TokenKind::Identifier("undefined".to_string()), it.span);
952 }
953
954 fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
955 self.push(TokenKind::Identifier("never".to_string()), it.span);
956 }
957
958 fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
959 self.push(TokenKind::Identifier("unknown".to_string()), it.span);
960 }
961
962 fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
965 self.push_punc(PunctuationType::OpenBracket, elem.span);
966 walk::walk_jsx_opening_element(self, elem);
967 self.push_punc(PunctuationType::CloseBracket, elem.span);
968 }
969
970 fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
971 self.push_punc(PunctuationType::OpenBracket, elem.span);
972 walk::walk_jsx_closing_element(self, elem);
973 self.push_punc(PunctuationType::CloseBracket, elem.span);
974 }
975
976 fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
977 self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
978 }
979
980 fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
981 self.push_op(OperatorType::Spread, attr.span);
982 walk::walk_jsx_spread_attribute(self, attr);
983 }
984
985 fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
988 self.visit_binding_pattern(&decl.id);
989 if let Some(init) = &decl.init {
990 self.push_op(OperatorType::Assign, decl.span);
991 self.visit_expression(init);
992 }
993 self.push_punc(PunctuationType::Semicolon, decl.span);
994 }
995
996 fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
997 walk::walk_expression_statement(self, stmt);
998 self.push_punc(PunctuationType::Semicolon, stmt.span);
999 }
1000}
1001
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // ------------------------------------------------------------------
    // Fixture helpers
    // ------------------------------------------------------------------

    /// Runs `tokenize_file` on `code`, pretending it was read from a file
    /// called `file_name`, and returns the resulting token list.
    fn tokenize_as(file_name: &str, code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from(file_name);
        tokenize_file(&path, code).tokens
    }

    /// Tokenizes `code` under the file name `test.ts`.
    fn tokenize(code: &str) -> Vec<SourceToken> {
        tokenize_as("test.ts", code)
    }

    /// Tokenizes `code` under the file name `test.tsx`.
    fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
        tokenize_as("test.tsx", code)
    }

    /// Tokenizes `code` under the file name `test.ts` through
    /// `tokenize_file_cross_language`, passing `true` as its third argument.
    // NOTE(review): the `true` flag presumably turns on type stripping (the
    // `strip_types_*` tests below rely on that) — confirm against the
    // definition of `tokenize_file_cross_language`.
    fn tokenize_cross_language(code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from("test.ts");
        tokenize_file_cross_language(&path, code, true).tokens
    }

    // ------------------------------------------------------------------
    // Token predicates
    // ------------------------------------------------------------------

    /// Returns `true` when at least one token has exactly the given `kind`.
    fn has_kind(tokens: &[SourceToken], kind: &TokenKind) -> bool {
        tokens.iter().any(|token| &token.kind == kind)
    }

    /// Returns `true` when the given `keyword` occurs anywhere in `tokens`.
    fn has_keyword(tokens: &[SourceToken], keyword: KeywordType) -> bool {
        has_kind(tokens, &TokenKind::Keyword(keyword))
    }

    /// Counts how many tokens are the given `keyword`.
    fn count_keyword(tokens: &[SourceToken], keyword: KeywordType) -> usize {
        tokens
            .iter()
            .filter(|token| token.kind == TokenKind::Keyword(keyword))
            .count()
    }

    /// Returns `true` when an identifier token spelled `name` occurs in `tokens`.
    fn has_identifier(tokens: &[SourceToken], name: &str) -> bool {
        has_kind(tokens, &TokenKind::Identifier(name.to_owned()))
    }

    /// Returns `true` when a `Colon` punctuation token is immediately
    /// followed by an identifier token spelled `name`.
    fn has_colon_then_identifier(tokens: &[SourceToken], name: &str) -> bool {
        tokens.windows(2).any(|pair| {
            matches!(
                pair[0].kind,
                TokenKind::Punctuation(PunctuationType::Colon)
            ) && matches!(&pair[1].kind, TokenKind::Identifier(id) if id == name)
        })
    }

    /// Asserts that `tokens` is non-empty and that its first token is `keyword`.
    #[track_caller]
    fn assert_starts_with_keyword(tokens: &[SourceToken], keyword: KeywordType) {
        assert!(!tokens.is_empty());
        assert!(tokens[0].kind == TokenKind::Keyword(keyword));
    }

    // ------------------------------------------------------------------
    // Plain tokenization
    // ------------------------------------------------------------------

    #[test]
    fn tokenize_variable_declaration() {
        let tokens = tokenize("const x = 42;");
        assert_starts_with_keyword(&tokens, KeywordType::Const);
    }

    #[test]
    fn tokenize_function_declaration() {
        let tokens = tokenize("function foo() { return 1; }");
        assert_starts_with_keyword(&tokens, KeywordType::Function);
    }

    #[test]
    fn tokenize_arrow_function() {
        let tokens = tokenize("const f = (a, b) => a + b;");
        assert!(!tokens.is_empty());
        assert!(
            has_kind(&tokens, &TokenKind::Operator(OperatorType::Arrow)),
            "Should contain arrow operator"
        );
    }

    #[test]
    fn tokenize_if_else() {
        let tokens = tokenize("if (x) { y; } else { z; }");
        assert_starts_with_keyword(&tokens, KeywordType::If);
        assert!(
            has_keyword(&tokens, KeywordType::Else),
            "Should contain else keyword"
        );
    }

    #[test]
    fn tokenize_class() {
        let tokens = tokenize("class Foo extends Bar { }");
        assert_starts_with_keyword(&tokens, KeywordType::Class);
        assert!(
            has_keyword(&tokens, KeywordType::Extends),
            "Should contain extends keyword"
        );
    }

    #[test]
    fn tokenize_string_literal() {
        let tokens = tokenize("const s = \"hello\";");
        assert!(
            has_kind(&tokens, &TokenKind::StringLiteral("hello".to_owned())),
            "Should contain string literal"
        );
    }

    #[test]
    fn tokenize_boolean_literal() {
        let tokens = tokenize("const b = true;");
        assert!(
            has_kind(&tokens, &TokenKind::BooleanLiteral(true)),
            "Should contain boolean literal"
        );
    }

    #[test]
    fn tokenize_null_literal() {
        let tokens = tokenize("const n = null;");
        assert!(
            has_kind(&tokens, &TokenKind::NullLiteral),
            "Should contain null literal"
        );
    }

    #[test]
    fn tokenize_empty_file() {
        let tokens = tokenize("");
        assert!(tokens.is_empty());
    }

    // ------------------------------------------------------------------
    // TypeScript-specific syntax
    // ------------------------------------------------------------------

    #[test]
    fn tokenize_ts_interface() {
        let tokens = tokenize("interface Foo { bar: string; baz: number; }");
        assert!(
            has_keyword(&tokens, KeywordType::Interface),
            "Should contain interface keyword"
        );
        assert!(
            has_identifier(&tokens, "bar"),
            "Should contain property name 'bar'"
        );
        assert!(
            has_identifier(&tokens, "string"),
            "Should contain type 'string'"
        );
        assert!(
            tokens.len() >= 10,
            "Interface should produce sufficient tokens, got {}",
            tokens.len()
        );
    }

    #[test]
    fn tokenize_ts_type_alias() {
        let tokens = tokenize("type Result = { ok: boolean; error: string; }");
        assert!(
            has_keyword(&tokens, KeywordType::Type),
            "Should contain type keyword"
        );
    }

    #[test]
    fn tokenize_ts_enum() {
        let tokens = tokenize("enum Color { Red, Green, Blue }");
        assert!(
            has_keyword(&tokens, KeywordType::Enum),
            "Should contain enum keyword"
        );
        assert!(
            has_identifier(&tokens, "Red"),
            "Should contain enum member 'Red'"
        );
    }

    // ------------------------------------------------------------------
    // JSX
    // ------------------------------------------------------------------

    #[test]
    fn tokenize_jsx_element() {
        let tokens =
            tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
        assert!(
            has_identifier(&tokens, "div"),
            "Should contain JSX element name 'div'"
        );
        assert!(
            has_identifier(&tokens, "className"),
            "Should contain JSX attribute 'className'"
        );

        // Tally opening and closing bracket punctuation together.
        let brackets = tokens
            .iter()
            .filter(|token| {
                matches!(
                    token.kind,
                    TokenKind::Punctuation(
                        PunctuationType::OpenBracket | PunctuationType::CloseBracket
                    )
                )
            })
            .count();
        assert!(
            brackets >= 4,
            "Should contain JSX angle brackets, got {brackets}"
        );
    }

    // ------------------------------------------------------------------
    // Cross-language tokenization (type stripping)
    // ------------------------------------------------------------------

    #[test]
    fn strip_types_removes_parameter_type_annotations() {
        let typed_source = "function foo(x: string) { return x; }";
        let ts_tokens = tokenize(typed_source);
        let stripped = tokenize_cross_language(typed_source);

        // Dropping the annotation must shrink the token stream.
        assert!(
            stripped.len() < ts_tokens.len(),
            "Stripped tokens ({}) should be fewer than full tokens ({})",
            stripped.len(),
            ts_tokens.len()
        );

        // Sanity check: the unstripped stream really does hold `: string`.
        assert!(
            has_colon_then_identifier(&ts_tokens, "string"),
            "Original should have `: string`"
        );

        // The stripped stream should be as long as that of the same function
        // written without the annotation and tokenized as `test.js`.
        let js_tokens = tokenize_as("test.js", "function foo(x) { return x; }");
        assert_eq!(
            stripped.len(),
            js_tokens.len(),
            "Stripped TS should produce same token count as JS"
        );
    }

    #[test]
    fn strip_types_removes_return_type_annotations() {
        let stripped = tokenize_cross_language("function foo(): string { return 'hello'; }");
        // A `string` identifier directly after a colon would be a surviving
        // return type annotation.
        assert!(
            !has_colon_then_identifier(&stripped, "string"),
            "Stripped version should not have return type annotation"
        );
    }

    #[test]
    fn strip_types_removes_interface_declarations() {
        let stripped = tokenize_cross_language("interface Foo { bar: string; }\nconst x = 42;");
        assert!(
            !has_keyword(&stripped, KeywordType::Interface),
            "Stripped version should not contain interface declaration"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should still contain const keyword"
        );
    }

    #[test]
    fn strip_types_removes_type_alias_declarations() {
        let stripped = tokenize_cross_language("type Result = string | number;\nconst x = 42;");
        assert!(
            !has_keyword(&stripped, KeywordType::Type),
            "Stripped version should not contain type alias"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should still contain const keyword"
        );
    }

    #[test]
    fn strip_types_preserves_runtime_code() {
        let stripped =
            tokenize_cross_language("const x: number = 42;\nif (x > 0) { console.log(x); }");
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Should preserve const"
        );
        assert!(has_keyword(&stripped, KeywordType::If), "Should preserve if");
        assert!(
            has_kind(&stripped, &TokenKind::NumericLiteral("42".to_owned())),
            "Should preserve numeric literal"
        );
    }

    #[test]
    fn strip_types_preserves_enums() {
        let stripped = tokenize_cross_language("enum Color { Red, Green, Blue }");
        assert!(
            has_keyword(&stripped, KeywordType::Enum),
            "Enums should be preserved (they have runtime semantics)"
        );
    }

    #[test]
    fn strip_types_removes_import_type() {
        let stripped = tokenize_cross_language("import type { Foo } from './foo';\nconst x = 42;");
        assert_eq!(
            count_keyword(&stripped, KeywordType::Import),
            0,
            "import type should be stripped"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Runtime code should be preserved"
        );
    }

    #[test]
    fn strip_types_preserves_value_imports() {
        let stripped = tokenize_cross_language("import { foo } from './foo';\nconst x = foo();");
        assert!(
            has_keyword(&stripped, KeywordType::Import),
            "Value imports should be preserved"
        );
    }

    #[test]
    fn strip_types_removes_export_type() {
        let stripped = tokenize_cross_language("export type { Foo };\nconst x = 42;");
        assert_eq!(
            count_keyword(&stripped, KeywordType::Export),
            0,
            "export type should be stripped"
        );
    }

    #[test]
    fn strip_types_removes_declare_module() {
        let stripped = tokenize_cross_language(
            "declare module 'foo' { export function bar(): void; }\nconst x = 42;",
        );
        // The only `function` keyword in the input sits inside the
        // `declare module` body, so none may remain.
        assert!(
            !has_keyword(&stripped, KeywordType::Function),
            "declare module contents should be stripped"
        );
        assert!(
            has_keyword(&stripped, KeywordType::Const),
            "Runtime code should be preserved"
        );
    }
}