1use std::path::Path;
2
3use oxc_allocator::Allocator;
4use oxc_ast::ast::*;
5use oxc_ast_visit::Visit;
6use oxc_ast_visit::walk;
7use oxc_parser::Parser;
8use oxc_span::{GetSpan, SourceType, Span};
9use oxc_syntax::scope::ScopeFlags;
10
11#[derive(Debug, Clone)]
/// A single normalized token extracted from a source file.
pub struct SourceToken {
    /// What kind of lexical element this token represents.
    pub kind: TokenKind,
    /// Byte range of the token in the original source (approximate for
    /// synthesized punctuation — see `TokenExtractor`).
    pub span: Span,
}
19
20#[derive(Debug, Clone, PartialEq, Eq, Hash)]
22pub enum TokenKind {
23 Keyword(KeywordType),
25 Identifier(String),
27 StringLiteral(String),
29 NumericLiteral(String),
30 BooleanLiteral(bool),
31 NullLiteral,
32 TemplateLiteral,
33 RegExpLiteral,
34 Operator(OperatorType),
36 Punctuation(PunctuationType),
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
42pub enum KeywordType {
43 Var,
44 Let,
45 Const,
46 Function,
47 Return,
48 If,
49 Else,
50 For,
51 While,
52 Do,
53 Switch,
54 Case,
55 Break,
56 Continue,
57 Default,
58 Throw,
59 Try,
60 Catch,
61 Finally,
62 New,
63 Delete,
64 Typeof,
65 Instanceof,
66 In,
67 Of,
68 Void,
69 This,
70 Super,
71 Class,
72 Extends,
73 Import,
74 Export,
75 From,
76 As,
77 Async,
78 Await,
79 Yield,
80 Static,
81 Get,
82 Set,
83 Type,
84 Interface,
85 Enum,
86 Implements,
87 Abstract,
88 Declare,
89 Readonly,
90 Keyof,
91 Satisfies,
92}
93
94#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
96pub enum OperatorType {
97 Assign,
98 Add,
99 Sub,
100 Mul,
101 Div,
102 Mod,
103 Exp,
104 Eq,
105 NEq,
106 StrictEq,
107 StrictNEq,
108 Lt,
109 Gt,
110 LtEq,
111 GtEq,
112 And,
113 Or,
114 Not,
115 BitwiseAnd,
116 BitwiseOr,
117 BitwiseXor,
118 BitwiseNot,
119 ShiftLeft,
120 ShiftRight,
121 UnsignedShiftRight,
122 NullishCoalescing,
123 OptionalChaining,
124 Spread,
125 Ternary,
126 Arrow,
127 Comma,
128 AddAssign,
129 SubAssign,
130 MulAssign,
131 DivAssign,
132 ModAssign,
133 ExpAssign,
134 AndAssign,
135 OrAssign,
136 NullishAssign,
137 BitwiseAndAssign,
138 BitwiseOrAssign,
139 BitwiseXorAssign,
140 ShiftLeftAssign,
141 ShiftRightAssign,
142 UnsignedShiftRightAssign,
143 Increment,
144 Decrement,
145 Instanceof,
146 In,
147}
148
149#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
151pub enum PunctuationType {
152 OpenParen,
153 CloseParen,
154 OpenBrace,
155 CloseBrace,
156 OpenBracket,
157 CloseBracket,
158 Semicolon,
159 Colon,
160 Dot,
161}
162
163#[derive(Debug, Clone)]
165pub struct FileTokens {
166 pub tokens: Vec<SourceToken>,
168 pub source: String,
170 pub line_count: usize,
172}
173
174const fn point_span(pos: u32) -> Span {
180 Span::new(pos, pos + 1)
181}
182
/// Tokenize `source` as the file at `path`, keeping TypeScript type syntax.
///
/// Dispatch on file type (SFC, Astro, MDX, CSS, plain JS/TS) happens in
/// `tokenize_file_inner`.
pub fn tokenize_file(path: &Path, source: &str) -> FileTokens {
    tokenize_file_inner(path, source, false)
}
195
/// Tokenize `source`, optionally stripping TypeScript type-only syntax so
/// that TS and plain-JS versions of the same code produce comparable streams.
pub fn tokenize_file_cross_language(path: &Path, source: &str, strip_types: bool) -> FileTokens {
    tokenize_file_inner(path, source, strip_types)
}
200
201fn tokenize_file_inner(path: &Path, source: &str, strip_types: bool) -> FileTokens {
202 use crate::extract::{
203 extract_astro_frontmatter, extract_mdx_statements, extract_sfc_scripts, is_sfc_file,
204 };
205
206 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
207
208 if is_sfc_file(path) {
210 let scripts = extract_sfc_scripts(source);
211 let mut all_tokens = Vec::new();
212
213 for script in &scripts {
214 let source_type = match (script.is_typescript, script.is_jsx) {
215 (true, true) => SourceType::tsx(),
216 (true, false) => SourceType::ts(),
217 (false, true) => SourceType::jsx(),
218 (false, false) => SourceType::mjs(),
219 };
220 let allocator = Allocator::default();
221 let parser_return = Parser::new(&allocator, &script.body, source_type).parse();
222
223 let mut extractor = TokenExtractor::with_strip_types(strip_types);
224 extractor.visit_program(&parser_return.program);
225
226 let offset = script.byte_offset as u32;
229 for token in &mut extractor.tokens {
230 token.span = Span::new(token.span.start + offset, token.span.end + offset);
231 }
232 all_tokens.extend(extractor.tokens);
233 }
234
235 let line_count = source.lines().count().max(1);
236 return FileTokens {
237 tokens: all_tokens,
238 source: source.to_string(),
239 line_count,
240 };
241 }
242
243 if ext == "astro" {
245 if let Some(script) = extract_astro_frontmatter(source) {
246 let allocator = Allocator::default();
247 let parser_return = Parser::new(&allocator, &script.body, SourceType::ts()).parse();
248
249 let mut extractor = TokenExtractor::with_strip_types(strip_types);
250 extractor.visit_program(&parser_return.program);
251
252 let offset = script.byte_offset as u32;
253 for token in &mut extractor.tokens {
254 token.span = Span::new(token.span.start + offset, token.span.end + offset);
255 }
256
257 let line_count = source.lines().count().max(1);
258 return FileTokens {
259 tokens: extractor.tokens,
260 source: source.to_string(),
261 line_count,
262 };
263 }
264 let line_count = source.lines().count().max(1);
266 return FileTokens {
267 tokens: Vec::new(),
268 source: source.to_string(),
269 line_count,
270 };
271 }
272
273 if ext == "mdx" {
275 let statements = extract_mdx_statements(source);
276 if !statements.is_empty() {
277 let allocator = Allocator::default();
278 let parser_return = Parser::new(&allocator, &statements, SourceType::jsx()).parse();
279
280 let mut extractor = TokenExtractor::with_strip_types(strip_types);
281 extractor.visit_program(&parser_return.program);
282
283 let line_count = source.lines().count().max(1);
284 return FileTokens {
285 tokens: extractor.tokens,
286 source: source.to_string(),
287 line_count,
288 };
289 }
290 let line_count = source.lines().count().max(1);
291 return FileTokens {
292 tokens: Vec::new(),
293 source: source.to_string(),
294 line_count,
295 };
296 }
297
298 if ext == "css" || ext == "scss" {
300 let line_count = source.lines().count().max(1);
301 return FileTokens {
302 tokens: Vec::new(),
303 source: source.to_string(),
304 line_count,
305 };
306 }
307
308 let source_type = SourceType::from_path(path).unwrap_or_default();
309 let allocator = Allocator::default();
310 let parser_return = Parser::new(&allocator, source, source_type).parse();
311
312 let mut extractor = TokenExtractor::with_strip_types(strip_types);
313 extractor.visit_program(&parser_return.program);
314
315 if extractor.tokens.len() < 5 && source.len() > 100 && !source_type.is_jsx() {
318 let jsx_type = if source_type.is_typescript() {
319 SourceType::tsx()
320 } else {
321 SourceType::jsx()
322 };
323 let allocator2 = Allocator::default();
324 let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
325 let mut retry_extractor = TokenExtractor::with_strip_types(strip_types);
326 retry_extractor.visit_program(&retry_return.program);
327 if retry_extractor.tokens.len() > extractor.tokens.len() {
328 extractor = retry_extractor;
329 }
330 }
331
332 let line_count = source.lines().count().max(1);
333
334 FileTokens {
335 tokens: extractor.tokens,
336 source: source.to_string(),
337 line_count,
338 }
339}
340
/// AST visitor that flattens a parsed program into a linear token stream.
struct TokenExtractor {
    // Tokens collected so far, in visit order.
    tokens: Vec<SourceToken>,
    // When true, TypeScript type-only constructs are skipped entirely.
    strip_types: bool,
}
348
impl TokenExtractor {
    /// Create an extractor; `strip_types` drops TypeScript type-only syntax.
    const fn with_strip_types(strip_types: bool) -> Self {
        Self {
            tokens: Vec::new(),
            strip_types,
        }
    }

    /// Append a token of `kind` covering `span`.
    fn push(&mut self, kind: TokenKind, span: Span) {
        self.tokens.push(SourceToken { kind, span });
    }

    /// Convenience: append a keyword token.
    fn push_keyword(&mut self, kw: KeywordType, span: Span) {
        self.push(TokenKind::Keyword(kw), span);
    }

    /// Convenience: append an operator token.
    fn push_op(&mut self, op: OperatorType, span: Span) {
        self.push(TokenKind::Operator(op), span);
    }

    /// Convenience: append a punctuation token.
    fn push_punc(&mut self, p: PunctuationType, span: Span) {
        self.push(TokenKind::Punctuation(p), span);
    }
}
373
/// Flattens the AST into a linear token stream.
///
/// Emission order is the behavioral contract here — downstream consumers
/// compare token sequences, so each override chooses between `walk::*`
/// (default traversal) and manual child visits to interleave synthesized
/// tokens (parens, commas, braces) in reading order. Spans are best-effort:
/// keyword/operator tokens frequently reuse the span of their enclosing AST
/// node, and single-character punctuation is placed with `point_span` next to
/// the sub-expression it follows.
impl<'a> Visit<'a> for TokenExtractor {
    fn visit_variable_declaration(&mut self, decl: &VariableDeclaration<'a>) {
        let kw = match decl.kind {
            VariableDeclarationKind::Var => KeywordType::Var,
            VariableDeclarationKind::Let => KeywordType::Let,
            VariableDeclarationKind::Const => KeywordType::Const,
            // No dedicated variants for `using` / `await using`; fold them
            // into `const`, the closest analogue.
            VariableDeclarationKind::Using | VariableDeclarationKind::AwaitUsing => {
                KeywordType::Const
            }
        };
        self.push_keyword(kw, decl.span);
        walk::walk_variable_declaration(self, decl);
    }

    fn visit_return_statement(&mut self, stmt: &ReturnStatement<'a>) {
        self.push_keyword(KeywordType::Return, stmt.span);
        walk::walk_return_statement(self, stmt);
    }

    // Manual traversal so the synthesized parens and `else` land between the
    // test/consequent/alternate in reading order. All synthesized tokens
    // reuse the whole statement's span.
    fn visit_if_statement(&mut self, stmt: &IfStatement<'a>) {
        self.push_keyword(KeywordType::If, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_expression(&stmt.test);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.consequent);
        if let Some(alt) = &stmt.alternate {
            self.push_keyword(KeywordType::Else, stmt.span);
            self.visit_statement(alt);
        }
    }

    fn visit_for_statement(&mut self, stmt: &ForStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        walk::walk_for_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }

    fn visit_for_in_statement(&mut self, stmt: &ForInStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_for_statement_left(&stmt.left);
        self.push_keyword(KeywordType::In, stmt.span);
        self.visit_expression(&stmt.right);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.body);
    }

    fn visit_for_of_statement(&mut self, stmt: &ForOfStatement<'a>) {
        self.push_keyword(KeywordType::For, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        self.visit_for_statement_left(&stmt.left);
        self.push_keyword(KeywordType::Of, stmt.span);
        self.visit_expression(&stmt.right);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
        self.visit_statement(&stmt.body);
    }

    fn visit_while_statement(&mut self, stmt: &WhileStatement<'a>) {
        self.push_keyword(KeywordType::While, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        walk::walk_while_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }

    fn visit_do_while_statement(&mut self, stmt: &DoWhileStatement<'a>) {
        self.push_keyword(KeywordType::Do, stmt.span);
        walk::walk_do_while_statement(self, stmt);
    }

    fn visit_switch_statement(&mut self, stmt: &SwitchStatement<'a>) {
        self.push_keyword(KeywordType::Switch, stmt.span);
        self.push_punc(PunctuationType::OpenParen, stmt.span);
        walk::walk_switch_statement(self, stmt);
        self.push_punc(PunctuationType::CloseParen, stmt.span);
    }

    // A case with no test expression is the `default:` clause.
    fn visit_switch_case(&mut self, case: &SwitchCase<'a>) {
        if case.test.is_some() {
            self.push_keyword(KeywordType::Case, case.span);
        } else {
            self.push_keyword(KeywordType::Default, case.span);
        }
        self.push_punc(PunctuationType::Colon, case.span);
        walk::walk_switch_case(self, case);
    }

    // Labels (if any) are deliberately not emitted for break/continue.
    fn visit_break_statement(&mut self, stmt: &BreakStatement<'a>) {
        self.push_keyword(KeywordType::Break, stmt.span);
    }

    fn visit_continue_statement(&mut self, stmt: &ContinueStatement<'a>) {
        self.push_keyword(KeywordType::Continue, stmt.span);
    }

    fn visit_throw_statement(&mut self, stmt: &ThrowStatement<'a>) {
        self.push_keyword(KeywordType::Throw, stmt.span);
        walk::walk_throw_statement(self, stmt);
    }

    fn visit_try_statement(&mut self, stmt: &TryStatement<'a>) {
        self.push_keyword(KeywordType::Try, stmt.span);
        walk::walk_try_statement(self, stmt);
    }

    fn visit_catch_clause(&mut self, clause: &CatchClause<'a>) {
        self.push_keyword(KeywordType::Catch, clause.span);
        walk::walk_catch_clause(self, clause);
    }

    fn visit_block_statement(&mut self, block: &BlockStatement<'a>) {
        self.push_punc(PunctuationType::OpenBrace, block.span);
        walk::walk_block_statement(self, block);
        self.push_punc(PunctuationType::CloseBrace, block.span);
    }

    fn visit_identifier_reference(&mut self, ident: &IdentifierReference<'a>) {
        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
    }

    fn visit_binding_identifier(&mut self, ident: &BindingIdentifier<'a>) {
        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
    }

    fn visit_string_literal(&mut self, lit: &StringLiteral<'a>) {
        self.push(TokenKind::StringLiteral(lit.value.to_string()), lit.span);
    }

    // Prefer the raw source text (preserves radix/underscores); fall back to
    // the parsed value's formatting when no raw text is available.
    fn visit_numeric_literal(&mut self, lit: &NumericLiteral<'a>) {
        let raw_str = lit
            .raw
            .as_ref()
            .map_or_else(|| lit.value.to_string(), |r| r.to_string());
        self.push(TokenKind::NumericLiteral(raw_str), lit.span);
    }

    fn visit_boolean_literal(&mut self, lit: &BooleanLiteral) {
        self.push(TokenKind::BooleanLiteral(lit.value), lit.span);
    }

    fn visit_null_literal(&mut self, lit: &NullLiteral) {
        self.push(TokenKind::NullLiteral, lit.span);
    }

    // Marker token first, then the interpolated expressions via walk.
    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
        self.push(TokenKind::TemplateLiteral, lit.span);
        walk::walk_template_literal(self, lit);
    }

    fn visit_reg_exp_literal(&mut self, lit: &RegExpLiteral<'a>) {
        self.push(TokenKind::RegExpLiteral, lit.span);
    }

    fn visit_this_expression(&mut self, expr: &ThisExpression) {
        self.push_keyword(KeywordType::This, expr.span);
    }

    fn visit_super(&mut self, expr: &Super) {
        self.push_keyword(KeywordType::Super, expr.span);
    }

    fn visit_array_expression(&mut self, expr: &ArrayExpression<'a>) {
        self.push_punc(PunctuationType::OpenBracket, expr.span);
        walk::walk_array_expression(self, expr);
        self.push_punc(PunctuationType::CloseBracket, expr.span);
    }

    fn visit_object_expression(&mut self, expr: &ObjectExpression<'a>) {
        self.push_punc(PunctuationType::OpenBrace, expr.span);
        walk::walk_object_expression(self, expr);
        self.push_punc(PunctuationType::CloseBrace, expr.span);
    }

    // `(` is anchored just past the callee; `)` just before the call's end.
    // NOTE: a Comma token is emitted after every argument, including the
    // last — the stream is normalized, not a faithful re-lex.
    fn visit_call_expression(&mut self, expr: &CallExpression<'a>) {
        self.visit_expression(&expr.callee);
        let open = point_span(expr.callee.span().end);
        self.push_punc(PunctuationType::OpenParen, open);
        for arg in &expr.arguments {
            self.visit_argument(arg);
            let comma = point_span(arg.span().end);
            self.push_op(OperatorType::Comma, comma);
        }
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseParen, close);
    }

    // Same shape as visit_call_expression, prefixed with `new`.
    fn visit_new_expression(&mut self, expr: &NewExpression<'a>) {
        self.push_keyword(KeywordType::New, expr.span);
        self.visit_expression(&expr.callee);
        let open = point_span(expr.callee.span().end);
        self.push_punc(PunctuationType::OpenParen, open);
        for arg in &expr.arguments {
            self.visit_argument(arg);
            let comma = point_span(arg.span().end);
            self.push_op(OperatorType::Comma, comma);
        }
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseParen, close);
    }

    fn visit_static_member_expression(&mut self, expr: &StaticMemberExpression<'a>) {
        self.visit_expression(&expr.object);
        let dot = point_span(expr.object.span().end);
        self.push_punc(PunctuationType::Dot, dot);
        self.push(
            TokenKind::Identifier(expr.property.name.to_string()),
            expr.property.span,
        );
    }

    fn visit_computed_member_expression(&mut self, expr: &ComputedMemberExpression<'a>) {
        self.visit_expression(&expr.object);
        let open = point_span(expr.object.span().end);
        self.push_punc(PunctuationType::OpenBracket, open);
        self.visit_expression(&expr.expression);
        let close = point_span(expr.span.end.saturating_sub(1));
        self.push_punc(PunctuationType::CloseBracket, close);
    }

    fn visit_assignment_expression(&mut self, expr: &AssignmentExpression<'a>) {
        self.visit_assignment_target(&expr.left);
        let op = match expr.operator {
            AssignmentOperator::Assign => OperatorType::Assign,
            AssignmentOperator::Addition => OperatorType::AddAssign,
            AssignmentOperator::Subtraction => OperatorType::SubAssign,
            AssignmentOperator::Multiplication => OperatorType::MulAssign,
            AssignmentOperator::Division => OperatorType::DivAssign,
            AssignmentOperator::Remainder => OperatorType::ModAssign,
            AssignmentOperator::Exponential => OperatorType::ExpAssign,
            AssignmentOperator::LogicalAnd => OperatorType::AndAssign,
            AssignmentOperator::LogicalOr => OperatorType::OrAssign,
            AssignmentOperator::LogicalNullish => OperatorType::NullishAssign,
            AssignmentOperator::BitwiseAnd => OperatorType::BitwiseAndAssign,
            AssignmentOperator::BitwiseOR => OperatorType::BitwiseOrAssign,
            AssignmentOperator::BitwiseXOR => OperatorType::BitwiseXorAssign,
            AssignmentOperator::ShiftLeft => OperatorType::ShiftLeftAssign,
            AssignmentOperator::ShiftRight => OperatorType::ShiftRightAssign,
            AssignmentOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRightAssign,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }

    fn visit_binary_expression(&mut self, expr: &BinaryExpression<'a>) {
        self.visit_expression(&expr.left);
        let op = match expr.operator {
            BinaryOperator::Addition => OperatorType::Add,
            BinaryOperator::Subtraction => OperatorType::Sub,
            BinaryOperator::Multiplication => OperatorType::Mul,
            BinaryOperator::Division => OperatorType::Div,
            BinaryOperator::Remainder => OperatorType::Mod,
            BinaryOperator::Exponential => OperatorType::Exp,
            BinaryOperator::Equality => OperatorType::Eq,
            BinaryOperator::Inequality => OperatorType::NEq,
            BinaryOperator::StrictEquality => OperatorType::StrictEq,
            BinaryOperator::StrictInequality => OperatorType::StrictNEq,
            BinaryOperator::LessThan => OperatorType::Lt,
            BinaryOperator::GreaterThan => OperatorType::Gt,
            BinaryOperator::LessEqualThan => OperatorType::LtEq,
            BinaryOperator::GreaterEqualThan => OperatorType::GtEq,
            BinaryOperator::BitwiseAnd => OperatorType::BitwiseAnd,
            BinaryOperator::BitwiseOR => OperatorType::BitwiseOr,
            BinaryOperator::BitwiseXOR => OperatorType::BitwiseXor,
            BinaryOperator::ShiftLeft => OperatorType::ShiftLeft,
            BinaryOperator::ShiftRight => OperatorType::ShiftRight,
            BinaryOperator::ShiftRightZeroFill => OperatorType::UnsignedShiftRight,
            BinaryOperator::Instanceof => OperatorType::Instanceof,
            BinaryOperator::In => OperatorType::In,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }

    fn visit_logical_expression(&mut self, expr: &LogicalExpression<'a>) {
        self.visit_expression(&expr.left);
        let op = match expr.operator {
            LogicalOperator::And => OperatorType::And,
            LogicalOperator::Or => OperatorType::Or,
            LogicalOperator::Coalesce => OperatorType::NullishCoalescing,
        };
        self.push_op(op, expr.span);
        self.visit_expression(&expr.right);
    }

    // `typeof`/`void`/`delete` are emitted as keywords, not operators, so
    // those arms return early after walking the operand.
    fn visit_unary_expression(&mut self, expr: &UnaryExpression<'a>) {
        let op = match expr.operator {
            UnaryOperator::UnaryPlus => OperatorType::Add,
            UnaryOperator::UnaryNegation => OperatorType::Sub,
            UnaryOperator::LogicalNot => OperatorType::Not,
            UnaryOperator::BitwiseNot => OperatorType::BitwiseNot,
            UnaryOperator::Typeof => {
                self.push_keyword(KeywordType::Typeof, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
            UnaryOperator::Void => {
                self.push_keyword(KeywordType::Void, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
            UnaryOperator::Delete => {
                self.push_keyword(KeywordType::Delete, expr.span);
                walk::walk_unary_expression(self, expr);
                return;
            }
        };
        self.push_op(op, expr.span);
        walk::walk_unary_expression(self, expr);
    }

    // `++x` emits the operator before the operand, `x++` after.
    fn visit_update_expression(&mut self, expr: &UpdateExpression<'a>) {
        let op = match expr.operator {
            UpdateOperator::Increment => OperatorType::Increment,
            UpdateOperator::Decrement => OperatorType::Decrement,
        };
        if expr.prefix {
            self.push_op(op, expr.span);
        }
        walk::walk_update_expression(self, expr);
        if !expr.prefix {
            self.push_op(op, expr.span);
        }
    }

    fn visit_conditional_expression(&mut self, expr: &ConditionalExpression<'a>) {
        self.visit_expression(&expr.test);
        self.push_op(OperatorType::Ternary, expr.span);
        self.visit_expression(&expr.consequent);
        self.push_punc(PunctuationType::Colon, expr.span);
        self.visit_expression(&expr.alternate);
    }

    // NOTE(review): params are visited manually here and then
    // walk_arrow_function_expression traverses the node again — confirm
    // whether the resulting duplicate parameter tokens are intended.
    fn visit_arrow_function_expression(&mut self, expr: &ArrowFunctionExpression<'a>) {
        if expr.r#async {
            self.push_keyword(KeywordType::Async, expr.span);
        }
        let params_span = expr.params.span;
        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
        for param in &expr.params.items {
            self.visit_binding_pattern(&param.pattern);
            self.push_op(OperatorType::Comma, point_span(param.span.end));
        }
        self.push_punc(
            PunctuationType::CloseParen,
            point_span(params_span.end.saturating_sub(1)),
        );
        self.push_op(OperatorType::Arrow, point_span(params_span.end));
        walk::walk_arrow_function_expression(self, expr);
    }

    fn visit_yield_expression(&mut self, expr: &YieldExpression<'a>) {
        self.push_keyword(KeywordType::Yield, expr.span);
        walk::walk_yield_expression(self, expr);
    }

    fn visit_await_expression(&mut self, expr: &AwaitExpression<'a>) {
        self.push_keyword(KeywordType::Await, expr.span);
        walk::walk_await_expression(self, expr);
    }

    fn visit_spread_element(&mut self, elem: &SpreadElement<'a>) {
        self.push_op(OperatorType::Spread, elem.span);
        walk::walk_spread_element(self, elem);
    }

    // Comma separators only between expressions here (no trailing comma).
    fn visit_sequence_expression(&mut self, expr: &SequenceExpression<'a>) {
        for (i, sub_expr) in expr.expressions.iter().enumerate() {
            if i > 0 {
                self.push_op(OperatorType::Comma, expr.span);
            }
            self.visit_expression(sub_expr);
        }
    }

    // NOTE(review): same double-walk of params as in the arrow-function
    // visitor above (walk_function re-traverses id and params) — confirm.
    fn visit_function(&mut self, func: &Function<'a>, flags: ScopeFlags) {
        if func.r#async {
            self.push_keyword(KeywordType::Async, func.span);
        }
        self.push_keyword(KeywordType::Function, func.span);
        if let Some(id) = &func.id {
            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
        }
        let params_span = func.params.span;
        self.push_punc(PunctuationType::OpenParen, point_span(params_span.start));
        for param in &func.params.items {
            self.visit_binding_pattern(&param.pattern);
            self.push_op(OperatorType::Comma, point_span(param.span.end));
        }
        self.push_punc(
            PunctuationType::CloseParen,
            point_span(params_span.end.saturating_sub(1)),
        );
        walk::walk_function(self, func, flags);
    }

    fn visit_class(&mut self, class: &Class<'a>) {
        self.push_keyword(KeywordType::Class, class.span);
        if let Some(id) = &class.id {
            self.push(TokenKind::Identifier(id.name.to_string()), id.span);
        }
        if class.super_class.is_some() {
            self.push_keyword(KeywordType::Extends, class.span);
        }
        walk::walk_class(self, class);
    }

    // Type-only imports disappear at runtime; drop them when stripping types.
    fn visit_import_declaration(&mut self, decl: &ImportDeclaration<'a>) {
        if self.strip_types && decl.import_kind.is_type() {
            return;
        }
        self.push_keyword(KeywordType::Import, decl.span);
        walk::walk_import_declaration(self, decl);
        self.push_keyword(KeywordType::From, decl.span);
        self.push(
            TokenKind::StringLiteral(decl.source.value.to_string()),
            decl.source.span,
        );
    }

    fn visit_export_named_declaration(&mut self, decl: &ExportNamedDeclaration<'a>) {
        if self.strip_types && decl.export_kind.is_type() {
            return;
        }
        self.push_keyword(KeywordType::Export, decl.span);
        walk::walk_export_named_declaration(self, decl);
    }

    fn visit_export_default_declaration(&mut self, decl: &ExportDefaultDeclaration<'a>) {
        self.push_keyword(KeywordType::Export, decl.span);
        self.push_keyword(KeywordType::Default, decl.span);
        walk::walk_export_default_declaration(self, decl);
    }

    fn visit_export_all_declaration(&mut self, decl: &ExportAllDeclaration<'a>) {
        self.push_keyword(KeywordType::Export, decl.span);
        self.push_keyword(KeywordType::From, decl.span);
        self.push(
            TokenKind::StringLiteral(decl.source.value.to_string()),
            decl.source.span,
        );
    }

    fn visit_ts_interface_declaration(&mut self, decl: &TSInterfaceDeclaration<'a>) {
        if self.strip_types {
            return;
        }
        self.push_keyword(KeywordType::Interface, decl.span);
        walk::walk_ts_interface_declaration(self, decl);
    }

    fn visit_ts_interface_body(&mut self, body: &TSInterfaceBody<'a>) {
        self.push_punc(PunctuationType::OpenBrace, body.span);
        walk::walk_ts_interface_body(self, body);
        self.push_punc(PunctuationType::CloseBrace, body.span);
    }

    fn visit_ts_type_alias_declaration(&mut self, decl: &TSTypeAliasDeclaration<'a>) {
        if self.strip_types {
            return;
        }
        self.push_keyword(KeywordType::Type, decl.span);
        walk::walk_ts_type_alias_declaration(self, decl);
    }

    // `declare module` blocks are type-only; non-declare modules are kept.
    fn visit_ts_module_declaration(&mut self, decl: &TSModuleDeclaration<'a>) {
        if self.strip_types && decl.declare {
            return;
        }
        walk::walk_ts_module_declaration(self, decl);
    }

    // Enums exist at runtime, so they are kept even when stripping types.
    fn visit_ts_enum_declaration(&mut self, decl: &TSEnumDeclaration<'a>) {
        self.push_keyword(KeywordType::Enum, decl.span);
        walk::walk_ts_enum_declaration(self, decl);
    }

    fn visit_ts_enum_body(&mut self, body: &TSEnumBody<'a>) {
        self.push_punc(PunctuationType::OpenBrace, body.span);
        walk::walk_ts_enum_body(self, body);
        self.push_punc(PunctuationType::CloseBrace, body.span);
    }

    fn visit_ts_property_signature(&mut self, sig: &TSPropertySignature<'a>) {
        walk::walk_ts_property_signature(self, sig);
        self.push_punc(PunctuationType::Semicolon, sig.span);
    }

    fn visit_ts_type_annotation(&mut self, ann: &TSTypeAnnotation<'a>) {
        if self.strip_types {
            return;
        }
        self.push_punc(PunctuationType::Colon, ann.span);
        walk::walk_ts_type_annotation(self, ann);
    }

    fn visit_ts_type_parameter_declaration(&mut self, decl: &TSTypeParameterDeclaration<'a>) {
        if self.strip_types {
            return;
        }
        walk::walk_ts_type_parameter_declaration(self, decl);
    }

    fn visit_ts_type_parameter_instantiation(&mut self, inst: &TSTypeParameterInstantiation<'a>) {
        if self.strip_types {
            return;
        }
        walk::walk_ts_type_parameter_instantiation(self, inst);
    }

    // `expr as T`: the wrapped expression always survives; the `as T` part
    // is type-only and dropped when stripping.
    fn visit_ts_as_expression(&mut self, expr: &TSAsExpression<'a>) {
        self.visit_expression(&expr.expression);
        if !self.strip_types {
            self.push_keyword(KeywordType::As, expr.span);
            self.visit_ts_type(&expr.type_annotation);
        }
    }

    fn visit_ts_satisfies_expression(&mut self, expr: &TSSatisfiesExpression<'a>) {
        self.visit_expression(&expr.expression);
        if !self.strip_types {
            self.push_keyword(KeywordType::Satisfies, expr.span);
            self.visit_ts_type(&expr.type_annotation);
        }
    }

    // The `!` of a non-null assertion is never emitted — only the inner
    // expression — so TS and stripped-JS streams match here.
    fn visit_ts_non_null_expression(&mut self, expr: &TSNonNullExpression<'a>) {
        self.visit_expression(&expr.expression);
    }

    fn visit_identifier_name(&mut self, ident: &IdentifierName<'a>) {
        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
    }

    // Built-in TS type keywords are emitted as plain identifiers so they can
    // match identifier usage in untyped code.
    fn visit_ts_string_keyword(&mut self, it: &TSStringKeyword) {
        self.push(TokenKind::Identifier("string".to_string()), it.span);
    }

    fn visit_ts_number_keyword(&mut self, it: &TSNumberKeyword) {
        self.push(TokenKind::Identifier("number".to_string()), it.span);
    }

    fn visit_ts_boolean_keyword(&mut self, it: &TSBooleanKeyword) {
        self.push(TokenKind::Identifier("boolean".to_string()), it.span);
    }

    fn visit_ts_any_keyword(&mut self, it: &TSAnyKeyword) {
        self.push(TokenKind::Identifier("any".to_string()), it.span);
    }

    fn visit_ts_void_keyword(&mut self, it: &TSVoidKeyword) {
        self.push(TokenKind::Identifier("void".to_string()), it.span);
    }

    fn visit_ts_null_keyword(&mut self, it: &TSNullKeyword) {
        self.push(TokenKind::NullLiteral, it.span);
    }

    fn visit_ts_undefined_keyword(&mut self, it: &TSUndefinedKeyword) {
        self.push(TokenKind::Identifier("undefined".to_string()), it.span);
    }

    fn visit_ts_never_keyword(&mut self, it: &TSNeverKeyword) {
        self.push(TokenKind::Identifier("never".to_string()), it.span);
    }

    fn visit_ts_unknown_keyword(&mut self, it: &TSUnknownKeyword) {
        self.push(TokenKind::Identifier("unknown".to_string()), it.span);
    }

    // Square-bracket tokens stand in for the JSX angle brackets `<`/`>`.
    fn visit_jsx_opening_element(&mut self, elem: &JSXOpeningElement<'a>) {
        self.push_punc(PunctuationType::OpenBracket, elem.span);
        walk::walk_jsx_opening_element(self, elem);
        self.push_punc(PunctuationType::CloseBracket, elem.span);
    }

    fn visit_jsx_closing_element(&mut self, elem: &JSXClosingElement<'a>) {
        self.push_punc(PunctuationType::OpenBracket, elem.span);
        walk::walk_jsx_closing_element(self, elem);
        self.push_punc(PunctuationType::CloseBracket, elem.span);
    }

    fn visit_jsx_identifier(&mut self, ident: &JSXIdentifier<'a>) {
        self.push(TokenKind::Identifier(ident.name.to_string()), ident.span);
    }

    fn visit_jsx_spread_attribute(&mut self, attr: &JSXSpreadAttribute<'a>) {
        self.push_op(OperatorType::Spread, attr.span);
        walk::walk_jsx_spread_attribute(self, attr);
    }

    // Synthesizes `=` before the initializer and a terminating `;`; a `;`
    // is emitted for every declarator, even mid-list ones.
    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'a>) {
        self.visit_binding_pattern(&decl.id);
        if let Some(init) = &decl.init {
            self.push_op(OperatorType::Assign, decl.span);
            self.visit_expression(init);
        }
        self.push_punc(PunctuationType::Semicolon, decl.span);
    }

    fn visit_expression_statement(&mut self, stmt: &ExpressionStatement<'a>) {
        walk::walk_expression_statement(self, stmt);
        self.push_punc(PunctuationType::Semicolon, stmt.span);
    }
}
1001
1002#[cfg(test)]
1003mod tests {
1004 use super::*;
1005 use std::path::PathBuf;
1006
    /// Parse `code` as a `.ts` file and return just the token stream.
    fn tokenize(code: &str) -> Vec<SourceToken> {
        let path = PathBuf::from("test.ts");
        tokenize_file(&path, code).tokens
    }
1011
    #[test]
    fn tokenize_variable_declaration() {
        let tokens = tokenize("const x = 42;");
        assert!(!tokens.is_empty());
        // The `const` keyword must be the first token emitted.
        assert!(matches!(
            tokens[0].kind,
            TokenKind::Keyword(KeywordType::Const)
        ));
    }
1022
    #[test]
    fn tokenize_function_declaration() {
        let tokens = tokenize("function foo() { return 1; }");
        assert!(!tokens.is_empty());
        // The `function` keyword must be the first token emitted.
        assert!(matches!(
            tokens[0].kind,
            TokenKind::Keyword(KeywordType::Function)
        ));
    }
1032
    #[test]
    fn tokenize_arrow_function() {
        let tokens = tokenize("const f = (a, b) => a + b;");
        assert!(!tokens.is_empty());
        // The synthesized `=>` operator must appear somewhere in the stream.
        let has_arrow = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Arrow)));
        assert!(has_arrow, "Should contain arrow operator");
    }
1042
    #[test]
    fn tokenize_if_else() {
        let tokens = tokenize("if (x) { y; } else { z; }");
        assert!(!tokens.is_empty());
        // `if` leads the stream; `else` must be emitted for the alternate.
        assert!(matches!(
            tokens[0].kind,
            TokenKind::Keyword(KeywordType::If)
        ));
        let has_else = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Else)));
        assert!(has_else, "Should contain else keyword");
    }
1056
    #[test]
    fn tokenize_class() {
        let tokens = tokenize("class Foo extends Bar { }");
        assert!(!tokens.is_empty());
        // `class` leads; `extends` is emitted when a superclass is present.
        assert!(matches!(
            tokens[0].kind,
            TokenKind::Keyword(KeywordType::Class)
        ));
        let has_extends = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Extends)));
        assert!(has_extends, "Should contain extends keyword");
    }
1070
1071 #[test]
1072 fn tokenize_string_literal() {
1073 let tokens = tokenize("const s = \"hello\";");
1074 let has_string = tokens
1075 .iter()
1076 .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "hello"));
1077 assert!(has_string, "Should contain string literal");
1078 }
1079
1080 #[test]
1081 fn tokenize_boolean_literal() {
1082 let tokens = tokenize("const b = true;");
1083 let has_bool = tokens
1084 .iter()
1085 .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(true)));
1086 assert!(has_bool, "Should contain boolean literal");
1087 }
1088
1089 #[test]
1090 fn tokenize_null_literal() {
1091 let tokens = tokenize("const n = null;");
1092 let has_null = tokens
1093 .iter()
1094 .any(|t| matches!(t.kind, TokenKind::NullLiteral));
1095 assert!(has_null, "Should contain null literal");
1096 }
1097
1098 #[test]
1099 fn tokenize_empty_file() {
1100 let tokens = tokenize("");
1101 assert!(tokens.is_empty());
1102 }
1103
1104 #[test]
1105 fn tokenize_ts_interface() {
1106 let tokens = tokenize("interface Foo { bar: string; baz: number; }");
1107 let has_interface = tokens
1108 .iter()
1109 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Interface)));
1110 assert!(has_interface, "Should contain interface keyword");
1111 let has_bar = tokens
1112 .iter()
1113 .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "bar"));
1114 assert!(has_bar, "Should contain property name 'bar'");
1115 let has_string = tokens
1116 .iter()
1117 .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "string"));
1118 assert!(has_string, "Should contain type 'string'");
1119 assert!(
1121 tokens.len() >= 10,
1122 "Interface should produce sufficient tokens, got {}",
1123 tokens.len()
1124 );
1125 }
1126
1127 #[test]
1128 fn tokenize_ts_type_alias() {
1129 let tokens = tokenize("type Result = { ok: boolean; error: string; }");
1130 let has_type = tokens
1131 .iter()
1132 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
1133 assert!(has_type, "Should contain type keyword");
1134 }
1135
1136 #[test]
1137 fn tokenize_ts_enum() {
1138 let tokens = tokenize("enum Color { Red, Green, Blue }");
1139 let has_enum = tokens
1140 .iter()
1141 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
1142 assert!(has_enum, "Should contain enum keyword");
1143 let has_red = tokens
1144 .iter()
1145 .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "Red"));
1146 assert!(has_red, "Should contain enum member 'Red'");
1147 }
1148
1149 fn tokenize_tsx(code: &str) -> Vec<SourceToken> {
1150 let path = PathBuf::from("test.tsx");
1151 tokenize_file(&path, code).tokens
1152 }
1153
1154 fn tokenize_cross_language(code: &str) -> Vec<SourceToken> {
1155 let path = PathBuf::from("test.ts");
1156 tokenize_file_cross_language(&path, code, true).tokens
1157 }
1158
1159 #[test]
1160 fn tokenize_jsx_element() {
1161 let tokens =
1162 tokenize_tsx("const x = <div className=\"foo\"><Button onClick={handler} /></div>;");
1163 let has_div = tokens
1164 .iter()
1165 .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "div"));
1166 assert!(has_div, "Should contain JSX element name 'div'");
1167 let has_classname = tokens
1168 .iter()
1169 .any(|t| matches!(&t.kind, TokenKind::Identifier(name) if name == "className"));
1170 assert!(has_classname, "Should contain JSX attribute 'className'");
1171 let brackets = tokens
1172 .iter()
1173 .filter(|t| {
1174 matches!(
1175 t.kind,
1176 TokenKind::Punctuation(PunctuationType::OpenBracket)
1177 | TokenKind::Punctuation(PunctuationType::CloseBracket)
1178 )
1179 })
1180 .count();
1181 assert!(
1182 brackets >= 4,
1183 "Should contain JSX angle brackets, got {brackets}"
1184 );
1185 }
1186
1187 #[test]
1190 fn strip_types_removes_parameter_type_annotations() {
1191 let ts_tokens = tokenize("function foo(x: string) { return x; }");
1192 let stripped = tokenize_cross_language("function foo(x: string) { return x; }");
1193
1194 assert!(
1196 stripped.len() < ts_tokens.len(),
1197 "Stripped tokens ({}) should be fewer than full tokens ({})",
1198 stripped.len(),
1199 ts_tokens.len()
1200 );
1201
1202 let has_colon_before_string = ts_tokens.windows(2).any(|w| {
1204 matches!(w[0].kind, TokenKind::Punctuation(PunctuationType::Colon))
1205 && matches!(&w[1].kind, TokenKind::Identifier(n) if n == "string")
1206 });
1207 assert!(has_colon_before_string, "Original should have `: string`");
1208
1209 let js_tokens = {
1211 let path = PathBuf::from("test.js");
1212 tokenize_file(&path, "function foo(x) { return x; }").tokens
1213 };
1214 assert_eq!(
1215 stripped.len(),
1216 js_tokens.len(),
1217 "Stripped TS should produce same token count as JS"
1218 );
1219 }
1220
1221 #[test]
1222 fn strip_types_removes_return_type_annotations() {
1223 let stripped = tokenize_cross_language("function foo(): string { return 'hello'; }");
1224 let has_string_type = stripped.iter().enumerate().any(|(i, t)| {
1226 matches!(&t.kind, TokenKind::Identifier(n) if n == "string")
1227 && i > 0
1228 && matches!(
1229 stripped[i - 1].kind,
1230 TokenKind::Punctuation(PunctuationType::Colon)
1231 )
1232 });
1233 assert!(
1234 !has_string_type,
1235 "Stripped version should not have return type annotation"
1236 );
1237 }
1238
1239 #[test]
1240 fn strip_types_removes_interface_declarations() {
1241 let stripped = tokenize_cross_language("interface Foo { bar: string; }\nconst x = 42;");
1242 let has_interface = stripped
1244 .iter()
1245 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Interface)));
1246 assert!(
1247 !has_interface,
1248 "Stripped version should not contain interface declaration"
1249 );
1250 let has_const = stripped
1252 .iter()
1253 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1254 assert!(has_const, "Should still contain const keyword");
1255 }
1256
1257 #[test]
1258 fn strip_types_removes_type_alias_declarations() {
1259 let stripped = tokenize_cross_language("type Result = string | number;\nconst x = 42;");
1260 let has_type = stripped
1261 .iter()
1262 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
1263 assert!(!has_type, "Stripped version should not contain type alias");
1264 let has_const = stripped
1265 .iter()
1266 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1267 assert!(has_const, "Should still contain const keyword");
1268 }
1269
1270 #[test]
1271 fn strip_types_preserves_runtime_code() {
1272 let stripped =
1273 tokenize_cross_language("const x: number = 42;\nif (x > 0) { console.log(x); }");
1274 let has_const = stripped
1276 .iter()
1277 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1278 let has_if = stripped
1279 .iter()
1280 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::If)));
1281 let has_42 = stripped
1282 .iter()
1283 .any(|t| matches!(&t.kind, TokenKind::NumericLiteral(n) if n == "42"));
1284 assert!(has_const, "Should preserve const");
1285 assert!(has_if, "Should preserve if");
1286 assert!(has_42, "Should preserve numeric literal");
1287 }
1288
1289 #[test]
1290 fn strip_types_preserves_enums() {
1291 let stripped = tokenize_cross_language("enum Color { Red, Green, Blue }");
1293 let has_enum = stripped
1294 .iter()
1295 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
1296 assert!(
1297 has_enum,
1298 "Enums should be preserved (they have runtime semantics)"
1299 );
1300 }
1301
1302 #[test]
1303 fn strip_types_removes_import_type() {
1304 let stripped = tokenize_cross_language("import type { Foo } from './foo';\nconst x = 42;");
1305 let import_count = stripped
1307 .iter()
1308 .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)))
1309 .count();
1310 assert_eq!(import_count, 0, "import type should be stripped");
1311 let has_const = stripped
1313 .iter()
1314 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1315 assert!(has_const, "Runtime code should be preserved");
1316 }
1317
1318 #[test]
1319 fn strip_types_preserves_value_imports() {
1320 let stripped = tokenize_cross_language("import { foo } from './foo';\nconst x = foo();");
1321 let has_import = stripped
1322 .iter()
1323 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1324 assert!(has_import, "Value imports should be preserved");
1325 }
1326
1327 #[test]
1328 fn strip_types_removes_export_type() {
1329 let stripped = tokenize_cross_language("export type { Foo };\nconst x = 42;");
1330 let export_count = stripped
1332 .iter()
1333 .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)))
1334 .count();
1335 assert_eq!(export_count, 0, "export type should be stripped");
1336 }
1337
1338 #[test]
1339 fn strip_types_removes_declare_module() {
1340 let stripped = tokenize_cross_language(
1341 "declare module 'foo' { export function bar(): void; }\nconst x = 42;",
1342 );
1343 let has_function_keyword = stripped
1345 .iter()
1346 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
1347 assert!(
1348 !has_function_keyword,
1349 "declare module contents should be stripped"
1350 );
1351 let has_const = stripped
1352 .iter()
1353 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1354 assert!(has_const, "Runtime code should be preserved");
1355 }
1356
1357 #[test]
1360 fn tokenize_vue_sfc_extracts_script_block() {
1361 let vue_source = r#"<template><div>Hello</div></template>
1362<script lang="ts">
1363import { ref } from 'vue';
1364const count = ref(0);
1365</script>"#;
1366 let path = PathBuf::from("Component.vue");
1367 let result = tokenize_file(&path, vue_source);
1368 assert!(!result.tokens.is_empty(), "Vue SFC should produce tokens");
1369 let has_import = result
1370 .tokens
1371 .iter()
1372 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1373 assert!(has_import, "Should tokenize import in <script> block");
1374 let has_const = result
1375 .tokens
1376 .iter()
1377 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
1378 assert!(has_const, "Should tokenize const in <script> block");
1379 }
1380
1381 #[test]
1382 fn tokenize_svelte_sfc_extracts_script_block() {
1383 let svelte_source = r"<script>
1384let count = 0;
1385function increment() { count += 1; }
1386</script>
1387<button on:click={increment}>{count}</button>";
1388 let path = PathBuf::from("Component.svelte");
1389 let result = tokenize_file(&path, svelte_source);
1390 assert!(
1391 !result.tokens.is_empty(),
1392 "Svelte SFC should produce tokens"
1393 );
1394 let has_let = result
1395 .tokens
1396 .iter()
1397 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Let)));
1398 assert!(has_let, "Should tokenize let in <script> block");
1399 let has_function = result
1400 .tokens
1401 .iter()
1402 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
1403 assert!(has_function, "Should tokenize function in <script> block");
1404 }
1405
1406 #[test]
1407 fn tokenize_vue_sfc_adjusts_span_offsets() {
1408 let vue_source = "<template><div/></template>\n<script>\nconst x = 1;\n</script>";
1409 let path = PathBuf::from("Test.vue");
1410 let result = tokenize_file(&path, vue_source);
1411 let script_body_offset = vue_source.find("const x").unwrap() as u32;
1413 for token in &result.tokens {
1416 assert!(
1417 token.span.start >= script_body_offset,
1418 "Token span start ({}) should be >= script body offset ({})",
1419 token.span.start,
1420 script_body_offset
1421 );
1422 let text = &vue_source[token.span.start as usize..token.span.end as usize];
1424 assert!(
1425 !text.is_empty(),
1426 "Token span should recover non-empty text from full SFC source"
1427 );
1428 }
1429 }
1430
1431 #[test]
1432 fn tokenize_astro_extracts_frontmatter() {
1433 let astro_source = "---\nimport { Layout } from '../layouts/Layout.astro';\nconst title = 'Home';\n---\n<Layout title={title}><h1>Hello</h1></Layout>";
1434 let path = PathBuf::from("page.astro");
1435 let result = tokenize_file(&path, astro_source);
1436 assert!(
1437 !result.tokens.is_empty(),
1438 "Astro frontmatter should produce tokens"
1439 );
1440 let has_import = result
1441 .tokens
1442 .iter()
1443 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1444 assert!(has_import, "Should tokenize import in frontmatter");
1445 }
1446
1447 #[test]
1448 fn tokenize_astro_without_frontmatter_returns_empty() {
1449 let astro_source = "<html><body>Hello</body></html>";
1450 let path = PathBuf::from("page.astro");
1451 let result = tokenize_file(&path, astro_source);
1452 assert!(
1453 result.tokens.is_empty(),
1454 "Astro without frontmatter should produce no tokens"
1455 );
1456 }
1457
1458 #[test]
1459 fn tokenize_astro_adjusts_span_offsets() {
1460 let astro_source = "---\nconst x = 1;\n---\n<div/>";
1461 let path = PathBuf::from("page.astro");
1462 let result = tokenize_file(&path, astro_source);
1463 assert!(!result.tokens.is_empty());
1464 for token in &result.tokens {
1466 assert!(
1467 token.span.start >= 4,
1468 "Token span start ({}) should be offset into the full astro source",
1469 token.span.start
1470 );
1471 }
1472 }
1473
1474 #[test]
1475 fn tokenize_mdx_extracts_imports_and_exports() {
1476 let mdx_source = "import { Button } from './Button';\nexport const meta = { title: 'Hello' };\n\n# Hello World\n\n<Button>Click me</Button>";
1477 let path = PathBuf::from("page.mdx");
1478 let result = tokenize_file(&path, mdx_source);
1479 assert!(
1480 !result.tokens.is_empty(),
1481 "MDX should produce tokens from imports/exports"
1482 );
1483 let has_import = result
1484 .tokens
1485 .iter()
1486 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
1487 assert!(has_import, "Should tokenize import in MDX");
1488 let has_export = result
1489 .tokens
1490 .iter()
1491 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
1492 assert!(has_export, "Should tokenize export in MDX");
1493 }
1494
1495 #[test]
1496 fn tokenize_mdx_without_statements_returns_empty() {
1497 let mdx_source = "# Just Markdown\n\nNo imports or exports here.";
1498 let path = PathBuf::from("page.mdx");
1499 let result = tokenize_file(&path, mdx_source);
1500 assert!(
1501 result.tokens.is_empty(),
1502 "MDX without imports/exports should produce no tokens"
1503 );
1504 }
1505
1506 #[test]
1507 fn tokenize_css_returns_empty() {
1508 let css_source = ".foo { color: red; }\n.bar { font-size: 16px; }";
1509 let path = PathBuf::from("styles.css");
1510 let result = tokenize_file(&path, css_source);
1511 assert!(
1512 result.tokens.is_empty(),
1513 "CSS files should produce no tokens"
1514 );
1515 assert!(result.line_count >= 1);
1516 }
1517
1518 #[test]
1519 fn tokenize_scss_returns_empty() {
1520 let scss_source = "$color: red;\n.foo { color: $color; }";
1521 let path = PathBuf::from("styles.scss");
1522 let result = tokenize_file(&path, scss_source);
1523 assert!(
1524 result.tokens.is_empty(),
1525 "SCSS files should produce no tokens"
1526 );
1527 }
1528
1529 #[test]
1532 fn file_tokens_line_count_matches_source() {
1533 let source = "const x = 1;\nconst y = 2;\nconst z = 3;";
1534 let path = PathBuf::from("test.ts");
1535 let result = tokenize_file(&path, source);
1536 assert_eq!(result.line_count, 3);
1537 assert_eq!(result.source, source);
1538 }
1539
1540 #[test]
1541 fn file_tokens_line_count_minimum_is_one() {
1542 let path = PathBuf::from("test.ts");
1543 let result = tokenize_file(&path, "");
1544 assert_eq!(result.line_count, 1, "Empty file should have line_count 1");
1545 }
1546
1547 #[test]
1550 fn js_file_with_jsx_retries_as_jsx() {
1551 let jsx_code = r#"
1555function App() {
1556 return (
1557 <div className="app">
1558 <h1>Hello World</h1>
1559 <p>Welcome to the app</p>
1560 </div>
1561 );
1562}
1563"#;
1564 let path = PathBuf::from("app.js");
1565 let result = tokenize_file(&path, jsx_code);
1566 let has_brackets = result
1568 .tokens
1569 .iter()
1570 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1571 assert!(
1572 has_brackets,
1573 "JSX fallback retry should produce JSX tokens from .js file"
1574 );
1575 }
1576
1577 #[test]
1580 fn tokenize_for_in_statement() {
1581 let tokens = tokenize("for (const key in obj) { console.log(key); }");
1582 assert!(matches!(
1583 tokens[0].kind,
1584 TokenKind::Keyword(KeywordType::For)
1585 ));
1586 let has_in = tokens
1587 .iter()
1588 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::In)));
1589 assert!(has_in, "Should contain 'in' keyword");
1590 }
1591
1592 #[test]
1593 fn tokenize_for_of_statement() {
1594 let tokens = tokenize("for (const item of items) { process(item); }");
1595 assert!(matches!(
1596 tokens[0].kind,
1597 TokenKind::Keyword(KeywordType::For)
1598 ));
1599 let has_of = tokens
1600 .iter()
1601 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Of)));
1602 assert!(has_of, "Should contain 'of' keyword");
1603 }
1604
1605 #[test]
1606 fn tokenize_while_statement() {
1607 let tokens = tokenize("while (x > 0) { x--; }");
1608 assert!(matches!(
1609 tokens[0].kind,
1610 TokenKind::Keyword(KeywordType::While)
1611 ));
1612 let has_gt = tokens
1613 .iter()
1614 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Gt)));
1615 assert!(has_gt, "Should contain greater-than operator");
1616 }
1617
1618 #[test]
1619 fn tokenize_do_while_statement() {
1620 let tokens = tokenize("do { x++; } while (x < 10);");
1621 assert!(matches!(
1622 tokens[0].kind,
1623 TokenKind::Keyword(KeywordType::Do)
1624 ));
1625 let has_increment = tokens
1628 .iter()
1629 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Increment)));
1630 assert!(has_increment, "do-while body should contain increment");
1631 let has_lt = tokens
1632 .iter()
1633 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Lt)));
1634 assert!(has_lt, "do-while condition should contain < operator");
1635 }
1636
1637 #[test]
1638 fn tokenize_switch_case_default() {
1639 let tokens = tokenize("switch (x) { case 1: break; case 2: break; default: return; }");
1640 assert!(matches!(
1641 tokens[0].kind,
1642 TokenKind::Keyword(KeywordType::Switch)
1643 ));
1644 let case_count = tokens
1645 .iter()
1646 .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Case)))
1647 .count();
1648 assert_eq!(case_count, 2, "Should have two case keywords");
1649 let has_default = tokens
1650 .iter()
1651 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Default)));
1652 assert!(has_default, "Should have default keyword");
1653 let has_break = tokens
1654 .iter()
1655 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Break)));
1656 assert!(has_break, "Should have break keyword");
1657 let colon_count = tokens
1659 .iter()
1660 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)))
1661 .count();
1662 assert!(
1663 colon_count >= 3,
1664 "Should have at least 3 colons (case, case, default), got {colon_count}"
1665 );
1666 }
1667
1668 #[test]
1669 fn tokenize_continue_statement() {
1670 let tokens = tokenize("for (let i = 0; i < 10; i++) { if (i === 5) continue; }");
1671 let has_continue = tokens
1672 .iter()
1673 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Continue)));
1674 assert!(has_continue, "Should contain continue keyword");
1675 }
1676
1677 #[test]
1678 fn tokenize_try_catch_finally() {
1679 let tokens = tokenize("try { foo(); } catch (e) { bar(); } finally { baz(); }");
1680 let has_try = tokens
1681 .iter()
1682 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Try)));
1683 let has_catch = tokens
1684 .iter()
1685 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Catch)));
1686 let has_finally = tokens
1687 .iter()
1688 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Finally)));
1689 assert!(has_try, "Should contain try keyword");
1690 assert!(has_catch, "Should contain catch keyword");
1691 assert!(
1694 !has_finally,
1695 "Finally keyword is not emitted (no visitor override)"
1696 );
1697 }
1698
1699 #[test]
1700 fn tokenize_throw_statement() {
1701 let tokens = tokenize("throw new Error('fail');");
1702 assert!(matches!(
1703 tokens[0].kind,
1704 TokenKind::Keyword(KeywordType::Throw)
1705 ));
1706 let has_new = tokens
1707 .iter()
1708 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
1709 assert!(has_new, "Should contain new keyword");
1710 }
1711
1712 #[test]
1715 fn tokenize_this_expression() {
1716 let tokens = tokenize("const x = this.foo;");
1717 let has_this = tokens
1718 .iter()
1719 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::This)));
1720 assert!(has_this, "Should contain this keyword");
1721 }
1722
1723 #[test]
1724 fn tokenize_super_expression() {
1725 let tokens = tokenize("class Child extends Parent { constructor() { super(); } }");
1726 let has_super = tokens
1727 .iter()
1728 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Super)));
1729 assert!(has_super, "Should contain super keyword");
1730 }
1731
1732 #[test]
1733 fn tokenize_array_expression() {
1734 let tokens = tokenize("const arr = [1, 2, 3];");
1735 let open_bracket = tokens
1736 .iter()
1737 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1738 let close_bracket = tokens.iter().any(|t| {
1739 matches!(
1740 t.kind,
1741 TokenKind::Punctuation(PunctuationType::CloseBracket)
1742 )
1743 });
1744 assert!(open_bracket, "Should contain open bracket");
1745 assert!(close_bracket, "Should contain close bracket");
1746 }
1747
1748 #[test]
1749 fn tokenize_object_expression() {
1750 let tokens = tokenize("const obj = { a: 1, b: 2 };");
1751 let open_brace = tokens
1752 .iter()
1753 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
1754 .count();
1755 let close_brace = tokens
1756 .iter()
1757 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
1758 .count();
1759 assert!(open_brace >= 1, "Should have open brace for object");
1760 assert!(close_brace >= 1, "Should have close brace for object");
1761 }
1762
1763 #[test]
1764 fn tokenize_computed_member_expression() {
1765 let tokens = tokenize("const x = obj[key];");
1766 let open_bracket = tokens
1768 .iter()
1769 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBracket)));
1770 let close_bracket = tokens.iter().any(|t| {
1771 matches!(
1772 t.kind,
1773 TokenKind::Punctuation(PunctuationType::CloseBracket)
1774 )
1775 });
1776 assert!(
1777 open_bracket,
1778 "Should contain open bracket for computed member"
1779 );
1780 assert!(
1781 close_bracket,
1782 "Should contain close bracket for computed member"
1783 );
1784 }
1785
1786 #[test]
1787 fn tokenize_static_member_expression() {
1788 let tokens = tokenize("const x = obj.prop;");
1789 let has_dot = tokens
1790 .iter()
1791 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Dot)));
1792 let has_prop = tokens
1793 .iter()
1794 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "prop"));
1795 assert!(has_dot, "Should contain dot for member access");
1796 assert!(has_prop, "Should contain property name 'prop'");
1797 }
1798
1799 #[test]
1800 fn tokenize_new_expression() {
1801 let tokens = tokenize("const d = new Date(2024, 1, 1);");
1802 let has_new = tokens
1803 .iter()
1804 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
1805 assert!(has_new, "Should contain new keyword");
1806 let has_date = tokens
1807 .iter()
1808 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "Date"));
1809 assert!(has_date, "Should contain identifier 'Date'");
1810 }
1811
1812 #[test]
1813 fn tokenize_template_literal() {
1814 let tokens = tokenize("const s = `hello ${name}`;");
1815 let has_template = tokens
1816 .iter()
1817 .any(|t| matches!(t.kind, TokenKind::TemplateLiteral));
1818 assert!(has_template, "Should contain template literal token");
1819 }
1820
1821 #[test]
1822 fn tokenize_regex_literal() {
1823 let tokens = tokenize("const re = /foo[a-z]+/gi;");
1824 let has_regex = tokens
1825 .iter()
1826 .any(|t| matches!(t.kind, TokenKind::RegExpLiteral));
1827 assert!(has_regex, "Should contain regex literal token");
1828 }
1829
1830 #[test]
1831 fn tokenize_conditional_ternary_expression() {
1832 let tokens = tokenize("const x = a ? b : c;");
1833 let has_ternary = tokens
1834 .iter()
1835 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Ternary)));
1836 let has_colon = tokens
1837 .iter()
1838 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)));
1839 assert!(has_ternary, "Should contain ternary operator");
1840 assert!(has_colon, "Should contain colon for ternary");
1841 }
1842
1843 #[test]
1844 fn tokenize_sequence_expression() {
1845 let tokens = tokenize("for (let i = 0, j = 10; i < j; i++, j--) {}");
1846 let comma_count = tokens
1847 .iter()
1848 .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
1849 .count();
1850 assert!(
1851 comma_count >= 1,
1852 "Sequence expression should produce comma operators"
1853 );
1854 }
1855
1856 #[test]
1857 fn tokenize_spread_element() {
1858 let tokens = tokenize("const arr = [...other, 1, 2];");
1859 let has_spread = tokens
1860 .iter()
1861 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Spread)));
1862 assert!(has_spread, "Should contain spread operator");
1863 }
1864
1865 #[test]
1866 fn tokenize_yield_expression() {
1867 let tokens = tokenize("function* gen() { yield 42; }");
1868 let has_yield = tokens
1869 .iter()
1870 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Yield)));
1871 assert!(has_yield, "Should contain yield keyword");
1872 }
1873
1874 #[test]
1875 fn tokenize_await_expression() {
1876 let tokens = tokenize("async function run() { const x = await fetch(); }");
1877 let has_async = tokens
1878 .iter()
1879 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Async)));
1880 let has_await = tokens
1881 .iter()
1882 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Await)));
1883 assert!(has_async, "Should contain async keyword");
1884 assert!(has_await, "Should contain await keyword");
1885 }
1886
1887 #[test]
1888 fn tokenize_async_arrow_function() {
1889 let tokens = tokenize("const f = async () => { await fetch(); };");
1890 let has_async = tokens
1891 .iter()
1892 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Async)));
1893 let has_arrow = tokens
1894 .iter()
1895 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Arrow)));
1896 assert!(has_async, "Should contain async keyword before arrow");
1897 assert!(has_arrow, "Should contain arrow operator");
1898 }
1899
1900 #[test]
1903 fn tokenize_all_binary_operators() {
1904 let code = r"
1905const a = 1 + 2;
1906const b = 3 - 4;
1907const c = 5 * 6;
1908const d = 7 / 8;
1909const e = 9 % 10;
1910const f = 2 ** 3;
1911const g = a == b;
1912const h = a != b;
1913const i = a === b;
1914const j = a !== b;
1915const k = a < b;
1916const l = a > b;
1917const m = a <= b;
1918const n = a >= b;
1919const o = a & b;
1920const p = a | b;
1921const q = a ^ b;
1922const r = a << b;
1923const s = a >> b;
1924const t = a >>> b;
1925const u = a instanceof Object;
1926const v = 'key' in obj;
1927";
1928 let tokens = tokenize(code);
1929 let ops: Vec<&OperatorType> = tokens
1930 .iter()
1931 .filter_map(|t| match &t.kind {
1932 TokenKind::Operator(op) => Some(op),
1933 _ => None,
1934 })
1935 .collect();
1936 assert!(ops.contains(&&OperatorType::Add));
1937 assert!(ops.contains(&&OperatorType::Sub));
1938 assert!(ops.contains(&&OperatorType::Mul));
1939 assert!(ops.contains(&&OperatorType::Div));
1940 assert!(ops.contains(&&OperatorType::Mod));
1941 assert!(ops.contains(&&OperatorType::Exp));
1942 assert!(ops.contains(&&OperatorType::Eq));
1943 assert!(ops.contains(&&OperatorType::NEq));
1944 assert!(ops.contains(&&OperatorType::StrictEq));
1945 assert!(ops.contains(&&OperatorType::StrictNEq));
1946 assert!(ops.contains(&&OperatorType::Lt));
1947 assert!(ops.contains(&&OperatorType::Gt));
1948 assert!(ops.contains(&&OperatorType::LtEq));
1949 assert!(ops.contains(&&OperatorType::GtEq));
1950 assert!(ops.contains(&&OperatorType::BitwiseAnd));
1951 assert!(ops.contains(&&OperatorType::BitwiseOr));
1952 assert!(ops.contains(&&OperatorType::BitwiseXor));
1953 assert!(ops.contains(&&OperatorType::ShiftLeft));
1954 assert!(ops.contains(&&OperatorType::ShiftRight));
1955 assert!(ops.contains(&&OperatorType::UnsignedShiftRight));
1956 assert!(ops.contains(&&OperatorType::Instanceof));
1957 assert!(ops.contains(&&OperatorType::In));
1958 }
1959
1960 #[test]
1961 fn tokenize_logical_operators() {
1962 let tokens = tokenize("const x = a && b || c ?? d;");
1963 let ops: Vec<&OperatorType> = tokens
1964 .iter()
1965 .filter_map(|t| match &t.kind {
1966 TokenKind::Operator(op) => Some(op),
1967 _ => None,
1968 })
1969 .collect();
1970 assert!(ops.contains(&&OperatorType::And));
1971 assert!(ops.contains(&&OperatorType::Or));
1972 assert!(ops.contains(&&OperatorType::NullishCoalescing));
1973 }
1974
1975 #[test]
1976 fn tokenize_assignment_operators() {
1977 let code = r"
1978x = 1;
1979x += 1;
1980x -= 1;
1981x *= 1;
1982x /= 1;
1983x %= 1;
1984x **= 1;
1985x &&= true;
1986x ||= true;
1987x ??= 1;
1988x &= 1;
1989x |= 1;
1990x ^= 1;
1991x <<= 1;
1992x >>= 1;
1993x >>>= 1;
1994";
1995 let tokens = tokenize(code);
1996 let ops: Vec<&OperatorType> = tokens
1997 .iter()
1998 .filter_map(|t| match &t.kind {
1999 TokenKind::Operator(op) => Some(op),
2000 _ => None,
2001 })
2002 .collect();
2003 assert!(ops.contains(&&OperatorType::Assign));
2004 assert!(ops.contains(&&OperatorType::AddAssign));
2005 assert!(ops.contains(&&OperatorType::SubAssign));
2006 assert!(ops.contains(&&OperatorType::MulAssign));
2007 assert!(ops.contains(&&OperatorType::DivAssign));
2008 assert!(ops.contains(&&OperatorType::ModAssign));
2009 assert!(ops.contains(&&OperatorType::ExpAssign));
2010 assert!(ops.contains(&&OperatorType::AndAssign));
2011 assert!(ops.contains(&&OperatorType::OrAssign));
2012 assert!(ops.contains(&&OperatorType::NullishAssign));
2013 assert!(ops.contains(&&OperatorType::BitwiseAndAssign));
2014 assert!(ops.contains(&&OperatorType::BitwiseOrAssign));
2015 assert!(ops.contains(&&OperatorType::BitwiseXorAssign));
2016 assert!(ops.contains(&&OperatorType::ShiftLeftAssign));
2017 assert!(ops.contains(&&OperatorType::ShiftRightAssign));
2018 assert!(ops.contains(&&OperatorType::UnsignedShiftRightAssign));
2019 }
2020
2021 #[test]
2022 fn tokenize_unary_operators() {
2023 let code = "const a = +x; const b = -x; const c = !x; const d = ~x;";
2024 let tokens = tokenize(code);
2025 let ops: Vec<&OperatorType> = tokens
2026 .iter()
2027 .filter_map(|t| match &t.kind {
2028 TokenKind::Operator(op) => Some(op),
2029 _ => None,
2030 })
2031 .collect();
2032 assert!(
2034 ops.contains(&&OperatorType::Add),
2035 "Should have unary plus (mapped to Add)"
2036 );
2037 assert!(
2038 ops.contains(&&OperatorType::Sub),
2039 "Should have unary minus (mapped to Sub)"
2040 );
2041 assert!(ops.contains(&&OperatorType::Not), "Should have logical not");
2042 assert!(
2043 ops.contains(&&OperatorType::BitwiseNot),
2044 "Should have bitwise not"
2045 );
2046 }
2047
2048 #[test]
2049 fn tokenize_typeof_void_delete_as_keywords() {
2050 let tokens = tokenize("typeof x; void 0; delete obj.key;");
2051 let has_typeof = tokens
2052 .iter()
2053 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Typeof)));
2054 let has_void = tokens
2055 .iter()
2056 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Void)));
2057 let has_delete = tokens
2058 .iter()
2059 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Delete)));
2060 assert!(has_typeof, "typeof should be a keyword token");
2061 assert!(has_void, "void should be a keyword token");
2062 assert!(has_delete, "delete should be a keyword token");
2063 }
2064
2065 #[test]
2066 fn tokenize_prefix_and_postfix_update() {
2067 let tokens = tokenize("++x; x--;");
2068 let first_increment_idx = tokens
2069 .iter()
2070 .position(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Increment)));
2071 let has_decrement = tokens
2072 .iter()
2073 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Decrement)));
2074 assert!(
2075 first_increment_idx.is_some(),
2076 "Should have increment operator"
2077 );
2078 assert!(has_decrement, "Should have decrement operator");
2079
2080 let first_x_idx = tokens
2082 .iter()
2083 .position(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "x"))
2084 .unwrap();
2085 assert!(
2086 first_increment_idx.unwrap() < first_x_idx,
2087 "Prefix ++ should appear before identifier"
2088 );
2089 }
2090
2091 #[test]
2094 fn tokenize_ts_as_expression() {
2095 let tokens = tokenize("const x = value as string;");
2096 let has_as = tokens
2097 .iter()
2098 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2099 assert!(has_as, "Should contain 'as' keyword");
2100 }
2101
2102 #[test]
2103 fn tokenize_ts_satisfies_expression() {
2104 let tokens = tokenize("const config = {} satisfies Config;");
2105 let has_satisfies = tokens
2106 .iter()
2107 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Satisfies)));
2108 assert!(has_satisfies, "Should contain 'satisfies' keyword");
2109 }
2110
2111 #[test]
2112 fn tokenize_ts_non_null_assertion() {
2113 let ts_tokens = tokenize("const x = value!.toString();");
2114 let has_value = ts_tokens
2117 .iter()
2118 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "value"));
2119 assert!(has_value, "Should contain 'value' identifier");
2120 }
2121
2122 #[test]
2123 fn tokenize_ts_generic_type_parameters() {
2124 let tokens = tokenize("function identity<T>(x: T): T { return x; }");
2125 let t_count = tokens
2127 .iter()
2128 .filter(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "T"))
2129 .count();
2130 assert!(
2131 t_count >= 1,
2132 "Generic type parameter T should appear in tokens"
2133 );
2134 }
2135
2136 #[test]
2137 fn tokenize_ts_type_keywords() {
2138 let tokens = tokenize(
2139 "type T = string | number | boolean | any | void | null | undefined | never | unknown;",
2140 );
2141 let idents: Vec<&String> = tokens
2142 .iter()
2143 .filter_map(|t| match &t.kind {
2144 TokenKind::Identifier(name) => Some(name),
2145 _ => None,
2146 })
2147 .collect();
2148 assert!(idents.contains(&&"string".to_string()));
2149 assert!(idents.contains(&&"number".to_string()));
2150 assert!(idents.contains(&&"boolean".to_string()));
2151 assert!(idents.contains(&&"any".to_string()));
2152 assert!(idents.contains(&&"void".to_string()));
2153 assert!(idents.contains(&&"undefined".to_string()));
2154 assert!(idents.contains(&&"never".to_string()));
2155 assert!(idents.contains(&&"unknown".to_string()));
2156 let has_null = tokens
2158 .iter()
2159 .any(|t| matches!(t.kind, TokenKind::NullLiteral));
2160 assert!(has_null, "null keyword should produce NullLiteral token");
2161 }
2162
2163 #[test]
2164 fn tokenize_ts_property_signatures_in_interface() {
2165 let tokens = tokenize("interface Foo { bar: string; baz: number; }");
2166 let semicolons = tokens
2168 .iter()
2169 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Semicolon)))
2170 .count();
2171 assert!(
2172 semicolons >= 2,
2173 "Interface property signatures should produce semicolons, got {semicolons}"
2174 );
2175 }
2176
2177 #[test]
2178 fn tokenize_ts_enum_with_initializers() {
2179 let tokens = tokenize("enum Status { Active = 'ACTIVE', Inactive = 'INACTIVE' }");
2180 let has_enum = tokens
2181 .iter()
2182 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Enum)));
2183 assert!(has_enum);
2184 let has_active_str = tokens
2185 .iter()
2186 .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "ACTIVE"));
2187 assert!(has_active_str, "Should contain string initializer 'ACTIVE'");
2188 }
2189
2190 #[test]
2193 fn strip_types_removes_generic_type_parameters() {
2194 let stripped = tokenize_cross_language("function identity<T>(x: T): T { return x; }");
2195 let js_tokens = {
2196 let path = PathBuf::from("test.js");
2197 tokenize_file(&path, "function identity(x) { return x; }").tokens
2198 };
2199 assert_eq!(
2200 stripped.len(),
2201 js_tokens.len(),
2202 "Stripped TS with generics should match JS token count: stripped={}, js={}",
2203 stripped.len(),
2204 js_tokens.len()
2205 );
2206 }
2207
2208 #[test]
2209 fn strip_types_removes_generic_type_arguments() {
2210 let stripped = tokenize_cross_language("const x = new Map<string, number>();");
2211 let has_string_ident = stripped
2213 .iter()
2214 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "string"));
2215 let has_map = stripped
2217 .iter()
2218 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "Map"));
2219 assert!(has_map, "Map identifier should be preserved");
2220 assert!(
2222 !has_string_ident,
2223 "Type argument 'string' should be stripped"
2224 );
2225 }
2226
2227 #[test]
2228 fn strip_types_removes_as_expression() {
2229 let stripped = tokenize_cross_language("const x = value as string;");
2230 let has_as = stripped
2231 .iter()
2232 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2233 assert!(!has_as, "'as' expression should be stripped");
2234 }
2235
2236 #[test]
2237 fn strip_types_removes_satisfies_expression() {
2238 let stripped = tokenize_cross_language("const config = {} satisfies Config;");
2239 let has_satisfies = stripped
2240 .iter()
2241 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Satisfies)));
2242 assert!(!has_satisfies, "'satisfies' expression should be stripped");
2243 }
2244
2245 #[test]
2246 fn strip_types_ts_and_js_produce_identical_token_kinds() {
2247 let ts_code = r#"
2248function greet(name: string, age: number): string {
2249 const msg: string = `Hello ${name}`;
2250 if (age > 18) {
2251 return msg;
2252 }
2253 return "too young";
2254}
2255"#;
2256 let js_code = r#"
2257function greet(name, age) {
2258 const msg = `Hello ${name}`;
2259 if (age > 18) {
2260 return msg;
2261 }
2262 return "too young";
2263}
2264"#;
2265 let stripped = tokenize_cross_language(ts_code);
2266 let js_tokens = {
2267 let path = PathBuf::from("test.js");
2268 tokenize_file(&path, js_code).tokens
2269 };
2270
2271 assert_eq!(
2272 stripped.len(),
2273 js_tokens.len(),
2274 "Stripped TS and JS should produce same number of tokens"
2275 );
2276
2277 for (i, (ts_tok, js_tok)) in stripped.iter().zip(js_tokens.iter()).enumerate() {
2279 assert_eq!(
2280 ts_tok.kind, js_tok.kind,
2281 "Token {i} mismatch: TS={:?}, JS={:?}",
2282 ts_tok.kind, js_tok.kind
2283 );
2284 }
2285 }
2286
2287 #[test]
2288 fn strip_types_removes_export_type_but_keeps_export_value() {
2289 let stripped =
2290 tokenize_cross_language("export type { Foo };\nexport { bar };\nexport const x = 1;");
2291 let export_count = stripped
2292 .iter()
2293 .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)))
2294 .count();
2295 assert_eq!(
2297 export_count, 2,
2298 "Should have 2 value exports, got {export_count}"
2299 );
2300 }
2301
2302 #[test]
2305 fn tokenize_jsx_fragment() {
2306 let tokens = tokenize_tsx("const x = <><div>Hello</div></>;");
2307 let bracket_count = tokens
2309 .iter()
2310 .filter(|t| {
2311 matches!(
2312 t.kind,
2313 TokenKind::Punctuation(PunctuationType::OpenBracket)
2314 | TokenKind::Punctuation(PunctuationType::CloseBracket)
2315 )
2316 })
2317 .count();
2318 assert!(
2319 bracket_count >= 4,
2320 "JSX fragment should produce bracket tokens, got {bracket_count}"
2321 );
2322 }
2323
2324 #[test]
2325 fn tokenize_jsx_spread_attribute() {
2326 let tokens = tokenize_tsx("const x = <div {...props}>Hello</div>;");
2327 let has_spread = tokens
2328 .iter()
2329 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Spread)));
2330 assert!(
2331 has_spread,
2332 "JSX spread attribute should produce spread operator"
2333 );
2334 }
2335
2336 #[test]
2337 fn tokenize_jsx_expression_container() {
2338 let tokens = tokenize_tsx("const x = <div>{count > 0 ? 'yes' : 'no'}</div>;");
2339 let has_ternary = tokens
2340 .iter()
2341 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Ternary)));
2342 assert!(
2343 has_ternary,
2344 "Expression in JSX should be tokenized (ternary)"
2345 );
2346 }
2347
2348 #[test]
2351 fn tokenize_import_declaration() {
2352 let tokens = tokenize("import { foo, bar } from './module';");
2353 let has_import = tokens
2354 .iter()
2355 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
2356 let has_from = tokens
2357 .iter()
2358 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::From)));
2359 let has_source = tokens
2360 .iter()
2361 .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "./module"));
2362 assert!(has_import, "Should contain import keyword");
2363 assert!(has_from, "Should contain from keyword");
2364 assert!(has_source, "Should contain module source string");
2365 }
2366
2367 #[test]
2368 fn tokenize_export_default_declaration() {
2369 let tokens = tokenize("export default function() { return 42; }");
2370 let has_export = tokens
2371 .iter()
2372 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
2373 let has_default = tokens
2374 .iter()
2375 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Default)));
2376 assert!(has_export, "Should contain export keyword");
2377 assert!(has_default, "Should contain default keyword");
2378 }
2379
2380 #[test]
2381 fn tokenize_export_all_declaration() {
2382 let tokens = tokenize("export * from './module';");
2383 let has_export = tokens
2384 .iter()
2385 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Export)));
2386 let has_from = tokens
2387 .iter()
2388 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::From)));
2389 let has_source = tokens
2390 .iter()
2391 .any(|t| matches!(&t.kind, TokenKind::StringLiteral(s) if s == "./module"));
2392 assert!(has_export, "export * should have export keyword");
2393 assert!(has_from, "export * should have from keyword");
2394 assert!(has_source, "export * should have source string");
2395 }
2396
2397 #[test]
2398 fn tokenize_dynamic_import() {
2399 let tokens = tokenize("const mod = await import('./module');");
2400 let has_import = tokens
2401 .iter()
2402 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)));
2403 let has_await = tokens
2404 .iter()
2405 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Await)));
2406 assert!(
2409 !has_import,
2410 "Dynamic import() should not produce Import keyword"
2411 );
2412 assert!(has_await, "Should contain await keyword");
2413 }
2414
2415 #[test]
2418 fn tokenize_only_comments() {
2419 let tokens = tokenize("// This is a comment\n/* block comment */\n");
2420 assert!(
2421 tokens.is_empty(),
2422 "File with only comments should produce no tokens"
2423 );
2424 }
2425
2426 #[test]
2427 fn tokenize_deeply_nested_structure() {
2428 let code = "const x = { a: { b: { c: { d: { e: 1 } } } } };";
2429 let tokens = tokenize(code);
2430 let open_braces = tokens
2431 .iter()
2432 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2433 .count();
2434 let close_braces = tokens
2435 .iter()
2436 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
2437 .count();
2438 assert_eq!(
2439 open_braces, close_braces,
2440 "Nested structure should have balanced braces"
2441 );
2442 assert!(
2443 open_braces >= 5,
2444 "Should have at least 5 levels of braces, got {open_braces}"
2445 );
2446 }
2447
2448 #[test]
2449 fn tokenize_chained_method_calls_uses_point_spans() {
2450 let tokens = tokenize("arr.filter(x => x > 0).map(x => x * 2).reduce((a, b) => a + b, 0);");
2451 let dots: Vec<&SourceToken> = tokens
2454 .iter()
2455 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Dot)))
2456 .collect();
2457 assert!(
2458 dots.len() >= 3,
2459 "Chained calls should produce dots, got {}",
2460 dots.len()
2461 );
2462 for dot in &dots {
2464 assert_eq!(
2465 dot.span.end - dot.span.start,
2466 1,
2467 "Dot should use point span"
2468 );
2469 }
2470 }
2471
2472 #[test]
2473 fn tokenize_expression_statement_appends_semicolon() {
2474 let tokens = tokenize("foo();");
2475 let last = tokens.last().unwrap();
2476 assert!(
2477 matches!(
2478 last.kind,
2479 TokenKind::Punctuation(PunctuationType::Semicolon)
2480 | TokenKind::Punctuation(PunctuationType::CloseParen)
2481 | TokenKind::Operator(OperatorType::Comma)
2482 ),
2483 "Expression statement should end with semicolon or related punctuation"
2484 );
2485 let has_semicolon = tokens
2486 .iter()
2487 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Semicolon)));
2488 assert!(
2489 has_semicolon,
2490 "Expression statement should produce a semicolon"
2491 );
2492 }
2493
2494 #[test]
2495 fn tokenize_variable_declarator_with_no_initializer() {
2496 let tokens = tokenize("let x;");
2497 let has_let = tokens
2498 .iter()
2499 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Let)));
2500 let has_x = tokens
2501 .iter()
2502 .any(|t| matches!(&t.kind, TokenKind::Identifier(n) if n == "x"));
2503 let has_assign = tokens
2505 .iter()
2506 .any(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Assign)));
2507 assert!(has_let, "Should have let keyword");
2508 assert!(has_x, "Should have identifier x");
2509 assert!(
2510 !has_assign,
2511 "Uninitialized declarator should not have assign operator"
2512 );
2513 }
2514
2515 #[test]
2516 fn tokenize_using_declaration_maps_to_const() {
2517 let tokens = tokenize("{ using resource = getResource(); }");
2519 let has_const = tokens
2520 .iter()
2521 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2522 assert!(
2523 has_const,
2524 "`using` declaration should be mapped to Const keyword"
2525 );
2526 }
2527
2528 #[test]
2529 fn tokenize_block_statement_produces_braces() {
2530 let tokens = tokenize("{ const x = 1; }");
2531 assert!(matches!(
2532 tokens[0].kind,
2533 TokenKind::Punctuation(PunctuationType::OpenBrace)
2534 ));
2535 let last = tokens.last().unwrap();
2536 assert!(
2537 matches!(
2538 last.kind,
2539 TokenKind::Punctuation(PunctuationType::CloseBrace)
2540 ),
2541 "Block should end with close brace"
2542 );
2543 }
2544
2545 #[test]
2546 fn tokenize_class_without_name_and_no_extends() {
2547 let tokens = tokenize("const C = class { };");
2548 let has_class = tokens
2549 .iter()
2550 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Class)));
2551 let has_extends = tokens
2552 .iter()
2553 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Extends)));
2554 assert!(has_class, "Should have class keyword");
2555 assert!(
2556 !has_extends,
2557 "Anonymous class without extends should not have extends keyword"
2558 );
2559 }
2560
2561 #[test]
2562 fn tokenize_function_without_name() {
2563 let tokens = tokenize("const f = function() { return 1; };");
2564 let has_function = tokens
2565 .iter()
2566 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Function)));
2567 assert!(has_function, "Should have function keyword");
2568 }
2569
2570 #[test]
2571 fn tokenize_ts_interface_body_has_braces() {
2572 let tokens = tokenize("interface I { x: number; }");
2573 let open_braces = tokens
2574 .iter()
2575 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2576 .count();
2577 let close_braces = tokens
2578 .iter()
2579 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseBrace)))
2580 .count();
2581 assert!(open_braces >= 1, "Interface body should have open brace");
2582 assert_eq!(
2583 open_braces, close_braces,
2584 "Interface body braces should be balanced"
2585 );
2586 }
2587
2588 #[test]
2589 fn tokenize_ts_enum_body_has_braces() {
2590 let tokens = tokenize("enum E { A, B }");
2591 let open_braces = tokens
2592 .iter()
2593 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenBrace)))
2594 .count();
2595 assert!(open_braces >= 1, "Enum body should have open brace");
2596 }
2597
2598 #[test]
2599 fn tokenize_ts_module_declaration_not_stripped_when_not_declare() {
2600 let tokens = tokenize("namespace Foo { export const x = 1; }");
2602 let has_const = tokens
2603 .iter()
2604 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2605 assert!(
2606 has_const,
2607 "Non-declare namespace contents should be preserved"
2608 );
2609 }
2610
2611 #[test]
2612 fn cross_language_preserves_non_declare_namespace() {
2613 let stripped = tokenize_cross_language("namespace Foo { export const x = 1; }");
2614 let has_const = stripped
2615 .iter()
2616 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2617 assert!(
2618 has_const,
2619 "Non-declare namespace contents should be preserved in cross-language mode"
2620 );
2621 }
2622
2623 #[test]
2624 fn tokenize_for_statement_with_all_clauses() {
2625 let tokens = tokenize("for (let i = 0; i < 10; i++) { console.log(i); }");
2626 assert!(matches!(
2627 tokens[0].kind,
2628 TokenKind::Keyword(KeywordType::For)
2629 ));
2630 let has_open_paren = tokens
2631 .iter()
2632 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2633 let has_close_paren = tokens
2634 .iter()
2635 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2636 assert!(has_open_paren, "For statement should have open paren");
2637 assert!(has_close_paren, "For statement should have close paren");
2638 }
2639
2640 #[test]
2641 fn tokenize_cross_language_produces_correct_metadata() {
2642 let path = PathBuf::from("test.ts");
2643 let source = "const x: number = 1;\nconst y: string = 'hello';";
2644 let result = tokenize_file_cross_language(&path, source, true);
2645 assert_eq!(result.line_count, 2);
2646 assert_eq!(result.source, source);
2647 assert!(!result.tokens.is_empty());
2648 }
2649
2650 #[test]
2651 fn strip_types_removes_complex_generics() {
2652 let stripped = tokenize_cross_language(
2653 "function merge<T extends object, U extends object>(a: T, b: U): T & U { return Object.assign(a, b); }",
2654 );
2655 let js_tokens = {
2656 let path = PathBuf::from("test.js");
2657 tokenize_file(
2658 &path,
2659 "function merge(a, b) { return Object.assign(a, b); }",
2660 )
2661 .tokens
2662 };
2663 assert_eq!(
2664 stripped.len(),
2665 js_tokens.len(),
2666 "Complex generics should be fully stripped: stripped={}, js={}",
2667 stripped.len(),
2668 js_tokens.len()
2669 );
2670 }
2671
2672 #[test]
2673 fn tokenize_ts_conditional_type_without_strip() {
2674 let tokens = tokenize("type IsString<T> = T extends string ? true : false;");
2675 let has_type = tokens
2676 .iter()
2677 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
2678 assert!(has_type, "Should contain type keyword");
2679 let has_true_bool = tokens
2683 .iter()
2684 .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(true)));
2685 let has_false_bool = tokens
2686 .iter()
2687 .any(|t| matches!(t.kind, TokenKind::BooleanLiteral(false)));
2688 assert!(
2689 has_true_bool,
2690 "Conditional type should contain true literal"
2691 );
2692 assert!(
2693 has_false_bool,
2694 "Conditional type should contain false literal"
2695 );
2696 }
2697
2698 #[test]
2699 fn strip_types_removes_conditional_type() {
2700 let stripped = tokenize_cross_language(
2701 "type IsString<T> = T extends string ? true : false;\nconst x = 1;",
2702 );
2703 let has_type = stripped
2704 .iter()
2705 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Type)));
2706 assert!(!has_type, "Conditional type alias should be fully stripped");
2707 }
2708
2709 #[test]
2710 fn tokenize_vue_sfc_with_cross_language_stripping() {
2711 let vue_source = r#"<template><div/></template>
2712<script lang="ts">
2713import type { Ref } from 'vue';
2714import { ref } from 'vue';
2715const count: Ref<number> = ref(0);
2716</script>"#;
2717 let path = PathBuf::from("Component.vue");
2718 let result = tokenize_file_cross_language(&path, vue_source, true);
2719 let import_count = result
2721 .tokens
2722 .iter()
2723 .filter(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Import)))
2724 .count();
2725 assert_eq!(
2726 import_count, 1,
2727 "import type should be stripped, leaving only 1 value import, got {import_count}"
2728 );
2729 }
2730
2731 #[test]
2732 fn tokenize_no_extension_uses_default_source_type() {
2733 let path = PathBuf::from("Makefile");
2734 let result = tokenize_file(&path, "const x = 1;");
2736 assert!(result.line_count >= 1);
2739 }
2740
2741 #[test]
2742 fn point_span_is_one_byte() {
2743 let span = point_span(42);
2744 assert_eq!(span.start, 42);
2745 assert_eq!(span.end, 43);
2746 }
2747
2748 #[test]
2749 fn tokenize_call_expression_with_arguments() {
2750 let tokens = tokenize("foo(1, 'hello', true);");
2751 let has_open_paren = tokens
2752 .iter()
2753 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2754 let has_close_paren = tokens
2755 .iter()
2756 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2757 let comma_count = tokens
2758 .iter()
2759 .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2760 .count();
2761 assert!(has_open_paren, "Call should have open paren");
2762 assert!(has_close_paren, "Call should have close paren");
2763 assert!(
2764 comma_count >= 3,
2765 "3 arguments should produce at least 3 commas (one per arg), got {comma_count}"
2766 );
2767 }
2768
2769 #[test]
2770 fn tokenize_new_expression_with_arguments() {
2771 let tokens = tokenize("new Foo(1, 2);");
2772 let has_new = tokens
2773 .iter()
2774 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::New)));
2775 let comma_count = tokens
2776 .iter()
2777 .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2778 .count();
2779 assert!(has_new);
2780 assert!(
2781 comma_count >= 2,
2782 "2 arguments should produce at least 2 commas, got {comma_count}"
2783 );
2784 }
2785
2786 #[test]
2787 fn tokenize_arrow_function_params_produce_commas() {
2788 let tokens = tokenize("const f = (a, b, c) => a;");
2789 let comma_count = tokens
2790 .iter()
2791 .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2792 .count();
2793 assert!(
2794 comma_count >= 3,
2795 "Arrow function with 3 params should produce at least 3 commas, got {comma_count}"
2796 );
2797 }
2798
2799 #[test]
2800 fn tokenize_function_params_produce_commas() {
2801 let tokens = tokenize("function f(a, b) { return a + b; }");
2802 let comma_count = tokens
2803 .iter()
2804 .filter(|t| matches!(t.kind, TokenKind::Operator(OperatorType::Comma)))
2805 .count();
2806 assert!(
2807 comma_count >= 2,
2808 "Function with 2 params should produce at least 2 commas, got {comma_count}"
2809 );
2810 }
2811
2812 #[test]
2813 fn tokenize_switch_with_open_close_parens() {
2814 let tokens = tokenize("switch (x) { case 1: break; }");
2815 let has_open_paren = tokens
2816 .iter()
2817 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2818 let has_close_paren = tokens
2819 .iter()
2820 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2821 assert!(
2822 has_open_paren,
2823 "Switch should have open paren for discriminant"
2824 );
2825 assert!(
2826 has_close_paren,
2827 "Switch should have close paren for discriminant"
2828 );
2829 }
2830
2831 #[test]
2832 fn tokenize_while_has_parens_around_condition() {
2833 let tokens = tokenize("while (true) { break; }");
2834 let has_open_paren = tokens
2835 .iter()
2836 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)));
2837 let has_close_paren = tokens
2838 .iter()
2839 .any(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)));
2840 assert!(has_open_paren, "While should have open paren");
2841 assert!(has_close_paren, "While should have close paren");
2842 }
2843
2844 #[test]
2845 fn tokenize_for_in_has_parens() {
2846 let tokens = tokenize("for (const k in obj) {}");
2847 let open_parens = tokens
2848 .iter()
2849 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)))
2850 .count();
2851 let close_parens = tokens
2852 .iter()
2853 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)))
2854 .count();
2855 assert!(open_parens >= 1, "for-in should have open paren");
2856 assert!(close_parens >= 1, "for-in should have close paren");
2857 }
2858
2859 #[test]
2860 fn tokenize_for_of_has_parens() {
2861 let tokens = tokenize("for (const v of arr) {}");
2862 let open_parens = tokens
2863 .iter()
2864 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::OpenParen)))
2865 .count();
2866 let close_parens = tokens
2867 .iter()
2868 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::CloseParen)))
2869 .count();
2870 assert!(open_parens >= 1, "for-of should have open paren");
2871 assert!(close_parens >= 1, "for-of should have close paren");
2872 }
2873
2874 #[test]
2875 fn strip_types_removes_ts_type_annotation_colon() {
2876 let stripped = tokenize_cross_language("const x: number = 1;");
2878 let colon_count = stripped
2879 .iter()
2880 .filter(|t| matches!(t.kind, TokenKind::Punctuation(PunctuationType::Colon)))
2881 .count();
2882 assert_eq!(
2883 colon_count, 0,
2884 "Type annotation colons should be stripped, got {colon_count}"
2885 );
2886 }
2887
2888 #[test]
2889 fn tokenize_ts_as_const() {
2890 let tokens = tokenize("const colors = ['red', 'green', 'blue'] as const;");
2891 let has_as = tokens
2892 .iter()
2893 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2894 assert!(has_as, "as const should produce 'as' keyword");
2895 let has_const_decl = tokens
2898 .iter()
2899 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::Const)));
2900 assert!(
2901 has_const_decl,
2902 "Should have Const keyword for the declaration"
2903 );
2904 }
2905
2906 #[test]
2907 fn strip_types_removes_as_const() {
2908 let stripped = tokenize_cross_language("const colors = ['red', 'green', 'blue'] as const;");
2909 let has_as = stripped
2910 .iter()
2911 .any(|t| matches!(t.kind, TokenKind::Keyword(KeywordType::As)));
2912 assert!(
2913 !has_as,
2914 "'as const' should be stripped in cross-language mode"
2915 );
2916 }
2917}