use verter_span::Span;
/// Which macro a recovered call refers to.
///
/// Each variant corresponds to one identifier in `MACRO_NAMES` (these match
/// the names of Vue `<script setup>` compiler macros — NOTE(review): confirm
/// that is the intended dialect).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoveredMacroKind {
/// A `defineProps(...)` call.
DefineProps,
/// A `defineEmits(...)` call.
DefineEmits,
/// A `withDefaults(...)` call.
WithDefaults,
/// A `defineModel(...)` call.
DefineModel,
/// A `defineExpose(...)` call.
DefineExpose,
/// A `defineOptions(...)` call.
DefineOptions,
/// A `defineSlots(...)` call.
DefineSlots,
}
/// A macro call found by `ScriptTokenScanner::recover`.
#[derive(Debug)]
pub struct RecoveredMacro<'a> {
/// Which macro was called.
pub kind: RecoveredMacroKind,
/// Name of the variable the call is assigned to, when the call is directly
/// preceded by `const|let|var <name> =`; `None` otherwise.
pub binding_name: Option<&'a str>,
/// Span of `binding_name` (shifted by the scanner's `content_start`).
// Not read inside this file; presumably kept for downstream consumers.
#[allow(dead_code)]
pub binding_span: Option<Span>,
/// Span from the first byte of the macro identifier to just past the
/// closing `)` of the call (shifted by the scanner's `content_start`).
pub call_span: Span,
}
/// A `function <name>(...)` declaration found by `ScriptTokenScanner::recover`.
#[derive(Debug)]
pub struct RecoveredFunction<'a> {
/// The function's identifier, borrowed from the scanned source.
pub name: &'a str,
/// Span of the identifier (shifted by the scanner's `content_start`).
// Not read inside this file; presumably kept for downstream consumers.
#[allow(dead_code)]
pub name_span: Span,
/// Span of the parameter list, including the opening `(` and closing `)`
/// (shifted by the scanner's `content_start`).
// Not read inside this file; presumably kept for downstream consumers.
#[allow(dead_code)]
pub params_span: Span,
}
/// A `const` / `let` / `var` declaration whose keyword is followed by a plain
/// identifier, as found by `ScriptTokenScanner::recover`.
#[derive(Debug)]
pub struct RecoveredVariable<'a> {
/// The declared identifier, borrowed from the scanned source.
pub name: &'a str,
/// Span of the identifier (shifted by the scanner's `content_start`).
// Not read inside this file; presumably kept for downstream consumers.
#[allow(dead_code)]
pub name_span: Span,
/// The keyword that introduced the declaration.
pub kind: RecoveredVarKind,
}
/// The declaration keyword of a `RecoveredVariable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoveredVarKind {
/// Declared with `const`.
Const,
/// Declared with `let`.
Let,
/// Declared with `var`.
Var,
}
/// Everything `ScriptTokenScanner::recover` collected from one source string.
/// Each list is in the order the items were encountered.
#[derive(Debug)]
pub struct TokenizerRecovery<'a> {
/// Recovered macro calls.
pub macros: Vec<RecoveredMacro<'a>>,
/// Recovered named function declarations.
pub functions: Vec<RecoveredFunction<'a>>,
/// Recovered `const` / `let` / `var` declarations.
pub variables: Vec<RecoveredVariable<'a>>,
}
/// Byte-level scanner that recovers macro calls, function declarations and
/// variable declarations from script source without building a syntax tree.
pub struct ScriptTokenScanner<'a> {
// The text being scanned; recovered names are slices of it.
source: &'a str,
// `source.as_bytes()`, kept alongside for byte-wise scanning.
bytes: &'a [u8],
// Current byte offset into `bytes`.
pos: usize,
// Offset added to every byte position when a span is built.
content_start: u32,
}
/// Lookup table from macro identifier text to its `RecoveredMacroKind`.
/// `ScriptTokenScanner::recover` compares every scanned identifier against
/// these names with an exact string match.
const MACRO_NAMES: &[(&str, RecoveredMacroKind)] = &[
("defineProps", RecoveredMacroKind::DefineProps),
("defineEmits", RecoveredMacroKind::DefineEmits),
("withDefaults", RecoveredMacroKind::WithDefaults),
("defineModel", RecoveredMacroKind::DefineModel),
("defineExpose", RecoveredMacroKind::DefineExpose),
("defineOptions", RecoveredMacroKind::DefineOptions),
("defineSlots", RecoveredMacroKind::DefineSlots),
];
impl<'a> ScriptTokenScanner<'a> {
pub fn new(source: &'a str, content_start: u32) -> Self {
Self {
source,
bytes: source.as_bytes(),
pos: 0,
content_start,
}
}
pub fn recover(mut self) -> TokenizerRecovery<'a> {
let mut macros = Vec::new();
let mut functions = Vec::new();
let mut variables = Vec::new();
while self.pos < self.bytes.len() {
if self.bytes[self.pos].is_ascii_whitespace() {
self.pos += 1;
continue;
}
if self.looking_at(b"//") {
self.skip_line_comment();
continue;
}
if self.looking_at(b"/*") {
self.skip_block_comment();
continue;
}
if self.bytes[self.pos] == b'"' || self.bytes[self.pos] == b'\'' {
self.skip_string(self.bytes[self.pos]);
continue;
}
if self.bytes[self.pos] == b'`' {
self.skip_template_literal();
continue;
}
if is_ident_start(self.bytes[self.pos]) {
let ident_start = self.pos;
let ident = self.read_ident();
if ident == "function" {
if let Some(func) = self.try_recover_function() {
functions.push(func);
}
continue;
}
let var_kind = match ident {
"const" => Some(RecoveredVarKind::Const),
"let" => Some(RecoveredVarKind::Let),
"var" => Some(RecoveredVarKind::Var),
_ => None,
};
if let Some(kind) = var_kind {
if !self.is_ident_at(self.pos) {
if let Some(var) = self.try_recover_variable(kind) {
variables.push(var);
}
}
continue;
}
if let Some(&(_, kind)) = MACRO_NAMES.iter().find(|&&(name, _)| name == ident) {
if let Some(call_end) = self.try_match_macro_call() {
let call_span = Span::new(
self.content_start + ident_start as u32,
self.content_start + call_end as u32,
);
let (binding_name, binding_span) =
self.scan_backward_for_binding(ident_start);
macros.push(RecoveredMacro {
kind,
binding_name,
binding_span,
call_span,
});
}
continue;
}
continue;
}
self.pos += 1;
}
TokenizerRecovery {
macros,
functions,
variables,
}
}
fn looking_at(&self, needle: &[u8]) -> bool {
self.bytes[self.pos..].starts_with(needle)
}
fn skip_line_comment(&mut self) {
self.pos += 2; while self.pos < self.bytes.len() && self.bytes[self.pos] != b'\n' {
self.pos += 1;
}
}
fn skip_block_comment(&mut self) {
self.pos += 2; while self.pos + 1 < self.bytes.len() {
if self.bytes[self.pos] == b'*' && self.bytes[self.pos + 1] == b'/' {
self.pos += 2;
return;
}
self.pos += 1;
}
self.pos = self.bytes.len(); }
fn skip_string(&mut self, quote: u8) {
self.pos += 1; while self.pos < self.bytes.len() {
if self.bytes[self.pos] == b'\\' {
self.pos += 2; continue;
}
if self.bytes[self.pos] == quote {
self.pos += 1;
return;
}
self.pos += 1;
}
}
fn skip_template_literal(&mut self) {
self.pos += 1; while self.pos < self.bytes.len() {
if self.bytes[self.pos] == b'\\' {
self.pos += 2;
continue;
}
if self.bytes[self.pos] == b'`' {
self.pos += 1;
return;
}
if self.bytes[self.pos] == b'$'
&& self.pos + 1 < self.bytes.len()
&& self.bytes[self.pos + 1] == b'{'
{
self.pos += 2; self.skip_bracket_matched(b'{', b'}');
continue;
}
self.pos += 1;
}
}
fn read_ident(&mut self) -> &'a str {
let start = self.pos;
while self.pos < self.bytes.len() && is_ident_continue(self.bytes[self.pos]) {
self.pos += 1;
}
&self.source[start..self.pos]
}
fn skip_ws_and_comments(&mut self) -> bool {
let start = self.pos;
loop {
if self.pos >= self.bytes.len() {
break;
}
if self.bytes[self.pos].is_ascii_whitespace() {
self.pos += 1;
continue;
}
if self.looking_at(b"//") {
self.skip_line_comment();
continue;
}
if self.looking_at(b"/*") {
self.skip_block_comment();
continue;
}
break;
}
self.pos > start
}
fn try_match_macro_call(&mut self) -> Option<usize> {
let saved = self.pos;
self.skip_ws_and_comments();
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'<' {
self.pos += 1;
if !self.skip_bracket_matched(b'<', b'>') {
self.pos = saved;
self.skip_ws_and_comments();
} else {
self.skip_ws_and_comments();
}
}
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'(' {
self.pos += 1;
if self.skip_bracket_matched(b'(', b')') {
return Some(self.pos);
}
}
self.pos = saved;
None
}
fn skip_bracket_matched(&mut self, open: u8, close: u8) -> bool {
let mut depth = 1u32;
while self.pos < self.bytes.len() && depth > 0 {
let b = self.bytes[self.pos];
if b == b'"' || b == b'\'' {
self.skip_string(b);
continue;
}
if b == b'`' {
self.skip_template_literal();
continue;
}
if self.looking_at(b"//") {
self.skip_line_comment();
continue;
}
if self.looking_at(b"/*") {
self.skip_block_comment();
continue;
}
if b == open {
depth += 1;
} else if b == close {
depth -= 1;
if depth == 0 {
self.pos += 1; return true;
}
}
self.pos += 1;
}
false }
fn scan_backward_for_binding(&self, macro_start: usize) -> (Option<&'a str>, Option<Span>) {
let mut p = macro_start;
p = self.skip_back_ws(p);
if p == 0 || self.bytes[p - 1] != b'=' {
return (None, None);
}
p -= 1;
p = self.skip_back_ws(p);
if p == 0 || !is_ident_continue(self.bytes[p - 1]) {
return (None, None);
}
let ident_end = p;
while p > 0 && is_ident_continue(self.bytes[p - 1]) {
p -= 1;
}
let ident_start = p;
let name = &self.source[ident_start..ident_end];
p = self.skip_back_ws(p);
let make_span = || {
Span::new(
self.content_start + ident_start as u32,
self.content_start + ident_end as u32,
)
};
let is_word_boundary = |before_kw: usize| -> bool {
before_kw == 0 || !is_ident_continue(self.bytes[before_kw - 1])
};
if p >= 3 && &self.source[p - 3..p] == "var" && is_word_boundary(p - 3) {
return (Some(name), Some(make_span()));
}
if p >= 3 && &self.source[p - 3..p] == "let" && is_word_boundary(p - 3) {
return (Some(name), Some(make_span()));
}
if p >= 5 && &self.source[p - 5..p] == "const" && is_word_boundary(p - 5) {
return (Some(name), Some(make_span()));
}
(None, None)
}
fn skip_back_ws(&self, mut p: usize) -> usize {
while p > 0 && self.bytes[p - 1].is_ascii_whitespace() {
p -= 1;
}
p
}
fn is_ident_at(&self, pos: usize) -> bool {
if pos >= self.bytes.len() {
return false;
}
is_ident_continue(self.bytes[pos])
}
fn try_recover_variable(&mut self, kind: RecoveredVarKind) -> Option<RecoveredVariable<'a>> {
self.skip_ws_and_comments();
if self.pos >= self.bytes.len() || !is_ident_start(self.bytes[self.pos]) {
return None;
}
let name_start = self.pos;
let name = self.read_ident();
let name_end = self.pos;
Some(RecoveredVariable {
name,
name_span: Span::new(
self.content_start + name_start as u32,
self.content_start + name_end as u32,
),
kind,
})
}
fn try_recover_function(&mut self) -> Option<RecoveredFunction<'a>> {
self.skip_ws_and_comments();
if self.pos >= self.bytes.len() || !is_ident_start(self.bytes[self.pos]) {
return None;
}
let name_start = self.pos;
let name = self.read_ident();
let name_end = self.pos;
self.skip_ws_and_comments();
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'<' {
self.pos += 1;
if !self.skip_bracket_matched(b'<', b'>') {
return None; }
self.skip_ws_and_comments();
}
if self.pos >= self.bytes.len() || self.bytes[self.pos] != b'(' {
return None;
}
let params_start = self.pos;
self.pos += 1; if !self.skip_bracket_matched(b'(', b')') {
return None; }
let params_end = self.pos;
Some(RecoveredFunction {
name,
name_span: Span::new(
self.content_start + name_start as u32,
self.content_start + name_end as u32,
),
params_span: Span::new(
self.content_start + params_start as u32,
self.content_start + params_end as u32,
),
})
}
}
/// Returns `true` when `b` may begin an identifier: an ASCII letter, `_` or `$`.
fn is_ident_start(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
/// Returns `true` when `b` may appear inside an identifier: an ASCII letter
/// or digit, `_` or `$`.
fn is_ident_continue(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'$')
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the scanner over `source` with no span offset.
    fn scan(source: &str) -> TokenizerRecovery<'_> {
        ScriptTokenScanner::new(source, 0).recover()
    }

    /// Runs the scanner over `source` with spans shifted by `offset`.
    fn scan_offset(source: &str, offset: u32) -> TokenizerRecovery<'_> {
        ScriptTokenScanner::new(source, offset).recover()
    }

    // ---- macro detection -------------------------------------------------

    #[test]
    fn finds_define_props() {
        let r = scan("defineProps<{ count: number }>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineProps);
        assert!(r.macros[0].binding_name.is_none());
    }

    #[test]
    fn finds_define_props_with_binding() {
        let r = scan("const props = defineProps<{ count: number }>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineProps);
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn finds_define_emits_with_binding() {
        let r = scan("const emit = defineEmits<{ click: [e: MouseEvent] }>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineEmits);
        assert_eq!(r.macros[0].binding_name, Some("emit"));
    }

    #[test]
    fn finds_with_defaults() {
        let r = scan("const props = withDefaults(defineProps<Props>(), { count: 0 })");
        assert!(r
            .macros
            .iter()
            .any(|m| m.kind == RecoveredMacroKind::WithDefaults));
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn finds_define_model() {
        let r = scan("const modelValue = defineModel<string>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineModel);
        assert_eq!(r.macros[0].binding_name, Some("modelValue"));
    }

    #[test]
    fn finds_define_expose() {
        let r = scan("defineExpose({ foo: 1 })");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineExpose);
    }

    #[test]
    fn finds_define_options() {
        let r = scan("defineOptions({ name: 'Foo' })");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineOptions);
    }

    #[test]
    fn finds_define_slots() {
        let r = scan("const slots = defineSlots<{ default(): any }>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineSlots);
        assert_eq!(r.macros[0].binding_name, Some("slots"));
    }

    #[test]
    fn finds_multiple_macros() {
        let r = scan(
            "const props = defineProps<{ x: number }>()\nconst emit = defineEmits<{ click: [] }>()",
        );
        assert_eq!(r.macros.len(), 2);
    }

    // ---- comments, strings and template literals are skipped -------------

    #[test]
    fn ignores_macro_in_line_comment() {
        let r = scan("// defineProps<{ count: number }>()");
        assert!(r.macros.is_empty(), "should not find macro in line comment");
    }

    #[test]
    fn ignores_macro_in_block_comment() {
        let r = scan("/* defineProps<{ count: number }>() */");
        assert!(
            r.macros.is_empty(),
            "should not find macro in block comment"
        );
    }

    #[test]
    fn ignores_macro_in_string() {
        let r = scan(r#""defineProps<{ count: number }>()""#);
        assert!(r.macros.is_empty(), "should not find macro in string");
    }

    #[test]
    fn ignores_macro_in_single_quote_string() {
        let r = scan("'defineProps()'");
        assert!(
            r.macros.is_empty(),
            "should not find macro in single-quote string"
        );
    }

    #[test]
    fn ignores_macro_in_template_literal() {
        let r = scan("`defineProps()`");
        assert!(
            r.macros.is_empty(),
            "should not find macro in template literal"
        );
    }

    #[test]
    fn finds_macro_after_block_comment() {
        let r = scan("/* comment */\ndefineProps()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineProps);
    }

    #[test]
    fn handles_template_literal_with_interpolation() {
        let r = scan("`${defineProps()}` \n defineEmits()");
        assert!(
            !r.macros.is_empty(),
            "should find at least defineEmits after template literal"
        );
        assert!(
            r.macros
                .iter()
                .any(|m| m.kind == RecoveredMacroKind::DefineEmits),
            "should find defineEmits"
        );
    }

    // ---- bracket matching ------------------------------------------------

    #[test]
    fn handles_nested_brackets_in_type_params() {
        let r = scan("defineProps<{ items: Array<{ name: string }> }>()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].kind, RecoveredMacroKind::DefineProps);
    }

    #[test]
    fn handles_nested_parens_in_call() {
        let r = scan("defineProps(foo(bar()))");
        assert_eq!(r.macros.len(), 1);
    }

    #[test]
    fn handles_strings_inside_brackets() {
        let r = scan(r#"defineProps<{ foo: "bar<baz>" }>()"#);
        assert_eq!(r.macros.len(), 1);
    }

    // ---- backward scan for the binding -----------------------------------

    #[test]
    fn backward_scan_const() {
        let r = scan("const props = defineProps()");
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn backward_scan_let() {
        let r = scan("let props = defineProps()");
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn backward_scan_var() {
        let r = scan("var props = defineProps()");
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn no_binding_without_keyword() {
        let r = scan("props = defineProps()");
        assert!(
            r.macros[0].binding_name.is_none(),
            "should not find binding without const/let/var"
        );
    }

    #[test]
    fn backward_scan_with_extra_whitespace() {
        // Runs of spaces between every token, so the whitespace-skipping in
        // the backward scan is exercised (single spaces would only repeat
        // `backward_scan_const`).
        let r = scan("const   props   =   defineProps()");
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    // ---- function declarations -------------------------------------------

    #[test]
    fn finds_function_declaration() {
        let r = scan("function handleClick(event) {}");
        assert_eq!(r.functions.len(), 1);
        assert_eq!(r.functions[0].name, "handleClick");
    }

    #[test]
    fn finds_function_with_multiple_params() {
        let r = scan("function handleDrag(startEvent, endEvent) {}");
        assert_eq!(r.functions.len(), 1);
        assert_eq!(r.functions[0].name, "handleDrag");
    }

    #[test]
    fn finds_function_with_type_params() {
        let r = scan("function foo<T>(x: T) {}");
        assert_eq!(r.functions.len(), 1);
        assert_eq!(r.functions[0].name, "foo");
    }

    #[test]
    fn finds_multiple_functions() {
        let r = scan("function foo() {}\nfunction bar() {}");
        assert_eq!(r.functions.len(), 2);
        assert_eq!(r.functions[0].name, "foo");
        assert_eq!(r.functions[1].name, "bar");
    }

    #[test]
    fn ignores_function_in_comment() {
        let r = scan("// function foo() {}");
        assert!(r.functions.is_empty());
    }

    // ---- input boundaries ------------------------------------------------

    #[test]
    fn macro_at_start_of_file() {
        let r = scan("defineProps()");
        assert_eq!(r.macros.len(), 1);
    }

    #[test]
    fn macro_at_end_of_file_no_trailing_newline() {
        let r = scan("const x = defineProps()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].binding_name, Some("x"));
    }

    #[test]
    fn adjacent_macros() {
        let r = scan("defineProps()\ndefineEmits()");
        assert_eq!(r.macros.len(), 2);
    }

    #[test]
    fn macro_after_comment() {
        let r = scan("// This sets up props\nconst props = defineProps()");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].binding_name, Some("props"));
    }

    #[test]
    fn empty_source() {
        let r = scan("");
        assert!(r.macros.is_empty());
        assert!(r.functions.is_empty());
    }

    #[test]
    fn only_whitespace() {
        let r = scan(" \n\n ");
        assert!(r.macros.is_empty());
        assert!(r.functions.is_empty());
    }

    // ---- unterminated constructs must not panic --------------------------

    #[test]
    fn unclosed_string_doesnt_panic() {
        let r = scan(r#"const x = "unclosed"#);
        let _ = r;
    }

    #[test]
    fn unclosed_template_literal_doesnt_panic() {
        let r = scan("const x = `unclosed");
        let _ = r;
    }

    #[test]
    fn unclosed_block_comment_doesnt_panic() {
        let r = scan("/* unclosed block comment\ndefineProps()");
        assert!(r.macros.is_empty());
    }

    // ---- spans are shifted by content_start ------------------------------

    #[test]
    fn spans_include_content_start_offset() {
        let r = scan_offset("defineProps()", 100);
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.macros[0].call_span.start, 100);
        // 100 + the 13 bytes of `defineProps()`.
        assert_eq!(r.macros[0].call_span.end, 113);
    }

    #[test]
    fn binding_span_includes_offset() {
        let r = scan_offset("const props = defineProps()", 50);
        // `props` occupies bytes 6..11 of the source.
        assert_eq!(r.macros[0].binding_span.unwrap().start, 56);
        assert_eq!(r.macros[0].binding_span.unwrap().end, 61);
    }

    #[test]
    fn function_spans_include_offset() {
        let r = scan_offset("function foo() {}", 200);
        // `foo` occupies bytes 9..12, `()` occupies bytes 12..14.
        assert_eq!(r.functions[0].name_span.start, 209);
        assert_eq!(r.functions[0].name_span.end, 212);
        assert_eq!(r.functions[0].params_span.start, 212);
        assert_eq!(r.functions[0].params_span.end, 214);
    }

    // ---- mixed and broken input ------------------------------------------

    #[test]
    fn macros_and_functions_together() {
        let r = scan("const props = defineProps<{ x: number }>()\nfunction handleClick(event) {}");
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.functions.len(), 1);
        assert_eq!(r.macros[0].binding_name, Some("props"));
        assert_eq!(r.functions[0].name, "handleClick");
    }

    #[test]
    fn broken_macro_no_parens() {
        let r = scan("defineProps<");
        assert!(
            r.macros.is_empty(),
            "incomplete macro should not be recovered"
        );
    }

    #[test]
    fn broken_macro_unclosed_generic() {
        let r = scan("defineProps<{ count.");
        assert!(
            r.macros.is_empty(),
            "unclosed generic should not produce a macro"
        );
    }

    #[test]
    fn partial_define_props_but_later_valid_macro() {
        let r = scan("defineProps<{\nconst emit = defineEmits()");
        assert!(r
            .macros
            .iter()
            .any(|m| m.kind == RecoveredMacroKind::DefineEmits));
    }

    #[test]
    fn keyword_const_not_partial_match() {
        let r = scan("constant = defineProps()");
        assert!(
            r.macros[0].binding_name.is_none(),
            "constant should not match const"
        );
    }

    // ---- variable declarations -------------------------------------------

    #[test]
    fn finds_const_variable() {
        let r = scan("const count = ref(0)");
        assert_eq!(r.variables.len(), 1);
        assert_eq!(r.variables[0].name, "count");
        assert_eq!(r.variables[0].kind, RecoveredVarKind::Const);
    }

    #[test]
    fn finds_let_variable() {
        let r = scan("let x = 1");
        assert_eq!(r.variables.len(), 1);
        assert_eq!(r.variables[0].name, "x");
        assert_eq!(r.variables[0].kind, RecoveredVarKind::Let);
    }

    #[test]
    fn finds_var_variable() {
        let r = scan("var y = 2");
        assert_eq!(r.variables.len(), 1);
        assert_eq!(r.variables[0].name, "y");
        assert_eq!(r.variables[0].kind, RecoveredVarKind::Var);
    }

    #[test]
    fn finds_multiple_variables() {
        let r = scan("const a = 1\nlet b = 2\nvar c = 3");
        assert_eq!(r.variables.len(), 3);
        assert_eq!(r.variables[0].name, "a");
        assert_eq!(r.variables[1].name, "b");
        assert_eq!(r.variables[2].name, "c");
    }

    #[test]
    fn variable_span_includes_offset() {
        let r = scan_offset("const count = 1", 100);
        // `count` occupies bytes 6..11 of the source.
        assert_eq!(r.variables[0].name_span.start, 106);
        assert_eq!(r.variables[0].name_span.end, 111);
    }

    #[test]
    fn const_in_comment_not_variable() {
        let r = scan("// const x = 1\nconst y = 2");
        assert_eq!(r.variables.len(), 1);
        assert_eq!(r.variables[0].name, "y");
    }

    #[test]
    fn const_in_string_not_variable() {
        let r = scan(r#""const x = 1""#);
        assert!(r.variables.is_empty());
    }

    #[test]
    fn variables_with_macros_and_functions() {
        let r = scan("const count = ref(0)\nconst props = defineProps()\nfunction handle() {}");
        // Both `count` and `props` are variables; `props` is also a binding.
        assert_eq!(r.variables.len(), 2);
        assert_eq!(r.macros.len(), 1);
        assert_eq!(r.functions.len(), 1);
    }

    #[test]
    fn constant_keyword_not_variable() {
        let r = scan("constant = 1");
        assert!(r.variables.is_empty());
    }

    #[test]
    fn letter_keyword_not_variable() {
        let r = scan("letter = 1");
        assert!(r.variables.is_empty());
    }
}