use tree_sitter::{Node, Parser};
use crate::error::{CodegraphError, Result};
use crate::graph::types::{ByteSpan, FileFacts, ScopeId, ScopeKind, SymbolKind};
use crate::lang::Language;
use super::Extractor;
use super::module_symbol;
use super::push_scope;
use super::shift_offsets;
use super::typescript::{extract_ecmascript, module_namespaces};
/// Fact extractor for Svelte documents.
///
/// Its [`Extractor`] implementation parses the document with the Svelte
/// grammar, runs the ECMAScript extractor over the raw text of every
/// `<script>` element it finds, and merges the results into one `FileFacts`.
pub struct SvelteExtractor;
impl Extractor for SvelteExtractor {
    /// Reports the language handled by this extractor.
    fn lang(&self) -> Language {
        Language::Svelte
    }

    /// Builds the merged [`FileFacts`] for one Svelte document.
    ///
    /// The document is parsed with the Svelte grammar and a module scope
    /// spanning `0..source.len()` is pushed as the first scope. The raw text of
    /// each `<script>` element is then run through `extract_ecmascript`, its
    /// facts are passed to `shift_offsets` with the script's start byte, its
    /// scope ids are re-based onto the merged scope table, and everything except
    /// the block's own `Module` symbols is appended to the result. Finally a
    /// single symbol produced by `module_symbol` is pushed for the document.
    ///
    /// # Errors
    ///
    /// Returns [`CodegraphError::Parse`] when the grammar cannot be installed on
    /// the parser or when parsing yields no tree, and forwards any error
    /// returned by `extract_ecmascript`.
    fn extract(&self, source: &str, file: &str) -> Result<FileFacts> {
        let parse_failure = || CodegraphError::Parse {
            path: file.to_owned(),
        };

        let grammar = crate::grammar::svelte();
        let mut parser = Parser::new();
        parser.set_language(&grammar).map_err(|_| parse_failure())?;

        let bytes = source.as_bytes();
        let tree = parser.parse(bytes, None).ok_or_else(parse_failure)?;

        let mut merged = FileFacts {
            file: file.to_owned(),
            lang: "svelte".to_owned(),
            symbols: Vec::new(),
            references: Vec::new(),
            scopes: Vec::new(),
            bindings: Vec::new(),
            ffi_exports: Vec::new(),
        };

        // The document-wide scope is pushed first; every script block's first
        // scope is later attached underneath it.
        let whole_document = ByteSpan {
            start: 0,
            end: source.len(),
        };
        let doc_root: ScopeId = push_scope(
            &mut merged.scopes,
            None,
            whole_document,
            ScopeKind::Module,
        );

        let root = tree.root_node();
        let mut script_elements = Vec::new();
        collect_script_elements(&root, &mut script_elements);

        // Script elements without a `raw_text` child contribute nothing.
        let script_bodies = script_elements
            .iter()
            .filter_map(|element| find_raw_text(element).map(|raw| (element, raw)));

        for (element, raw) in script_bodies {
            let offset = raw.start_byte();
            let script_source = std::str::from_utf8(&bytes[raw.byte_range()]).unwrap_or_default();
            let dialect = detect_script_lang(element, bytes);

            let mut block = extract_ecmascript(script_source, file, dialect)?;
            shift_offsets(&mut block, offset, file, "svelte", bytes);
            rebase_scope_ids(&mut block, merged.scopes.len(), doc_root);

            let FileFacts {
                symbols,
                references,
                scopes,
                bindings,
                ..
            } = block;

            // Per-block module symbols are dropped; one document-level module
            // symbol is pushed after the loop instead.
            merged.symbols.extend(
                symbols
                    .into_iter()
                    .filter(|symbol| symbol.kind != SymbolKind::Module),
            );
            merged.references.extend(references);
            merged.scopes.extend(scopes);
            merged.bindings.extend(bindings);
        }

        let namespaces = module_namespaces(file);
        let document_symbol = module_symbol(Language::Svelte, &namespaces, file, source.len());
        merged.symbols.push(document_symbol);

        Ok(merged)
    }
}

/// Re-bases the scope ids inside `block` so they index into the merged scope
/// table, where the block's scopes will start at position `base`.
///
/// Binding scopes, reference scopes and scope parents are all offset by
/// `base`; afterwards the block's first scope is parented to `doc_root`.
fn rebase_scope_ids(block: &mut FileFacts, base: ScopeId, doc_root: ScopeId) {
    block
        .bindings
        .iter_mut()
        .for_each(|binding| binding.scope += base);

    for scope in block
        .references
        .iter_mut()
        .filter_map(|reference| reference.scope.as_mut())
    {
        *scope += base;
    }

    for parent in block
        .scopes
        .iter_mut()
        .filter_map(|scope| scope.parent.as_mut())
    {
        *parent += base;
    }

    if let Some(block_root) = block.scopes.first_mut() {
        block_root.parent = Some(doc_root);
    }
}
/// Appends every `script_element` node found at or below `node` to `out`,
/// visiting children in order.
///
/// A `script_element` is recorded as-is and its own children are not searched.
fn collect_script_elements<'a>(node: &Node<'a>, out: &mut Vec<Node<'a>>) {
    match node.kind() {
        "script_element" => out.push(*node),
        _ => {
            let mut walker = node.walk();
            node.children(&mut walker)
                .for_each(|child| collect_script_elements(&child, out));
        }
    }
}
/// Returns the first direct child of `script_el` whose kind is `raw_text`,
/// or `None` when the element has no such child.
fn find_raw_text<'a>(script_el: &Node<'a>) -> Option<Node<'a>> {
    let mut walker = script_el.walk();
    for child in script_el.children(&mut walker) {
        if child.kind() == "raw_text" {
            return Some(child);
        }
    }
    None
}
/// Determines which ECMAScript dialect a `<script>` element contains.
///
/// Every `attribute` inside the element's `start_tag` is inspected. The result
/// is [`Language::TypeScript`] when an attribute named `lang` has the exact
/// value `ts` or `typescript`; both the quoted (`lang="ts"`) and the unquoted
/// (`lang=ts`) spellings are recognised. Anything else, including a missing
/// `lang` attribute, yields [`Language::JavaScript`].
///
/// `bytes` is the buffer the node's byte ranges index into.
fn detect_script_lang(script_el: &Node<'_>, bytes: &[u8]) -> Language {
    let mut cursor = script_el.walk();
    for tag in script_el.children(&mut cursor) {
        if tag.kind() != "start_tag" {
            continue;
        }
        let mut tag_cursor = tag.walk();
        for attr in tag.children(&mut tag_cursor) {
            if attr.kind() == "attribute" && is_typescript_lang_attr(&attr, bytes) {
                return Language::TypeScript;
            }
        }
    }
    Language::JavaScript
}

/// Reports whether `attr` is a `lang` attribute whose value selects TypeScript.
///
/// The value may appear either as an `attribute_value` nested in a
/// `quoted_attribute_value`, or as an `attribute_value` placed directly under
/// the attribute (the unquoted form).
fn is_typescript_lang_attr(attr: &Node<'_>, bytes: &[u8]) -> bool {
    let mut named_lang = false;
    let mut typescript_value = false;

    let mut cursor = attr.walk();
    for part in attr.children(&mut cursor) {
        match part.kind() {
            "attribute_name" => {
                if &bytes[part.byte_range()] == b"lang" {
                    named_lang = true;
                }
            }
            // Unquoted value: `lang=ts`.
            "attribute_value" => {
                if is_typescript_value(&bytes[part.byte_range()]) {
                    typescript_value = true;
                }
            }
            // Quoted value: `lang="ts"` / `lang='ts'`.
            "quoted_attribute_value" => {
                let mut inner = part.walk();
                let quoted_ts = part.children(&mut inner).any(|value| {
                    value.kind() == "attribute_value"
                        && is_typescript_value(&bytes[value.byte_range()])
                });
                if quoted_ts {
                    typescript_value = true;
                }
            }
            _ => {}
        }
    }

    named_lang && typescript_value
}

/// Reports whether a raw `lang` attribute value names TypeScript.
fn is_typescript_value(value: &[u8]) -> bool {
    value == b"ts" || value == b"typescript"
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::types::SymbolKind;

    /// Fixture: a component with a single `<script lang="ts">` block that
    /// imports `foo`, exports `run`, and declares `count`.
    fn svelte_source_with_ts_script() -> &'static str {
        r#"<script lang="ts">
import { foo } from './util';
export function run(x: number) { foo(x); }
let count = 0;
</script>
<main>Hello</main>"#
    }

    /// A TypeScript script block yields the `run` function symbol and at
    /// least one reference, and the facts are labelled as Svelte.
    #[test]
    fn extracts_run_symbol_and_reference_lang_ts() {
        let source = svelte_source_with_ts_script();
        let facts = SvelteExtractor
            .extract(source, "src/App.svelte")
            .expect("extraction should succeed");
        assert_eq!(facts.lang, "svelte");
        assert_eq!(facts.file, "src/App.svelte");
        let run_sym = facts
            .symbols
            .iter()
            .find(|s| s.name == "run" && s.kind == SymbolKind::Function);
        assert!(
            run_sym.is_some(),
            "expected `run` function symbol; got: {:?}",
            facts.symbols
        );
        assert!(
            !facts.references.is_empty(),
            "expected at least one reference"
        );
    }

    /// Symbol spans are expressed in byte offsets of the whole `.svelte`
    /// document, not of the script body.
    #[test]
    fn offset_remap_is_correct() {
        let source = svelte_source_with_ts_script();
        let facts = SvelteExtractor
            .extract(source, "src/App.svelte")
            .expect("extraction should succeed");
        let run_sym = facts
            .symbols
            .iter()
            .find(|s| s.name == "run" && s.kind == SymbolKind::Function)
            .expect("`run` symbol must be present");
        let expected_start = source
            .find("export function run")
            .expect("`export function run` must appear in source");
        assert_eq!(
            run_sym.span.start, expected_start,
            "span.start should be the byte offset of the `run` declaration in the full .svelte source"
        );
        assert!(
            source[run_sym.span.start..run_sym.span.end].contains("run"),
            "remapped span must slice the run declaration out of the .svelte source"
        );
    }

    /// A script block without a `lang` attribute is still extracted.
    #[test]
    fn extracts_js_script_no_lang_attr() {
        let source = r#"<script>
export function greet(name) { return name; }
</script>
<p>Hi</p>"#;
        let facts = SvelteExtractor
            .extract(source, "src/Comp.svelte")
            .expect("extraction should succeed");
        assert_eq!(facts.lang, "svelte", "lang should always be 'svelte'");
        let greet = facts.symbols.iter().find(|s| s.name == "greet");
        assert!(
            greet.is_some(),
            "expected `greet` symbol; got: {:?}",
            facts.symbols
        );
    }

    /// With two script blocks, both contribute symbols, exactly one module
    /// symbol and one root scope exist, block roots hang off the document
    /// root, and every recorded scope id indexes into `facts.scopes`.
    #[test]
    fn two_script_blocks_both_extracted_and_scope_indices_valid() {
        let source = r#"<script context="module">
export function preload() {}
</script>
<script>
export function setup() {}
</script>
<div>content</div>"#;
        let facts = SvelteExtractor
            .extract(source, "src/Page.svelte")
            .expect("extraction should succeed");
        let has_preload = facts.symbols.iter().any(|s| s.name == "preload");
        let has_setup = facts.symbols.iter().any(|s| s.name == "setup");
        assert!(has_preload, "expected `preload` from module script block");
        assert!(has_setup, "expected `setup` from instance script block");

        // Exactly one module symbol, covering the whole document.
        let module_syms: Vec<_> = facts
            .symbols
            .iter()
            .filter(|s| s.kind == SymbolKind::Module)
            .collect();
        assert_eq!(
            module_syms.len(),
            1,
            "expected exactly one Module symbol, got {module_syms:?}"
        );
        assert_eq!(module_syms[0].span.start, 0, "module span must start at 0");
        assert_eq!(
            module_syms[0].span.end,
            source.len(),
            "module span must cover the whole document"
        );

        // Exactly one parentless scope, covering the whole document.
        let root_scopes: Vec<_> = facts.scopes.iter().filter(|s| s.parent.is_none()).collect();
        assert_eq!(
            root_scopes.len(),
            1,
            "expected exactly one root scope, got {root_scopes:?}"
        );
        assert_eq!(root_scopes[0].span.start, 0, "root scope must start at 0");
        assert_eq!(
            root_scopes[0].span.end,
            source.len(),
            "root scope must cover the whole document"
        );

        // Every other module-kind scope is a script block root and must have
        // been re-parented under index 0.
        let block_roots: Vec<_> = facts
            .scopes
            .iter()
            .enumerate()
            .filter(|(i, s)| *i != 0 && s.kind == ScopeKind::Module)
            .collect();
        assert!(
            !block_roots.is_empty(),
            "expected at least one re-parented block root scope"
        );
        for (i, sc) in &block_roots {
            assert_eq!(
                sc.parent,
                Some(0),
                "block root scope at index {i} must be re-parented under the doc root"
            );
        }

        // A binding's scope id must always be a valid index; an empty scope
        // table is not an excuse for a dangling id.
        for b in &facts.bindings {
            assert!(
                b.scope < facts.scopes.len(),
                "binding scope {} out of range (scopes.len={})",
                b.scope,
                facts.scopes.len()
            );
        }
        for r in &facts.references {
            if let Some(s) = r.scope {
                assert!(
                    s < facts.scopes.len(),
                    "reference scope {} out of range (scopes.len={})",
                    s,
                    facts.scopes.len()
                );
            }
        }
    }

    /// A document without any script block still yields one module symbol
    /// and one root scope, both spanning the document, and no references.
    #[test]
    fn no_script_block_emits_single_module_symbol_and_root_scope() {
        let source = r#"<main><p>Hello world</p></main>"#;
        let facts = SvelteExtractor
            .extract(source, "src/NoScript.svelte")
            .expect("extraction should succeed even with no script");
        assert_eq!(facts.lang, "svelte");
        assert_eq!(facts.file, "src/NoScript.svelte");
        assert_eq!(facts.symbols.len(), 1, "expected exactly one symbol");
        assert_eq!(facts.symbols[0].kind, SymbolKind::Module);
        assert_eq!(facts.symbols[0].span.start, 0);
        assert_eq!(facts.symbols[0].span.end, source.len());
        assert!(facts.references.is_empty(), "expected no references");
        assert_eq!(facts.scopes.len(), 1, "expected exactly one (root) scope");
        assert_eq!(facts.scopes[0].parent, None);
        assert_eq!(facts.scopes[0].span.start, 0);
        assert_eq!(facts.scopes[0].span.end, source.len());
    }

    /// A single-script document yields exactly one module symbol spanning the
    /// document, with the expected SCIP identifier.
    #[test]
    fn single_script_emits_one_module_symbol_spanning_document() {
        let source = svelte_source_with_ts_script();
        let facts = SvelteExtractor
            .extract(source, "src/App.svelte")
            .expect("extraction should succeed");
        let module_syms: Vec<_> = facts
            .symbols
            .iter()
            .filter(|s| s.kind == SymbolKind::Module)
            .collect();
        assert_eq!(
            module_syms.len(),
            1,
            "single-script .svelte must yield exactly one Module symbol, got {module_syms:?}"
        );
        assert_eq!(module_syms[0].span.start, 0, "module span must start at 0");
        assert_eq!(
            module_syms[0].span.end,
            source.len(),
            "module span must cover the whole document"
        );
        assert_eq!(
            module_syms[0].id.to_scip_string(),
            "codegraph . . . src/App/"
        );
    }
}