use crate::types::*;
use super::node_text;
/// Collects the symbols and imports found in a parsed HTML document.
///
/// Starts at the root node of `tree` and delegates the traversal to
/// `visit_node`. `source` is the byte buffer the tree was parsed from; it is
/// passed through so node text can be read from it.
///
/// Returns `(symbols, imports)` in the order the traversal produced them.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let (mut symbols, mut imports) = (Vec::new(), Vec::new());
    visit_node(&tree.root_node(), source, &mut symbols, &mut imports);
    (symbols, imports)
}
/// Dispatches every direct child of `node` to the handler for its kind.
///
/// * `element` children go to `handle_element`.
/// * `script_element` children go to `handle_script_element`.
/// * `style_element` children are recorded as a `Module` symbol named `style`
///   and are not descended into here.
/// * Any other child is traversed recursively with this same function.
fn visit_node(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    let mut walker = node.walk();
    for child_node in node.children(&mut walker) {
        let kind = child_node.kind();
        if kind == "element" {
            handle_element(&child_node, source, symbols, imports);
        } else if kind == "script_element" {
            handle_script_element(&child_node, source, symbols, imports);
        } else if kind == "style_element" {
            let style_block = Symbol {
                name: "style".to_string(),
                kind: SymbolKind::Module,
                span: Span::from_node(&child_node),
                signature: String::new(),
                doc_comment: None,
                parent: None,
                children: Vec::new(),
            };
            symbols.push(style_block);
        } else {
            // Wrapper or otherwise uninteresting node: keep looking below it.
            visit_node(&child_node, source, symbols, imports);
        }
    }
}
/// Classifies a `script_element` node as either an import or an inline module.
///
/// When the element's `start_tag` carries a `src` attribute with a value, an
/// `Import` with that value as its path is pushed to `imports`. Otherwise a
/// `Module` symbol named `script` is pushed to `symbols`. In both cases the
/// recorded span is that of the whole script element.
fn handle_script_element(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    let external_src = find_child_by_kind(node, "start_tag")
        .and_then(|start| find_attribute_value(&start, "src", source));

    match external_src {
        Some(path) => imports.push(Import {
            path,
            alias: None,
            span: Span::from_node(node),
        }),
        None => symbols.push(Symbol {
            name: "script".to_string(),
            kind: SymbolKind::Module,
            span: Span::from_node(node),
            signature: String::new(),
            doc_comment: None,
            parent: None,
            children: Vec::new(),
        }),
    }
}
/// Records what a generic `element` node contributes, then descends into it.
///
/// * A `link` element with an `href` value produces an `Import` whose path is
///   that value.
/// * A `template` element produces a `Module` symbol named `template`.
/// * Any element with an `id` value produces a `Const` symbol named after the
///   id, with `<tag-name>` as its signature.
///
/// The tag name is lowercased before matching. All recorded spans cover the
/// whole element. The element's children are always visited afterwards via
/// `visit_node`, including when no opening tag could be found.
fn handle_element(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    // The HTML grammar represents `<x ... />` as a `self_closing_tag` child
    // instead of a `start_tag`. Accept both so that e.g.
    // `<link href="a.css" />` and `<div id="x" />` are not silently skipped.
    let Some(tag) = find_child_by_kind(node, "start_tag")
        .or_else(|| find_child_by_kind(node, "self_closing_tag"))
    else {
        visit_node(node, source, symbols, imports);
        return;
    };

    let tag_name = find_child_by_kind(&tag, "tag_name")
        .map(|n| node_text(&n, source).to_lowercase());
    // An element without a readable tag name falls through with an empty name.
    let tag_name_str = tag_name.as_deref().unwrap_or("");

    match tag_name_str {
        "link" => {
            if let Some(href) = find_attribute_value(&tag, "href", source) {
                imports.push(Import {
                    path: href,
                    alias: None,
                    span: Span::from_node(node),
                });
            }
        }
        "template" => {
            symbols.push(Symbol {
                name: "template".to_string(),
                kind: SymbolKind::Module,
                span: Span::from_node(node),
                signature: String::new(),
                doc_comment: None,
                parent: None,
                children: Vec::new(),
            });
        }
        _ => {}
    }

    // The `id` symbol is independent of the tag-specific handling above, so a
    // `<template id="x">` yields both a Module and a Const symbol.
    if let Some(id_val) = find_attribute_value(&tag, "id", source) {
        symbols.push(Symbol {
            name: id_val,
            kind: SymbolKind::Const,
            span: Span::from_node(node),
            signature: format!("<{}>", tag_name_str),
            doc_comment: None,
            parent: None,
            children: Vec::new(),
        });
    }

    visit_node(node, source, symbols, imports);
}
/// Returns the first direct child of `node` whose kind equals `kind`, or
/// `None` when there is no such child.
///
/// The returned node carries the tree lifetime `'tree`, not the lifetime of
/// the `node` borrow, so it may outlive a temporary or local parent handle.
fn find_child_by_kind<'tree>(
    node: &tree_sitter::Node<'tree>,
    kind: &str,
) -> Option<tree_sitter::Node<'tree>> {
    let mut cursor = node.walk();
    // Bind the result before returning so the child iterator (which borrows
    // `cursor`) is dropped before `cursor` itself goes out of scope.
    let found = node
        .children(&mut cursor)
        .find(|child| child.kind() == kind);
    found
}
/// Looks up the value of attribute `attr_name` among the `attribute` children
/// of `tag`.
///
/// * The attribute name is compared ASCII-case-insensitively, matching how
///   HTML treats attribute names (`HREF` and `href` are the same attribute).
/// * Both quoted (`id="x"`, `id='x'`) and unquoted (`id=x`) values are
///   supported; for quoted values only the single enclosing quote pair is
///   removed, so quote characters inside the value are preserved.
/// * An attribute that matches by name but has no value is skipped and the
///   scan continues with the remaining attributes.
///
/// Returns the value of the first matching attribute that has one, or `None`.
fn find_attribute_value(tag: &tree_sitter::Node, attr_name: &str, source: &[u8]) -> Option<String> {
    /// Removes the opening quote of `raw` and the matching closing quote if
    /// present; text that does not start with a quote is returned unchanged.
    fn strip_enclosing_quotes(raw: &str) -> &str {
        for quote in ['"', '\''] {
            if let Some(rest) = raw.strip_prefix(quote) {
                return rest.strip_suffix(quote).unwrap_or(rest);
            }
        }
        raw
    }

    let mut cursor = tag.walk();
    for attribute in tag.children(&mut cursor) {
        if attribute.kind() != "attribute" {
            continue;
        }
        let Some(name_node) = find_child_by_kind(&attribute, "attribute_name") else {
            continue;
        };
        if !node_text(&name_node, source).eq_ignore_ascii_case(attr_name) {
            continue;
        }

        if let Some(quoted) = find_child_by_kind(&attribute, "quoted_attribute_value") {
            let raw = node_text(&quoted, source);
            return Some(strip_enclosing_quotes(&raw).to_string());
        }
        // Unquoted form: the value node hangs directly off the attribute.
        if let Some(bare) = find_child_by_kind(&attribute, "attribute_value") {
            return Some(node_text(&bare, source).to_string());
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter HTML grammar and runs `extract`
    /// on the resulting tree.
    fn parse_html(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language: tree_sitter::Language = tree_sitter_html::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(source, None).unwrap();
        extract(&tree, source.as_bytes())
    }

    /// End-to-end check over a small document containing a stylesheet link,
    /// an external script, an inline style, two `id`s, a template and an
    /// inline script.
    #[test]
    fn test_html_extraction() {
        let source = r#"<!DOCTYPE html>
<html>
<head>
<link href="styles.css" rel="stylesheet">
<script src="app.js"></script>
<style>body { color: red; }</style>
</head>
<body>
<div id="main-content">
<h1 id="title">Hello</h1>
<template>
<p>Template content</p>
</template>
<script>console.log("inline");</script>
</div>
</body>
</html>"#;
        let (symbols, imports) = parse_html(source);

        // Imports: the stylesheet link and the external script.
        let import_paths: Vec<&str> = imports.iter().map(|i| i.path.as_str()).collect();
        assert_eq!(import_paths.len(), 2);
        for expected in ["styles.css", "app.js"] {
            assert!(import_paths.contains(&expected));
        }

        // Names of all symbols of a given kind, in extraction order.
        let names_of = |kind: SymbolKind| -> Vec<&str> {
            symbols
                .iter()
                .filter(|s| s.kind == kind)
                .map(|s| s.name.as_str())
                .collect()
        };

        // Modules: inline style, template, inline script.
        let modules = names_of(SymbolKind::Module);
        assert_eq!(modules.len(), 3);
        for expected in ["style", "template", "script"] {
            assert!(modules.contains(&expected));
        }

        // Consts: one per element carrying an `id`.
        let consts = names_of(SymbolKind::Const);
        assert_eq!(consts.len(), 2);
        for expected in ["main-content", "title"] {
            assert!(consts.contains(&expected));
        }
    }
}