car-ast 0.15.2

Tree-sitter AST parsing for code-aware inference
Documentation
use super::node_text;
use crate::types::*;

/// Collects the symbols and imports found in a parsed HTML syntax tree.
///
/// The walk starts at the tree's root node and descends through every child.
/// `source` is the byte buffer used to read node text (presumably the same
/// bytes the tree was parsed from — the function does not check this).
///
/// Returns `(symbols, imports)` in the order they were encountered.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let mut collected_symbols: Vec<Symbol> = Vec::new();
    let mut collected_imports: Vec<Import> = Vec::new();

    visit_node(
        &tree.root_node(),
        source,
        &mut collected_symbols,
        &mut collected_imports,
    );

    (collected_symbols, collected_imports)
}

/// Dispatches every direct child of `node` according to its node kind.
///
/// * `element` children are handed to [`handle_element`].
/// * `script_element` children are handed to [`handle_script_element`].
/// * `style_element` children are recorded as a `Module` symbol named
///   `"style"`; their contents are not descended into.
/// * Any other child is descended into recursively.
fn visit_node(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    let mut walker = node.walk();
    for current in node.children(&mut walker) {
        let kind = current.kind();

        if kind == "element" {
            handle_element(&current, source, symbols, imports);
        } else if kind == "script_element" {
            handle_script_element(&current, source, symbols, imports);
        } else if kind == "style_element" {
            let style_block = Symbol {
                name: "style".to_string(),
                kind: SymbolKind::Module,
                span: Span::from_node(&current),
                signature: String::new(),
                doc_comment: None,
                parent: None,
                children: Vec::new(),
            };
            symbols.push(style_block);
        } else {
            // Not a node kind we record directly: keep looking further down.
            visit_node(&current, source, symbols, imports);
        }
    }
}

/// Records a `script_element` node as either an import or a module symbol.
///
/// When the element's `start_tag` carries a `src` attribute value, that value
/// becomes the path of a new [`Import`]. Otherwise the script is treated as
/// inline and a `Module` symbol named `"script"` is pushed instead. In both
/// cases the recorded span covers the whole script element.
fn handle_script_element(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    let external_src = find_child_by_kind(node, "start_tag")
        .and_then(|start| find_attribute_value(&start, "src", source));

    match external_src {
        // External script -> Import
        Some(path) => imports.push(Import {
            path,
            alias: None,
            span: Span::from_node(node),
        }),
        // Inline script -> Module
        None => symbols.push(Symbol {
            name: "script".to_string(),
            kind: SymbolKind::Module,
            span: Span::from_node(node),
            signature: String::new(),
            doc_comment: None,
            parent: None,
            children: Vec::new(),
        }),
    }
}

/// Records what a generic `element` node contributes, then descends into it.
///
/// * `<link>` with an `href` attribute value -> [`Import`] whose path is that value.
/// * `<template>` -> `Module` symbol named `"template"`.
/// * Any element with an `id` attribute value -> `Const` symbol named after
///   the id, with the signature `<tag-name>`.
///
/// Tag names are lowercased before matching. The opening tag is looked up as
/// either a `start_tag` or a `self_closing_tag`, so `<link href="a.css" />`
/// and `<div id="x" />` are handled the same way as their non-self-closing
/// forms. Elements with neither kind of tag are only descended into.
fn handle_element(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    // tree-sitter-html emits `self_closing_tag` (not `start_tag`) for `<x ... />`;
    // both carry the tag name and the attributes as direct children.
    let tag = find_child_by_kind(node, "start_tag")
        .or_else(|| find_child_by_kind(node, "self_closing_tag"));

    let Some(tag) = tag else {
        visit_node(node, source, symbols, imports);
        return;
    };

    let tag_name =
        find_child_by_kind(&tag, "tag_name").map(|n| node_text(&n, source).to_lowercase());

    // A missing tag name falls through to the catch-all arm below and yields
    // the signature `<>` for id landmarks.
    let tag_name_str = tag_name.as_deref().unwrap_or("");

    match tag_name_str {
        "link" => {
            if let Some(href) = find_attribute_value(&tag, "href", source) {
                imports.push(Import {
                    path: href,
                    alias: None,
                    span: Span::from_node(node),
                });
            }
        }
        "template" => {
            symbols.push(Symbol {
                name: "template".to_string(),
                kind: SymbolKind::Module,
                span: Span::from_node(node),
                signature: String::new(),
                doc_comment: None,
                parent: None,
                children: Vec::new(),
            });
        }
        _ => {}
    }

    // Elements with id attribute -> Const landmarks
    if let Some(id_val) = find_attribute_value(&tag, "id", source) {
        symbols.push(Symbol {
            name: id_val,
            kind: SymbolKind::Const,
            span: Span::from_node(node),
            signature: format!("<{}>", tag_name_str),
            doc_comment: None,
            parent: None,
            children: Vec::new(),
        });
    }

    // Recurse into children for nested elements
    visit_node(node, source, symbols, imports);
}

/// Returns the first direct child of `node` whose kind equals `kind`.
///
/// Only immediate children are inspected; `None` is returned when none of
/// them match. The returned node is tied to the tree lifetime `'tree` only,
/// not to the borrow of `node`, so it may outlive the reference it was found
/// through.
fn find_child_by_kind<'tree>(
    node: &tree_sitter::Node<'tree>,
    kind: &str,
) -> Option<tree_sitter::Node<'tree>> {
    let mut cursor = node.walk();
    // Bind the result before returning: the child iterator borrows `cursor`
    // and must be dropped before `cursor` itself goes out of scope.
    let found = node
        .children(&mut cursor)
        .find(|child| child.kind() == kind);
    found
}

/// Looks up the value of the attribute `attr_name` on a tag node.
///
/// `tag` is expected to be a node whose direct children include `attribute`
/// nodes (e.g. a `start_tag`). Attribute names are compared ASCII
/// case-insensitively, as HTML attribute names are case-insensitive.
///
/// Both quoted (`id="main"`, `id='main'`) and unquoted (`id=main`) values are
/// supported. For quoted values exactly one pair of surrounding quotes is
/// removed, so quote characters that belong to the value are preserved.
///
/// Returns `None` when no attribute with that name carries a value. A
/// matching attribute without a value (e.g. a bare `hidden`) is skipped and
/// the search continues with the remaining attributes.
fn find_attribute_value(tag: &tree_sitter::Node, attr_name: &str, source: &[u8]) -> Option<String> {
    let mut cursor = tag.walk();
    for child in tag.children(&mut cursor) {
        if child.kind() != "attribute" {
            continue;
        }
        let Some(name_node) = find_child_by_kind(&child, "attribute_name") else {
            continue;
        };
        if !node_text(&name_node, source).eq_ignore_ascii_case(attr_name) {
            continue;
        }

        if let Some(quoted) = find_child_by_kind(&child, "quoted_attribute_value") {
            return Some(strip_quote_pair(&node_text(&quoted, source)).to_string());
        }
        // Unquoted values appear as a bare `attribute_value` child.
        if let Some(bare) = find_child_by_kind(&child, "attribute_value") {
            return Some(node_text(&bare, source).to_string());
        }
    }
    None
}

/// Removes one matching pair of surrounding `"` or `'` from `raw`.
///
/// Text that is not wrapped in a matching pair is returned unchanged.
fn strip_quote_pair(raw: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = raw
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    raw
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter HTML grammar and runs `extract` on it.
    fn parse_html(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language: tree_sitter::Language = tree_sitter_html::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();

        let tree = parser.parse(source, None).unwrap();
        extract(&tree, source.as_bytes())
    }

    /// Names of all symbols of the given kind, in extraction order.
    fn names_of_kind(symbols: &[Symbol], kind: SymbolKind) -> Vec<&str> {
        symbols
            .iter()
            .filter(|symbol| symbol.kind == kind)
            .map(|symbol| symbol.name.as_str())
            .collect()
    }

    #[test]
    fn test_html_extraction() {
        let source = r#"<!DOCTYPE html>
<html>
<head>
    <link href="styles.css" rel="stylesheet">
    <script src="app.js"></script>
    <style>body { color: red; }</style>
</head>
<body>
    <div id="main-content">
        <h1 id="title">Hello</h1>
        <template>
            <p>Template content</p>
        </template>
        <script>console.log("inline");</script>
    </div>
</body>
</html>"#;

        let (symbols, imports) = parse_html(source);

        // Imports: link href + script src
        let import_paths: Vec<&str> = imports
            .iter()
            .map(|import| import.path.as_str())
            .collect();
        assert_eq!(import_paths.len(), 2);
        for expected in ["styles.css", "app.js"] {
            assert!(import_paths.contains(&expected));
        }

        // Modules: <style>, <template>, inline <script>
        let module_names = names_of_kind(&symbols, SymbolKind::Module);
        assert_eq!(module_names.len(), 3);
        for expected in ["style", "template", "script"] {
            assert!(module_names.contains(&expected));
        }

        // Const landmarks: id="main-content", id="title"
        let landmark_names = names_of_kind(&symbols, SymbolKind::Const);
        assert_eq!(landmark_names.len(), 2);
        for expected in ["main-content", "title"] {
            assert!(landmark_names.contains(&expected));
        }
    }
}