car-ast 0.12.0

Tree-sitter AST parsing for code-aware inference
Documentation
use super::{extract_doc_comment, extract_signature, field_text, node_text};
use crate::types::*;

/// Extracts the top-level Java symbols and import declarations from `tree`.
///
/// Only the direct children of the root node are examined:
///
/// * `import_declaration` nodes produce an [`Import`] whose `path` is the
///   declaration text without the `import` keyword, an optional `static`
///   modifier and the trailing `;` (so `import static java.lang.Math.PI;`
///   yields `java.lang.Math.PI`). `alias` is always `None`.
/// * `package_declaration` nodes produce a [`SymbolKind::Module`] symbol named
///   after the package, with the full declaration text as its signature.
/// * Class, record, interface, enum and annotation type declarations are
///   delegated to the matching `extract_*` helper; a declaration for which
///   the helper returns `None` is skipped.
///
/// `source` is expected to be the bytes `tree` was parsed from, since node
/// text is looked up in it.
///
/// Returns `(symbols, imports)`, each in the order the nodes were visited.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let root = tree.root_node();
    let mut symbols = Vec::new();
    let mut imports = Vec::new();
    let mut cursor = root.walk();

    for child in root.children(&mut cursor) {
        match child.kind() {
            "import_declaration" => imports.push(Import {
                path: java_import_path(node_text(&child, source)),
                alias: None,
                span: Span::from_node(&child),
            }),
            "package_declaration" => {
                let text = node_text(&child, source);
                let name = strip_leading_keyword(text.trim(), "package")
                    .trim_end_matches(';')
                    .trim();
                symbols.push(Symbol {
                    name: name.to_string(),
                    kind: SymbolKind::Module,
                    span: Span::from_node(&child),
                    signature: text.to_string(),
                    doc_comment: None,
                    parent: None,
                    children: Vec::new(),
                });
            }
            // Each helper yields at most one symbol; `extend` adds it when present.
            "class_declaration" | "record_declaration" => {
                symbols.extend(extract_class(&child, source));
            }
            "interface_declaration" => symbols.extend(extract_interface(&child, source)),
            "enum_declaration" => symbols.extend(extract_enum(&child, source)),
            "annotation_type_declaration" => {
                symbols.extend(extract_annotation_type(&child, source));
            }
            _ => {}
        }
    }

    (symbols, imports)
}

/// Reduces the text of a Java `import` declaration to the imported path.
///
/// Drops the `import` keyword, an optional `static` modifier, the trailing
/// `;` and surrounding whitespace. Text that does not start with `import` is
/// only trimmed and stripped of its trailing `;`.
fn java_import_path(text: &str) -> String {
    let rest = strip_leading_keyword(text.trim(), "import");
    // `static` is a modifier of the declaration, not part of the imported name.
    let rest = strip_leading_keyword(rest, "static");
    rest.trim_end_matches(';').trim().to_string()
}

/// Removes `keyword` and the whitespace after it from the front of `text`.
///
/// The keyword only counts when it is followed by whitespace, so an
/// identifier that merely starts with the same letters is left alone. When
/// nothing is stripped, `text` is returned unchanged.
fn strip_leading_keyword<'a>(text: &'a str, keyword: &str) -> &'a str {
    match text.strip_prefix(keyword) {
        Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
        _ => text,
    }
}

/// Builds a [`SymbolKind::Class`] symbol for a class or record declaration.
///
/// Returns `None` when `field_text` finds no `name` on `node`. The members
/// collected by `extract_class_body_members` become the symbol's children;
/// the symbol itself has no parent.
fn extract_class(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    let class_name = field_text(node, "name", source)?;

    let mut members = Vec::new();
    extract_class_body_members(node, source, class_name, &mut members);

    let span = Span::from_node(node);
    // Signature comes from the shared helper, keyed on the "body" field.
    let signature = extract_signature(node, "body", source);
    let doc_comment = extract_doc_comment(node, source);

    Some(Symbol {
        name: class_name.to_string(),
        kind: SymbolKind::Class,
        span,
        signature,
        doc_comment,
        parent: None,
        children: members,
    })
}

/// Builds a [`SymbolKind::Interface`] symbol for an interface declaration.
///
/// Returns `None` when `field_text` finds no `name` on `node`. Members found
/// by `extract_class_body_members` are attached as children.
fn extract_interface(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    field_text(node, "name", source).map(|iface_name| {
        let mut members = Vec::new();
        extract_class_body_members(node, source, iface_name, &mut members);

        Symbol {
            name: iface_name.to_string(),
            kind: SymbolKind::Interface,
            span: Span::from_node(node),
            signature: extract_signature(node, "body", source),
            doc_comment: extract_doc_comment(node, source),
            parent: None,
            children: members,
        }
    })
}

/// Builds a [`SymbolKind::Enum`] symbol for an enum declaration.
///
/// Returns `None` when `field_text` finds no `name` on `node`. The enum body
/// is not descended into, so the symbol never has children.
fn extract_enum(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    let enum_name = field_text(node, "name", source)?.to_string();
    let span = Span::from_node(node);
    let signature = extract_signature(node, "body", source);
    let doc_comment = extract_doc_comment(node, source);

    Some(Symbol {
        name: enum_name,
        kind: SymbolKind::Enum,
        span,
        signature,
        doc_comment,
        parent: None,
        children: Vec::new(),
    })
}

/// Builds a symbol for an annotation type declaration (`@interface`).
///
/// Annotation types are reported with [`SymbolKind::Interface`]. Returns
/// `None` when `field_text` finds no `name` on `node`; the body is not
/// descended into, so the symbol never has children.
fn extract_annotation_type(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    field_text(node, "name", source).map(|annotation_name| Symbol {
        name: annotation_name.to_string(),
        kind: SymbolKind::Interface,
        span: Span::from_node(node),
        signature: extract_signature(node, "body", source),
        doc_comment: extract_doc_comment(node, source),
        parent: None,
        children: Vec::new(),
    })
}

/// Collects the methods, constructors and fields declared directly in the
/// `body` of `node` and appends them to `children`.
///
/// * `method_declaration` and `constructor_declaration` members become
///   [`SymbolKind::Method`] symbols.
/// * `field_declaration` members become one [`SymbolKind::Const`] symbol per
///   declarator, so `int a, b;` yields both `a` and `b`. Every symbol from the
///   same declaration shares that declaration's span, text (used as the
///   signature) and doc comment.
///
/// Each pushed symbol has `parent` set to `class_name`. Members without a
/// `name`, and member kinds not listed above, are skipped. Does nothing when
/// `node` has no `body` field.
fn extract_class_body_members(
    node: &tree_sitter::Node,
    source: &[u8],
    class_name: &str,
    children: &mut Vec<Symbol>,
) {
    let body = match node.child_by_field_name("body") {
        Some(body) => body,
        None => return,
    };

    let mut cursor = body.walk();
    for member in body.children(&mut cursor) {
        match member.kind() {
            // Constructors are reported the same way as ordinary methods.
            "method_declaration" | "constructor_declaration" => {
                if let Some(name) = field_text(&member, "name", source) {
                    children.push(Symbol {
                        name: name.to_string(),
                        kind: SymbolKind::Method,
                        span: Span::from_node(&member),
                        signature: extract_signature(&member, "body", source),
                        doc_comment: extract_doc_comment(&member, source),
                        parent: Some(class_name.to_string()),
                        children: Vec::new(),
                    });
                }
            }
            "field_declaration" => {
                // A single declaration may carry several `declarator` children
                // (`int a, b;`); visit all of them rather than only the first.
                let mut declarator_cursor = member.walk();
                for declarator in
                    member.children_by_field_name("declarator", &mut declarator_cursor)
                {
                    if let Some(name) = field_text(&declarator, "name", source) {
                        children.push(Symbol {
                            name: name.to_string(),
                            kind: SymbolKind::Const,
                            span: Span::from_node(&member),
                            signature: node_text(&member, source).to_string(),
                            doc_comment: extract_doc_comment(&member, source),
                            parent: Some(class_name.to_string()),
                            children: Vec::new(),
                        });
                    }
                }
            }
            _ => {}
        }
    }
}

#[cfg(all(test, feature = "java"))]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Java grammar and runs [`extract`] on it.
    fn parse_java(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language: tree_sitter::Language = tree_sitter_java::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&language)
            .expect("failed to set java language");
        let tree = parser.parse(source, None).expect("failed to parse");
        extract(&tree, source.as_bytes())
    }

    /// Returns the entries of `symbols` whose kind equals `kind`, in order.
    fn of_kind(symbols: &[Symbol], kind: SymbolKind) -> Vec<&Symbol> {
        symbols.iter().filter(|s| s.kind == kind).collect()
    }

    /// End-to-end check of package, import, class, interface and enum extraction.
    #[test]
    fn test_java_extraction() {
        let src = r#"
package com.example;

import java.util.List;
import java.io.IOException;

/** A sample class. */
public class Greeter {
    private String name;

    public Greeter(String name) {
        this.name = name;
    }

    public String greet() {
        return "Hello, " + name;
    }
}

public interface Printable {
    void print();
}

public enum Color {
    RED, GREEN, BLUE
}
"#;
        let (symbols, imports) = parse_java(src);

        // Both imports, in declaration order.
        assert_eq!(imports.len(), 2);
        assert_eq!(imports[0].path, "java.util.List");
        assert_eq!(imports[1].path, "java.io.IOException");

        // The package is surfaced as a module symbol.
        let pkg = of_kind(&symbols, SymbolKind::Module);
        assert_eq!(pkg.len(), 1);
        assert_eq!(pkg[0].name, "com.example");

        // Exactly one class, carrying its members as children.
        let classes = of_kind(&symbols, SymbolKind::Class);
        assert_eq!(classes.len(), 1);
        assert_eq!(classes[0].name, "Greeter");

        let members = &classes[0].children;
        let methods = of_kind(members, SymbolKind::Method);
        assert_eq!(methods.len(), 2); // constructor + greet
        let fields = of_kind(members, SymbolKind::Const);
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].name, "name");
        // Members point back at the enclosing class.
        assert_eq!(methods[0].parent.as_deref(), Some("Greeter"));

        // Interface
        let ifaces = of_kind(&symbols, SymbolKind::Interface);
        assert_eq!(ifaces.len(), 1);
        assert_eq!(ifaces[0].name, "Printable");

        // Enum
        let enums = of_kind(&symbols, SymbolKind::Enum);
        assert_eq!(enums.len(), 1);
        assert_eq!(enums[0].name, "Color");
    }
}