car-ast 0.15.0

Tree-sitter AST parsing for code-aware inference
Documentation
use super::{extract_doc_comment, field_text, node_text};
use crate::types::*;

/// Collects the symbols and imports found directly under the root of `tree`.
///
/// The root node's children are handed to `extract_children` with no parent
/// name, and the two accumulated lists are returned as `(symbols, imports)`.
/// `source` must be the bytes the tree was parsed from, since node text is
/// sliced out of it.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let (mut found_symbols, mut found_imports) = (Vec::new(), Vec::new());

    extract_children(
        &tree.root_node(),
        source,
        &mut found_symbols,
        &mut found_imports,
        None,
    );

    (found_symbols, found_imports)
}

/// Dispatches each direct child of `node` to the matching extractor.
///
/// Symbols produced for `method`, `singleton_method`, `class`, `module` and
/// (constant) assignment children are appended to `symbols`; `call` children
/// that turn out to be requires are appended to `imports`. Children of any
/// other kind are ignored. `parent_name` is forwarded unchanged to the
/// symbol extractors.
fn extract_children(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
    parent_name: Option<&str>,
) {
    let mut walker = node.walk();
    for item in node.children(&mut walker) {
        let symbol = match item.kind() {
            // Calls can only contribute imports, never symbols.
            "call" => {
                imports.extend(extract_require(&item, source));
                continue;
            }
            "method" => extract_method(&item, source, parent_name),
            "singleton_method" => extract_singleton_method(&item, source, parent_name),
            "class" => extract_class(&item, source, parent_name),
            "module" => extract_module(&item, source, parent_name),
            "constant_assignment" | "assignment" => extract_constant(&item, source, parent_name),
            _ => None,
        };
        // `extend` on an `Option` pushes the value when present and is a no-op otherwise.
        symbols.extend(symbol);
    }
}

/// Builds a `Symbol` for a `method` node.
///
/// The symbol is a `Method` when `parent_name` is present and a `Function`
/// otherwise. Its signature is the source text from the start of the node up
/// to the start of its `body` field, trimmed; when there is no `body` field
/// the whole node text is used. Returns `None` when the `name` field yields
/// no text.
fn extract_method(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;

    let signature = match node.child_by_field_name("body") {
        Some(body) => {
            // Everything that precedes the body is the header.
            let header = &source[node.start_byte()..body.start_byte()];
            std::str::from_utf8(header).unwrap_or("").trim().to_string()
        }
        None => node_text(node, source).to_string(),
    };

    let kind = match parent_name {
        Some(_) => SymbolKind::Method,
        None => SymbolKind::Function,
    };

    Some(Symbol {
        name: name.to_string(),
        kind,
        span: Span::from_node(node),
        signature,
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children: Vec::new(),
    })
}

/// Builds a `Symbol` for a `singleton_method` node (e.g. `def self.species`).
///
/// The symbol kind is always `Method`, regardless of `parent_name`. The
/// signature is the trimmed source text preceding the `body` field, or the
/// whole node text when there is no `body` field. Returns `None` when the
/// `name` field yields no text.
fn extract_singleton_method(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;

    let signature = node.child_by_field_name("body").map_or_else(
        || node_text(node, source).to_string(),
        |body| {
            let header = &source[node.start_byte()..body.start_byte()];
            std::str::from_utf8(header).unwrap_or("").trim().to_string()
        },
    );

    Some(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Method,
        span: Span::from_node(node),
        signature,
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children: Vec::new(),
    })
}

/// Builds a `Symbol` for a `class` node, including its nested symbols.
///
/// The signature is the trimmed source text from the start of the node up to
/// its `body` field (e.g. `class Foo < Bar`); without a `body` field it is
/// the trimmed first line of the node text. Symbols found in the body are
/// extracted with this class's name as their parent and stored in
/// `children`. Returns `None` when the `name` field yields no text.
fn extract_class(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;
    let body = node.child_by_field_name("body");

    let signature = match &body {
        Some(body) => {
            let header = &source[node.start_byte()..body.start_byte()];
            std::str::from_utf8(header).unwrap_or("").trim().to_string()
        }
        None => {
            let text = node_text(node, source);
            text.lines().next().unwrap_or("").trim().to_string()
        }
    };

    let mut children = Vec::new();
    // NOTE: imports encountered inside the class body are collected here but
    // never returned to the caller.
    let mut nested_imports = Vec::new();
    if let Some(body) = &body {
        extract_children(body, source, &mut children, &mut nested_imports, Some(name));
    }

    Some(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Class,
        span: Span::from_node(node),
        signature,
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children,
    })
}

/// Builds a `Symbol` for a `module` node, including its nested symbols.
///
/// The signature is synthesized as `module <name>`. Symbols found in the
/// `body` field, if any, are extracted with this module's name as their
/// parent and stored in `children`. Returns `None` when the `name` field
/// yields no text.
fn extract_module(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;

    let mut children = Vec::new();
    // NOTE: imports encountered inside the module body are collected here but
    // never returned to the caller.
    let mut nested_imports = Vec::new();
    match node.child_by_field_name("body") {
        Some(body) => {
            extract_children(&body, source, &mut children, &mut nested_imports, Some(name))
        }
        None => {}
    }

    Some(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Module,
        span: Span::from_node(node),
        signature: format!("module {}", name),
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children,
    })
}

/// Builds a `Const` symbol for an assignment whose target is a constant.
///
/// The caller passes every `assignment` node here, so the target has to be
/// vetted. A symbol is produced only when the `left` field is a `constant` or
/// `scope_resolution` node (e.g. `MAX_SIZE = 1`, `Config::LIMIT = 1`) and its
/// text starts with an ASCII uppercase letter. The symbol's signature is the
/// full text of the assignment.
///
/// Returns `None` when there is no `left` field or the target is not a
/// constant. This includes setter calls (`Foo.bar = 1`), element writes
/// (`Foo[0] = 1`) and multiple assignment (`A, B = 1, 2`): their text may
/// begin with an uppercase letter, but it is not a constant name.
fn extract_constant(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let left = node.child_by_field_name("left")?;

    // Checking the text alone is not enough: `Foo.bar`, `Foo[0]` and `A, B`
    // all start with an uppercase letter without naming a constant.
    if !matches!(left.kind(), "constant" | "scope_resolution") {
        return None;
    }

    let name = node_text(&left, source);

    // Only treat as const if the name starts with an uppercase letter
    // (SCREAMING_CASE or PascalCase).
    if !name.starts_with(|c: char| c.is_ascii_uppercase()) {
        return None;
    }

    Some(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Const,
        span: Span::from_node(node),
        signature: node_text(node, source).to_string(),
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(|s| s.to_string()),
        children: Vec::new(),
    })
}

/// Turns a `require` / `require_relative` call node into an `Import`.
///
/// Returns `None` when the `method` field yields no text, when the method is
/// anything other than those two names, or when the call has no `arguments`
/// field. The import path is the argument text with surrounding parentheses,
/// whitespace and quote characters stripped; no alias is recorded.
fn extract_require(node: &tree_sitter::Node, source: &[u8]) -> Option<Import> {
    let method = field_text(node, "method", source)?;

    let is_require = method == "require" || method == "require_relative";
    if !is_require {
        return None;
    }

    let args = node.child_by_field_name("arguments")?;

    // Peel the optional call parentheses first, then the string quotes.
    let unwrapped = node_text(&args, source)
        .trim_start_matches('(')
        .trim_end_matches(')')
        .trim();
    let path = unwrapped.trim_matches('"').trim_matches('\'').to_string();

    Some(Import {
        path,
        alias: None,
        span: Span::from_node(node),
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Ruby grammar and runs `extract` on it.
    fn parse_ruby(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(source, None).unwrap();
        extract(&tree, source.as_bytes())
    }

    /// Returns the symbol called `name`, panicking when it is absent.
    fn named<'a>(symbols: &'a [Symbol], name: &str) -> &'a Symbol {
        symbols.iter().find(|s| s.name == name).unwrap()
    }

    #[test]
    fn test_class_with_methods() {
        let source = r#"
require "json"
require_relative "helpers"

module Animals
  class Dog < Animal
    MAX_AGE = 20

    def initialize(name)
      @name = name
    end

    def self.species
      "Canis familiaris"
    end

    def bark
      puts "Woof!"
    end
  end
end
"#;
        let (symbols, imports) = parse_ruby(source);

        // Both require forms are reported, in source order.
        let import_paths: Vec<&str> = imports.iter().map(|i| i.path.as_str()).collect();
        assert_eq!(import_paths, ["json", "helpers"]);

        // The module is the only top-level symbol.
        assert_eq!(symbols.len(), 1);
        let animals = &symbols[0];
        assert_eq!(animals.name, "Animals");
        assert_eq!(animals.kind, SymbolKind::Module);

        // The module holds exactly the class.
        assert_eq!(animals.children.len(), 1);
        let dog = &animals.children[0];
        assert_eq!(dog.name, "Dog");
        assert_eq!(dog.kind, SymbolKind::Class);

        // The class holds the constant and all three methods.
        let members = &dog.children;
        let names: Vec<&str> = members.iter().map(|s| s.name.as_str()).collect();
        for expected in &["MAX_AGE", "initialize", "species", "bark"] {
            assert!(
                names.contains(expected),
                "missing {} in {:?}",
                expected,
                names
            );
        }

        // Instance methods inside a class are methods, not functions.
        assert_eq!(named(members, "initialize").kind, SymbolKind::Method);

        // Singleton methods are reported as methods too.
        assert_eq!(named(members, "species").kind, SymbolKind::Method);

        // Uppercase assignment targets are constants.
        assert_eq!(named(members, "MAX_AGE").kind, SymbolKind::Const);
    }

    #[test]
    fn test_top_level_function() {
        let source = r#"
def greet(name)
  puts "Hello, #{name}!"
end
"#;
        let (symbols, _) = parse_ruby(source);

        // A `def` outside any class or module is a free function.
        assert_eq!(symbols.len(), 1);
        let greet = &symbols[0];
        assert_eq!(greet.name, "greet");
        assert_eq!(greet.kind, SymbolKind::Function);
    }
}