car-ast 0.8.0

Tree-sitter AST parsing for code-aware inference
Documentation
use crate::types::*;
use super::{node_text, field_text, extract_doc_comment};

/// Collects symbols and imports from the top-level nodes of a parsed Bash tree.
///
/// Only direct children of the root node are inspected:
/// - `function_definition` nodes are handed to `extract_function`,
/// - `variable_assignment` nodes to `extract_variable`,
/// - `command` nodes to `extract_source_command` (which may record an import),
/// - `declaration_command` nodes to `extract_declaration`.
///
/// Every other node kind is ignored. Returns `(symbols, imports)` in source order.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let mut symbols: Vec<Symbol> = Vec::new();
    let mut imports: Vec<Import> = Vec::new();

    let root = tree.root_node();
    let mut walker = root.walk();

    for top_level in root.children(&mut walker) {
        let kind = top_level.kind();
        if kind == "function_definition" {
            // `extend` with an `Option` pushes the symbol only when one was produced.
            symbols.extend(extract_function(&top_level, source));
        } else if kind == "variable_assignment" {
            symbols.extend(extract_variable(&top_level, source));
        } else if kind == "command" {
            // `source` / `.` commands are treated as imports.
            extract_source_command(&top_level, source, &mut imports);
        } else if kind == "declaration_command" {
            // export VAR=value, declare VAR=value, etc.
            extract_declaration(&top_level, source, &mut symbols);
        }
    }

    (symbols, imports)
}

/// Builds a `Function` symbol from a function definition node.
///
/// The name comes from the node's `name` field; when that field is absent, the first
/// direct child of kind `word`, `identifier` or `function_name` is used instead.
/// Returns `None` when neither lookup yields a name. The signature is the first line
/// of the node's text (e.g. `function name()` or `name()`).
fn extract_function(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    let name = match field_text(node, "name", source) {
        Some(found) => found,
        None => {
            // No `name` field: scan the direct children for a name-like node.
            let mut walker = node.walk();
            let fallback = node
                .children(&mut walker)
                .find(|c| matches!(c.kind(), "word" | "identifier" | "function_name"))?;
            node_text(&fallback, source)
        }
    };

    let first_line = node_text(node, source)
        .lines()
        .next()
        .unwrap_or("")
        .to_string();

    Some(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Function,
        span: Span::from_node(node),
        signature: first_line,
        doc_comment: extract_doc_comment(node, source),
        parent: None,
        children: Vec::new(),
    })
}

/// Builds a `Const` symbol from a `NAME=value` assignment node.
///
/// The name is the node's `name` field when present; otherwise it is the trimmed text
/// before the first `=` in the node's source text. Returns `None` when the resulting
/// name is empty. The signature is the first line of the assignment's text.
fn extract_variable(node: &tree_sitter::Node, source: &[u8]) -> Option<Symbol> {
    let raw = node_text(node, source);

    let ident = match field_text(node, "name", source) {
        Some(found) => found,
        // Fall back to the text left of the first '='.
        None => raw.split('=').next()?.trim(),
    };
    if ident.is_empty() {
        return None;
    }

    let first_line = raw.lines().next().unwrap_or("").to_string();

    Some(Symbol {
        name: ident.to_string(),
        kind: SymbolKind::Const,
        span: Span::from_node(node),
        signature: first_line,
        doc_comment: extract_doc_comment(node, source),
        parent: None,
        children: Vec::new(),
    })
}

/// Records an `Import` when `node` is a `source` or `.` command.
///
/// The command name is read from the node's `name` field, falling back to its first
/// child. Nothing is recorded unless that name's text is exactly `source` or `.`.
///
/// The imported path is taken from the `argument` field when present; otherwise from
/// the first child *after* the command name whose kind is one of `word`, `string`,
/// `raw_string`, `concatenation`, `simple_expansion` or `string_content`.
/// Leading/trailing `"` characters and then `'` characters are trimmed from the path,
/// and an empty result is skipped. The pushed import has no alias and spans the whole
/// command node.
fn extract_source_command(node: &tree_sitter::Node, source: &[u8], imports: &mut Vec<Import>) {
    // A command node: prefer the `name` field, otherwise treat the first child as the name.
    let cmd_name_node = match node.child_by_field_name("name").or_else(|| node.child(0)) {
        Some(n) => n,
        None => return,
    };

    let cmd_name = node_text(&cmd_name_node, source);
    if cmd_name != "source" && cmd_name != "." {
        return;
    }

    // Identify the name child by its tree-sitter node id. Comparing the addresses of
    // two local `Node` values can never match, so node identity must come from the id.
    let cmd_id = cmd_name_node.id();

    let arg_node = node.child_by_field_name("argument").or_else(|| {
        // Find the first word/string-like child that follows the command name.
        let mut cursor = node.walk();
        let found = node
            .children(&mut cursor)
            .skip_while(|child| child.id() != cmd_id)
            .skip(1)
            .find(|child| {
                matches!(
                    child.kind(),
                    "word"
                        | "string"
                        | "raw_string"
                        | "concatenation"
                        | "simple_expansion"
                        | "string_content"
                )
            });
        found
    });

    let arg_node = match arg_node {
        Some(n) => n,
        None => return,
    };

    // Strip quotes
    let path = node_text(&arg_node, source)
        .trim_matches('"')
        .trim_matches('\'');
    if !path.is_empty() {
        imports.push(Import {
            path: path.to_string(),
            alias: None,
            span: Span::from_node(node),
        });
    }
}

fn extract_declaration(node: &tree_sitter::Node, source: &[u8], symbols: &mut Vec<Symbol>) {
    // declaration_command: export VAR=value, declare -r VAR=value, etc.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "variable_assignment" {
            if let Some(mut sym) = extract_variable(&child, source) {
                // Override the signature to include the export/declare prefix
                sym.signature = node_text(node, source).lines().next().unwrap_or("").to_string();
                symbols.push(sym);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Bash grammar and runs `extract` on the result.
    fn parse_bash(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language: tree_sitter::Language = tree_sitter_bash::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(source, None).unwrap();
        extract(&tree, source.as_bytes())
    }

    /// Returns the first symbol whose name equals `name`, if any.
    fn symbol_named<'a>(symbols: &'a [Symbol], name: &str) -> Option<&'a Symbol> {
        symbols.iter().find(|s| s.name == name)
    }

    /// End-to-end check covering imports, both function syntaxes, a plain assignment
    /// and an exported assignment.
    #[test]
    fn test_bash_basics() {
        let source = r#"#!/bin/bash

source ./config.sh
. /etc/profile.d/env.sh

MAX_RETRIES=5
export LOG_LEVEL="info"

greet() {
    echo "Hello, $1"
}

function cleanup {
    rm -rf /tmp/work
}
"#;
        let (symbols, imports) = parse_bash(source);

        // Imports: both `source` and `.` forms must be picked up.
        let import_count = imports.len();
        assert!(import_count >= 2, "Expected at least 2 imports, got {}: {:?}", import_count, imports);
        assert!(imports.iter().any(|i| i.path.contains("config.sh")));
        assert!(imports.iter().any(|i| i.path.contains("env.sh")));

        // Functions: `name() { ... }` syntax.
        let greet = symbol_named(&symbols, "greet");
        assert!(greet.is_some(), "Expected greet function, symbols: {:?}", symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>());
        assert_eq!(greet.unwrap().kind, SymbolKind::Function);

        // Functions: `function name { ... }` syntax.
        let cleanup = symbol_named(&symbols, "cleanup");
        assert!(cleanup.is_some(), "Expected cleanup function");
        assert_eq!(cleanup.unwrap().kind, SymbolKind::Function);

        // Constants: plain top-level assignment.
        let max_retries = symbol_named(&symbols, "MAX_RETRIES");
        assert!(max_retries.is_some(), "Expected MAX_RETRIES constant");
        assert_eq!(max_retries.unwrap().kind, SymbolKind::Const);

        // Export variable: assignment wrapped in a declaration command.
        let log_level = symbol_named(&symbols, "LOG_LEVEL");
        assert!(log_level.is_some(), "Expected LOG_LEVEL constant, symbols: {:?}", symbols.iter().map(|s| (&s.name, &s.kind)).collect::<Vec<_>>());
        assert_eq!(log_level.unwrap().kind, SymbolKind::Const);
    }
}