car-ast 0.13.0

Tree-sitter AST parsing for code-aware inference
Documentation
// Per-language extractor modules.
//
// Each module is compiled only when the Cargo feature of the same name is
// enabled. `extract_symbols` below calls `<module>::extract(tree, source)` on
// whichever of these modules corresponds to the requested language.
#[cfg(feature = "bash")]
pub mod bash;
#[cfg(feature = "c")]
pub mod c;
#[cfg(feature = "cpp")]
pub mod cpp;
#[cfg(feature = "csharp")]
pub mod csharp;
#[cfg(feature = "css")]
pub mod css;
#[cfg(feature = "dart")]
pub mod dart;
#[cfg(feature = "elixir")]
pub mod elixir;
#[cfg(feature = "go")]
pub mod go;
#[cfg(feature = "haskell")]
pub mod haskell;
#[cfg(feature = "html")]
pub mod html;
#[cfg(feature = "java")]
pub mod java;
#[cfg(feature = "javascript")]
pub mod javascript;
#[cfg(feature = "json")]
pub mod json;
#[cfg(feature = "kotlin")]
pub mod kotlin;
#[cfg(feature = "lua")]
pub mod lua;
#[cfg(feature = "php")]
pub mod php;
#[cfg(feature = "python")]
pub mod python;
#[cfg(feature = "r")]
pub mod r;
#[cfg(feature = "ruby")]
pub mod ruby;
#[cfg(feature = "rust")]
pub mod rust;
#[cfg(feature = "scala")]
pub mod scala;
#[cfg(feature = "swift")]
pub mod swift;
#[cfg(feature = "typescript")]
pub mod typescript;
#[cfg(feature = "zig")]
pub mod zig;

use crate::types::*;

/// Dispatch symbol extraction to the extractor module for `lang`.
///
/// Every arm is compiled only when the Cargo feature of the same name is
/// enabled, and forwards `tree` and `source` unchanged to that module's
/// `extract` function.
///
/// Returns the `(symbols, imports)` pair produced by the selected extractor,
/// or a pair of empty vectors when no arm for `lang` is compiled in.
pub fn extract_symbols(
    lang: Language,
    tree: &tree_sitter::Tree,
    source: &[u8],
) -> (Vec<Symbol>, Vec<Import>) {
    // Arms are listed alphabetically by feature name; the variants are
    // disjoint, so arm order has no effect on which extractor runs.
    match lang {
        #[cfg(feature = "bash")]
        Language::Bash => bash::extract(tree, source),
        #[cfg(feature = "c")]
        Language::C => c::extract(tree, source),
        #[cfg(feature = "cpp")]
        Language::Cpp => cpp::extract(tree, source),
        #[cfg(feature = "csharp")]
        Language::CSharp => csharp::extract(tree, source),
        #[cfg(feature = "css")]
        Language::Css => css::extract(tree, source),
        #[cfg(feature = "dart")]
        Language::Dart => dart::extract(tree, source),
        #[cfg(feature = "elixir")]
        Language::Elixir => elixir::extract(tree, source),
        #[cfg(feature = "go")]
        Language::Go => go::extract(tree, source),
        #[cfg(feature = "haskell")]
        Language::Haskell => haskell::extract(tree, source),
        #[cfg(feature = "html")]
        Language::Html => html::extract(tree, source),
        #[cfg(feature = "java")]
        Language::Java => java::extract(tree, source),
        #[cfg(feature = "javascript")]
        Language::JavaScript => javascript::extract(tree, source),
        #[cfg(feature = "json")]
        Language::Json => json::extract(tree, source),
        #[cfg(feature = "kotlin")]
        Language::Kotlin => kotlin::extract(tree, source),
        #[cfg(feature = "lua")]
        Language::Lua => lua::extract(tree, source),
        #[cfg(feature = "php")]
        Language::Php => php::extract(tree, source),
        #[cfg(feature = "python")]
        Language::Python => python::extract(tree, source),
        #[cfg(feature = "r")]
        Language::R => r::extract(tree, source),
        #[cfg(feature = "ruby")]
        Language::Ruby => ruby::extract(tree, source),
        #[cfg(feature = "rust")]
        Language::Rust => rust::extract(tree, source),
        #[cfg(feature = "scala")]
        Language::Scala => scala::extract(tree, source),
        #[cfg(feature = "swift")]
        Language::Swift => swift::extract(tree, source),
        #[cfg(feature = "typescript")]
        Language::TypeScript => typescript::extract(tree, source),
        #[cfg(feature = "zig")]
        Language::Zig => zig::extract(tree, source),
        // Fallback for languages whose feature is disabled. The wildcard may
        // become unreachable once every arm above is compiled in, hence the
        // lint allowance.
        #[allow(unreachable_patterns)]
        _ => (Vec::new(), Vec::new()),
    }
}

/// Look up the tree-sitter grammar that corresponds to `lang`.
///
/// Every arm is compiled only when the Cargo feature of the same name is
/// enabled, and converts that grammar crate's exported constant into a
/// `tree_sitter::Language`.
///
/// Returns `None` when no arm for `lang` is compiled in.
pub fn ts_language(lang: Language) -> Option<tree_sitter::Language> {
    // Arms are listed alphabetically by feature name; the variants are
    // disjoint, so arm order has no effect on the result.
    match lang {
        #[cfg(feature = "bash")]
        Language::Bash => Some(tree_sitter_bash::LANGUAGE.into()),
        #[cfg(feature = "c")]
        Language::C => Some(tree_sitter_c::LANGUAGE.into()),
        #[cfg(feature = "cpp")]
        Language::Cpp => Some(tree_sitter_cpp::LANGUAGE.into()),
        #[cfg(feature = "csharp")]
        Language::CSharp => Some(tree_sitter_c_sharp::LANGUAGE.into()),
        #[cfg(feature = "css")]
        Language::Css => Some(tree_sitter_css::LANGUAGE.into()),
        #[cfg(feature = "dart")]
        Language::Dart => Some(tree_sitter_dart::LANGUAGE.into()),
        #[cfg(feature = "elixir")]
        Language::Elixir => Some(tree_sitter_elixir::LANGUAGE.into()),
        #[cfg(feature = "go")]
        Language::Go => Some(tree_sitter_go::LANGUAGE.into()),
        #[cfg(feature = "haskell")]
        Language::Haskell => Some(tree_sitter_haskell::LANGUAGE.into()),
        #[cfg(feature = "html")]
        Language::Html => Some(tree_sitter_html::LANGUAGE.into()),
        #[cfg(feature = "java")]
        Language::Java => Some(tree_sitter_java::LANGUAGE.into()),
        #[cfg(feature = "javascript")]
        Language::JavaScript => Some(tree_sitter_javascript::LANGUAGE.into()),
        #[cfg(feature = "json")]
        Language::Json => Some(tree_sitter_json::LANGUAGE.into()),
        #[cfg(feature = "kotlin")]
        Language::Kotlin => Some(tree_sitter_kotlin_ng::LANGUAGE.into()),
        #[cfg(feature = "lua")]
        Language::Lua => Some(tree_sitter_lua::LANGUAGE.into()),
        #[cfg(feature = "php")]
        Language::Php => Some(tree_sitter_php::LANGUAGE_PHP.into()),
        #[cfg(feature = "python")]
        Language::Python => Some(tree_sitter_python::LANGUAGE.into()),
        #[cfg(feature = "r")]
        Language::R => Some(tree_sitter_r::LANGUAGE.into()),
        #[cfg(feature = "ruby")]
        Language::Ruby => Some(tree_sitter_ruby::LANGUAGE.into()),
        #[cfg(feature = "rust")]
        Language::Rust => Some(tree_sitter_rust::LANGUAGE.into()),
        #[cfg(feature = "scala")]
        Language::Scala => Some(tree_sitter_scala::LANGUAGE.into()),
        #[cfg(feature = "swift")]
        Language::Swift => Some(tree_sitter_swift::LANGUAGE.into()),
        // TypeScript sources are parsed with the TSX grammar constant.
        #[cfg(feature = "typescript")]
        Language::TypeScript => Some(tree_sitter_typescript::LANGUAGE_TSX.into()),
        #[cfg(feature = "zig")]
        Language::Zig => Some(tree_sitter_zig::LANGUAGE.into()),
        // Fallback for languages whose feature is disabled. The wildcard may
        // become unreachable once every arm above is compiled in, hence the
        // lint allowance.
        #[allow(unreachable_patterns)]
        _ => None,
    }
}

/// Helper: return the UTF-8 text that `node` covers within `source`.
///
/// Yields the empty string when `utf8_text` reports an error, so callers
/// never have to handle a decoding failure themselves.
pub(crate) fn node_text<'a>(node: &tree_sitter::Node, source: &'a [u8]) -> &'a str {
    match node.utf8_text(source) {
        Ok(text) => text,
        Err(_) => "",
    }
}

/// Helper: return the text of the child of `node` stored under `field`.
///
/// Returns `None` when `node` has no child for that field name; otherwise the
/// child's text as produced by `node_text`.
pub(crate) fn field_text<'a>(
    node: &tree_sitter::Node,
    field: &str,
    source: &'a [u8],
) -> Option<&'a str> {
    let child = node.child_by_field_name(field)?;
    Some(node_text(&child, source))
}

/// Helper: gather the doc comments that directly precede `node`.
///
/// Walks backwards over `node`'s previous siblings:
///
/// * comment nodes (`line_comment`, `comment`, `block_comment`) whose text
///   starts with `///`, `/**` or `## ` are collected;
/// * `attribute_item` and `decorator` siblings are stepped over;
/// * any other sibling, including a comment without one of those prefixes,
///   ends the walk.
///
/// Returns the collected comments in source order joined with `\n`, or `None`
/// when nothing was collected.
pub(crate) fn extract_doc_comment(node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    // Prefixes that mark a comment as documentation.
    const DOC_PREFIXES: [&str; 3] = ["///", "/**", "## "];

    let mut doc_lines: Vec<&str> = Vec::new();
    let mut cursor = node.prev_sibling();
    while let Some(prev) = cursor {
        match prev.kind() {
            "line_comment" | "comment" | "block_comment" => {
                let text = node_text(&prev, source);
                // An ordinary (non-doc) comment terminates the doc block.
                if !DOC_PREFIXES.iter().any(|prefix| text.starts_with(*prefix)) {
                    break;
                }
                doc_lines.push(text);
            }
            // Attributes/decorators may sit between the docs and the item.
            "attribute_item" | "decorator" => {}
            _ => break,
        }
        cursor = prev.prev_sibling();
    }

    if doc_lines.is_empty() {
        return None;
    }
    // Siblings were visited bottom-up; restore top-down source order.
    doc_lines.reverse();
    Some(doc_lines.join("\n"))
}

/// Helper: return the signature of `node`, i.e. its text before the body.
///
/// When `node` has a child under `body_field`, the result is the source from
/// the start of `node` up to (not including) the start of that child, trimmed
/// of surrounding whitespace; if those bytes are not valid UTF-8 the result is
/// an empty string. When there is no such child, the full text of `node` is
/// returned untrimmed.
pub(crate) fn extract_signature(
    node: &tree_sitter::Node,
    body_field: &str,
    source: &[u8],
) -> String {
    match node.child_by_field_name(body_field) {
        Some(body) => {
            let header = &source[node.start_byte()..body.start_byte()];
            match std::str::from_utf8(header) {
                Ok(text) => text.trim().to_string(),
                Err(_) => String::new(),
            }
        }
        None => node_text(node, source).to_string(),
    }
}