// car-ast 0.1.1
//
// Tree-sitter AST parsing for code-aware inference
// Documentation
//! Project-wide symbol index — scans a directory, parses all supported files,
//! and builds a cross-file symbol table with caller/callee references.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use crate::types::*;
use crate::{parse_file, extract_source};

/// A symbol with its file context.
///
/// The index stores one of these for every top-level symbol of a parsed file
/// and for every direct child of such a symbol.
#[derive(Debug, Clone)]
pub struct IndexedSymbol {
    /// Path of the defining file, relative to the project root (the full path
    /// is kept if the root prefix cannot be stripped).
    pub file_path: String,
    /// The parsed symbol itself.
    pub symbol: Symbol,
    /// Source code of this symbol, as returned by `extract_source`.
    pub source: String,
}

/// Cross-file reference: symbol A in file X references symbol B.
///
/// References are only recorded between different files; a file never
/// references its own symbols here.
#[derive(Debug, Clone)]
pub struct CrossReference {
    /// Relative path of the file that contains the reference.
    pub from_file: String,
    /// Name of the symbol in `from_file` whose source contains the reference,
    /// or the relative path of `from_file` itself when no indexed symbol of
    /// that file contains it.
    pub from_symbol: String,
    /// Relative path of the file that defines the referenced symbol.
    pub to_file: String,
    /// Name of the symbol being referenced.
    pub to_symbol: String,
}

/// Project-wide symbol index.
///
/// Created with `ProjectIndex::build`; all fields are public so callers can
/// inspect the raw tables directly.
#[derive(Debug)]
pub struct ProjectIndex {
    /// All parsed files, keyed by relative path.
    pub files: HashMap<String, ParsedFile>,
    /// All symbols flattened. Top-level symbols are keyed by
    /// `file_path::name`, their direct children by
    /// `file_path::parent_name::child_name`.
    pub symbols: HashMap<String, IndexedSymbol>,
    /// Cross-file references (A calls/uses B), detected by looking for the
    /// target symbol's name as a whole identifier in other files.
    pub references: Vec<CrossReference>,
    /// Project root directory.
    pub root: PathBuf,
}

/// File extensions (without the leading dot) that are collected for indexing.
///
/// Compared for exact equality against the extension of each file found
/// while walking the project directory.
const EXTENSIONS: &[&str] = &[
    // Rust
    "rs",
    // Python
    "py",
    // TypeScript
    "ts",
    "tsx",
    // JavaScript
    "js",
    "jsx",
    "mjs",
    "cjs",
    // Go
    "go",
];

/// Directory names that are never descended into while collecting files.
///
/// Compared for exact equality against a directory's own name, so a match
/// at any depth of the tree is skipped.
const SKIP_DIRS: &[&str] = &[
    "target",
    "node_modules",
    ".git",
    "__pycache__",
    "dist",
    "build",
    ".next",
    "vendor",
    ".cargo",
];

impl ProjectIndex {
    /// Build an index by scanning a directory.
    pub fn build(root: &Path) -> Self {
        let mut index = Self {
            files: HashMap::new(),
            symbols: HashMap::new(),
            references: Vec::new(),
            root: root.to_path_buf(),
        };

        // Collect source files
        let files = collect_source_files(root);

        // Parse each file
        for file_path in &files {
            let rel_path = file_path.strip_prefix(root)
                .unwrap_or(file_path)
                .to_string_lossy()
                .to_string();

            let content = match std::fs::read_to_string(file_path) {
                Ok(c) => c,
                Err(_) => continue,
            };

            if let Some(parsed) = parse_file(&content, &rel_path) {
                // Index all symbols (top-level + children)
                for sym in &parsed.symbols {
                    let key = format!("{}::{}", rel_path, sym.name);
                    index.symbols.insert(key, IndexedSymbol {
                        file_path: rel_path.clone(),
                        symbol: sym.clone(),
                        source: extract_source(sym, &content),
                    });

                    // Index children (methods)
                    for child in &sym.children {
                        let child_key = format!("{}::{}::{}", rel_path, sym.name, child.name);
                        index.symbols.insert(child_key, IndexedSymbol {
                            file_path: rel_path.clone(),
                            symbol: child.clone(),
                            source: extract_source(child, &content),
                        });
                    }
                }

                index.files.insert(rel_path, parsed);
            }
        }

        // Build cross-file references
        index.build_cross_references(&files, root);

        index
    }

    /// Find all symbols matching a name (exact).
    pub fn find(&self, name: &str) -> Vec<&IndexedSymbol> {
        self.symbols.values()
            .filter(|s| s.symbol.name == name)
            .collect()
    }

    /// Find all symbols matching a name (fuzzy, case-insensitive substring).
    pub fn find_fuzzy(&self, query: &str) -> Vec<&IndexedSymbol> {
        let query_lower = query.to_lowercase();
        self.symbols.values()
            .filter(|s| s.symbol.name.to_lowercase().contains(&query_lower))
            .collect()
    }

    /// Get all callers of a symbol (symbols that reference it).
    pub fn callers_of(&self, symbol_name: &str) -> Vec<&CrossReference> {
        self.references.iter()
            .filter(|r| r.to_symbol == symbol_name)
            .collect()
    }

    /// Get all callees of a symbol (symbols it references).
    pub fn callees_of(&self, symbol_name: &str) -> Vec<&CrossReference> {
        self.references.iter()
            .filter(|r| r.from_symbol == symbol_name)
            .collect()
    }

    /// Build cross-file references by scanning for identifier usage.
    fn build_cross_references(&mut self, files: &[PathBuf], root: &Path) {
        // Collect all symbol names and their defining files
        let symbol_names: Vec<(String, String)> = self.symbols.values()
            .filter(|s| !matches!(s.symbol.kind, SymbolKind::Import | SymbolKind::Const))
            .map(|s| (s.symbol.name.clone(), s.file_path.clone()))
            .collect();

        // For each file, scan for references to symbols defined in other files
        for file_path in files {
            let rel_path = file_path.strip_prefix(root)
                .unwrap_or(file_path)
                .to_string_lossy()
                .to_string();

            let content = match std::fs::read_to_string(file_path) {
                Ok(c) => c,
                Err(_) => continue,
            };

            // Get symbols defined in this file (to find which symbol contains the reference)
            let file_symbols: Vec<&IndexedSymbol> = self.symbols.values()
                .filter(|s| s.file_path == rel_path)
                .collect();

            for (target_name, target_file) in &symbol_names {
                // Skip self-references within the same file
                if *target_file == rel_path { continue; }
                // Skip very short names (too many false positives)
                if target_name.len() < 3 { continue; }

                // Check if this file contains the target symbol name as a whole word
                if !contains_identifier(&content, target_name) {
                    continue;
                }

                // Find which symbol in this file contains the reference
                let referencing_symbol = file_symbols.iter()
                    .find(|s| {
                        let src = &s.source;
                        contains_identifier(src, target_name)
                    })
                    .map(|s| s.symbol.name.clone())
                    .unwrap_or_else(|| rel_path.clone());

                self.references.push(CrossReference {
                    from_file: rel_path.clone(),
                    from_symbol: referencing_symbol,
                    to_file: target_file.clone(),
                    to_symbol: target_name.clone(),
                });
            }
        }
    }

    /// Summary statistics.
    pub fn stats(&self) -> IndexStats {
        IndexStats {
            files: self.files.len(),
            symbols: self.symbols.len(),
            references: self.references.len(),
        }
    }
}

/// Size counters describing a project index.
#[derive(Debug, Clone)]
pub struct IndexStats {
    /// Number of files.
    pub files: usize,
    /// Number of symbols.
    pub symbols: usize,
    /// Number of cross-file references.
    pub references: usize,
}

/// Renders the counters as a single human-readable line.
impl std::fmt::Display for IndexStats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { files, symbols, references } = self;
        f.write_fmt(format_args!(
            "{} files, {} symbols, {} cross-references",
            files, symbols, references
        ))
    }
}

/// Walk `dir` and return every source file found beneath it.
///
/// Thin wrapper that owns the result vector and delegates the actual
/// traversal to `collect_recursive`.
fn collect_source_files(dir: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    collect_recursive(dir, &mut found);
    found
}

/// Append every source file beneath `dir` to `files`, depth first.
///
/// * Directories that cannot be listed and entries that cannot be read are
///   silently skipped (best effort).
/// * Directories whose name is in `SKIP_DIRS` are not descended into.
/// * Symlinked directories are not descended into: a link that points back
///   at one of its ancestors would otherwise make the walk re-enter the same
///   tree and collect the same files repeatedly.
/// * A file is collected when its extension is in `EXTENSIONS`; symlinks to
///   files are still followed.
fn collect_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
    let entries = match std::fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => return,
    };

    for entry in entries.flatten() {
        let path = entry.path();

        // `DirEntry::file_type` reports on the entry itself and does not
        // follow symlinks, unlike `Path::is_dir` / `Path::is_file` below.
        let is_symlink = entry.file_type()
            .map(|t| t.is_symlink())
            .unwrap_or(false);

        if path.is_dir() {
            if is_symlink {
                continue;
            }
            let name = path.file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");
            if SKIP_DIRS.contains(&name) {
                continue;
            }
            collect_recursive(&path, files);
        } else if path.is_file() {
            let ext = path.extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            if EXTENSIONS.contains(&ext) {
                files.push(path);
            }
        }
    }
}

/// Check if `source` contains `name` as a whole identifier (not a substring).
///
/// An occurrence counts when the character immediately before it and the
/// character immediately after it (if any) are not identifier characters.
/// Boundaries are examined per Unicode character, so a non-ASCII letter next
/// to the match prevents it (`caféfoo` does not contain `foo`), and the
/// search never slices inside a multi-byte character.
///
/// Returns `false` for an empty `name`.
fn contains_identifier(source: &str, name: &str) -> bool {
    // Byte width of `name`'s first character: how far to step past a rejected
    // match. An empty name is never an identifier.
    let step = match name.chars().next() {
        Some(first) => first.len_utf8(),
        None => return false,
    };

    let mut pos = 0;
    while let Some(found) = source[pos..].find(name) {
        let start = pos + found;
        let end = start + name.len();

        let clear_before = !source[..start].chars().next_back().map_or(false, is_ident_scalar);
        let clear_after = !source[end..].chars().next().map_or(false, is_ident_scalar);
        if clear_before && clear_after {
            return true;
        }

        // The match begins with `name`'s first character, so `start + step`
        // is a char boundary; advancing by a single byte might not be.
        pos = start + step;
    }
    false
}

/// True for ASCII bytes that may appear inside an identifier
/// (letters, digits and `_`).
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// Character-level identifier test: ASCII goes through `is_ident_char`,
/// anything else counts when it is alphanumeric in Unicode.
fn is_ident_scalar(c: char) -> bool {
    if c.is_ascii() {
        is_ident_char(c as u8)
    } else {
        c.is_alphanumeric()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Library half of the fixture project: defines `Parser`, its methods and
    /// the free function `create_parser`.
    const LIB_RS: &str = r#"
pub struct Parser {
    lang: String,
}

impl Parser {
    pub fn new(lang: String) -> Self {
        Self { lang }
    }

    pub fn parse(&self, source: &str) -> Vec<String> {
        vec![]
    }
}

pub fn create_parser() -> Parser {
    Parser::new("rust".into())
}
"#;

    /// Binary half of the fixture project: calls `create_parser` from `lib.rs`.
    const MAIN_RS: &str = r#"
mod lib;

fn main() {
    let p = create_parser();
    let result = p.parse("fn foo() {}");
    println!("{:?}", result);
}
"#;

    /// End-to-end check: index a two-file Rust project written to a temp dir.
    #[test]
    fn test_project_index() {
        let project = tempfile::tempdir().unwrap();
        let root = project.path();

        fs::write(root.join("lib.rs"), LIB_RS).unwrap();
        fs::write(root.join("main.rs"), MAIN_RS).unwrap();

        let index = ProjectIndex::build(root);

        // Both files parsed; symbols expected: Parser, new, parse, create_parser, main
        let stats = index.stats();
        assert_eq!(stats.files, 2);
        assert!(stats.symbols >= 4);

        // Exact lookup
        assert!(!index.find("Parser").is_empty());

        // main.rs should show up as a caller of create_parser (defined in lib.rs)
        let callers = index.callers_of("create_parser");
        assert!(!callers.is_empty(), "main.rs should reference create_parser");
        assert!(callers.iter().any(|r| r.from_file == "main.rs"));

        // Fuzzy lookup: Parser, parse, create_parser
        assert!(index.find_fuzzy("pars").len() >= 2);
    }

    /// Whole-word matching: a hit needs non-identifier characters on both sides.
    #[test]
    fn test_contains_identifier() {
        let cases = [
            ("let x = foo();", "foo", true),
            ("let x = foobar();", "foo", false),
            ("use crate::Parser;", "Parser", true),
            ("use crate::ParserBuilder;", "Parser", false),
        ];

        for &(source, name, expected) in &cases {
            assert_eq!(contains_identifier(source, name), expected);
        }
    }
}