sparrow-cli 0.5.1

A local-first Rust agent cockpit — route, run, replay, rewind
Documentation
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock, RwLock};
use std::time::UNIX_EPOCH;

use tree_sitter::{Language, Node, Parser, Query, QueryCursor};

/// Directory names the file walk never descends into (matched against the
/// directory's own name, not its full path).
const SKIP_DIRS: &[&str] = &[".git", "target", "node_modules", "dist", "build", ".venv"];
/// Number of collected files at which the walk stops visiting further directories.
const MAX_FILES: usize = 10_000;

/// Category of a symbol definition recorded in the index.
///
/// Each variant corresponds to one capture label used by the tree-sitter
/// queries in this file (`@fn`, `@struct`, `@enum`, `@trait`, `@impl`,
/// `@class`, `@method`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolKind {
    /// A function: Rust `function_item`, Python `function_definition`, a JavaScript
    /// function declaration, or a lexical binding of an arrow/function expression.
    Fn,
    /// A Rust `struct_item`.
    Struct,
    /// A Rust `enum_item`.
    Enum,
    /// A Rust `trait_item`.
    Trait,
    /// A Rust `impl_item`; the symbol name is taken from the implemented-on type.
    Impl,
    /// A Python `class_definition` or JavaScript `class_declaration`.
    Class,
    /// A JavaScript `method_definition`.
    Method,
}

impl SymbolKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Fn => "fn",
            Self::Struct => "struct",
            Self::Enum => "enum",
            Self::Trait => "trait",
            Self::Impl => "impl",
            Self::Class => "class",
            Self::Method => "method",
        }
    }
}

/// A single symbol definition found while parsing a source file.
#[derive(Debug, Clone)]
pub struct SymbolDef {
    /// Symbol name as captured by the query, after `clean_symbol_name`.
    pub name: String,
    /// What sort of definition this is.
    pub kind: SymbolKind,
    /// File containing the definition; relative to the index root when the
    /// file lies under it, otherwise the path as it was walked.
    pub file: PathBuf,
    /// 1-based line on which the definition node starts.
    pub line: usize,
    /// First line of the definition's source text, trimmed, with any trailing
    /// `{` removed.
    pub signature: String,
}

/// Name-keyed index of symbol definitions for a directory tree.
#[derive(Debug, Clone, Default)]
pub struct SymbolIndex {
    /// Definitions grouped by symbol name. `SymbolIndex::build` leaves each
    /// group sorted by file, then by line.
    pub defs: HashMap<String, Vec<SymbolDef>>,
}

/// A previously built index together with the fingerprint it was built from.
#[derive(Debug, Clone)]
struct CachedIndex {
    /// `content_hash` of the file list at build time; a later build reuses the
    /// entry only when its own hash is equal.
    hash: u64,
    /// The index that was built for that fingerprint.
    index: Arc<SymbolIndex>,
}

/// Process-wide cache of built indexes, keyed by normalized root path and
/// created lazily on the first `SymbolIndex::build` call.
static CACHE: OnceLock<RwLock<HashMap<PathBuf, CachedIndex>>> = OnceLock::new();

impl SymbolIndex {
    /// Builds (or fetches from the process-wide cache) the symbol index for
    /// the tree rooted at `root`.
    ///
    /// The root is normalized, the tree is walked for supported source files,
    /// and a fingerprint of the file list (paths, sizes, modification times)
    /// is computed. If the cache holds an entry for this root with the same
    /// fingerprint, a clone of that index is returned. Otherwise every file is
    /// read and parsed, the result is stored in the cache, and returned.
    ///
    /// Files that cannot be read as UTF-8 text are skipped. If the cache lock
    /// cannot be acquired, the build proceeds without the cache.
    pub fn build(root: &Path) -> SymbolIndex {
        let root = normalize_root(root);
        let mut files = Vec::new();
        walk_code_files(&root, &mut files);
        let hash = content_hash(&files);
        let cache = CACHE.get_or_init(|| RwLock::new(HashMap::new()));

        // Fast path: same root, same fingerprint -> hand back a copy.
        let cached_hit = cache.read().ok().and_then(|guard| {
            guard
                .get(&root)
                .filter(|entry| entry.hash == hash)
                .map(|entry| SymbolIndex::clone(&entry.index))
        });
        if let Some(index) = cached_hit {
            return index;
        }

        let mut index = SymbolIndex::default();
        for file in &files {
            let Ok(content) = std::fs::read_to_string(file) else {
                continue;
            };
            for def in parse_file(&root, file, &content) {
                index.defs.entry(def.name.clone()).or_default().push(def);
            }
        }
        // Stable order within each name: by file, then by line.
        for group in index.defs.values_mut() {
            group.sort_by(|a, b| (&a.file, a.line).cmp(&(&b.file, b.line)));
        }

        if let Ok(mut guard) = cache.write() {
            let entry = CachedIndex {
                hash,
                index: Arc::new(index.clone()),
            };
            guard.insert(root, entry);
        }
        index
    }

    /// Returns every definition recorded under exactly `name`, or an empty
    /// vector when the name is unknown.
    pub fn find_definition(&self, name: &str) -> Vec<&SymbolDef> {
        match self.defs.get(name) {
            Some(defs) => defs.iter().collect(),
            None => Vec::new(),
        }
    }

    /// Returns all definitions whose file equals `file` (compared after
    /// converting backslashes to forward slashes), ordered by line and then
    /// by name.
    pub fn outline(&self, file: &Path) -> Vec<&SymbolDef> {
        let wanted = normalize_separators(file);
        let mut out: Vec<&SymbolDef> = Vec::new();
        for group in self.defs.values() {
            for def in group {
                if normalize_separators(&def.file) == wanted {
                    out.push(def);
                }
            }
        }
        out.sort_by(|a, b| a.line.cmp(&b.line).then(a.name.cmp(&b.name)));
        out
    }
}

/// Resolves `root` to its canonical absolute form; if canonicalization fails
/// (for example because the path does not exist) the path is returned as given.
fn normalize_root(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => PathBuf::from(root),
    }
}

/// Renders `path` as a string (lossily, for non-UTF-8 paths) with every
/// backslash turned into a forward slash, so paths compare equal regardless
/// of which separator they were written with.
fn normalize_separators(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}

/// Collects supported source files under `root` into `out`.
///
/// A git repository is looked up from `root`; when one is found its ignore
/// rules are consulted during the walk, otherwise nothing is treated as ignored.
fn walk_code_files(root: &Path, out: &mut Vec<PathBuf>) {
    let repo = match git2::Repository::discover(root) {
        Ok(found) => Some(found),
        Err(_) => None,
    };
    walk_code_files_inner(root, &repo, out);
}

/// Recursively appends supported source files under `dir` to `out`.
///
/// * Directories whose name starts with `.` or appears in `SKIP_DIRS` are not
///   entered.
/// * Entries matched by the repository's ignore rules (when `repo` is `Some`)
///   are skipped.
/// * Only files for which `language_for_path` returns a language are kept.
/// * Unreadable directories and entries are silently skipped.
///
/// Collection stops as soon as `out` holds `MAX_FILES` paths. The cap is
/// checked before every entry, so neither one very large directory nor the
/// remainder of a parent directory after a nested walk can push past it.
///
/// NOTE(review): `Path::is_dir` follows symlinks, so symlinked directories are
/// traversed; confirm that is intended.
fn walk_code_files_inner(dir: &Path, repo: &Option<git2::Repository>, out: &mut Vec<PathBuf>) {
    if out.len() >= MAX_FILES {
        return;
    }
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // Enforce the cap per entry, not just per directory.
        if out.len() >= MAX_FILES {
            return;
        }
        let path = entry.path();
        let name = entry.file_name();
        let name = name.to_string_lossy();
        let is_dir = path.is_dir();

        // Cheap name/extension filters first; the ignore lookup goes through
        // libgit2 and is only worth paying for entries we would otherwise keep.
        let candidate = if is_dir {
            !(name.starts_with('.') || SKIP_DIRS.contains(&name.as_ref()))
        } else {
            language_for_path(&path).is_some()
        };
        if !candidate || is_ignored(repo, &path) {
            continue;
        }

        if is_dir {
            walk_code_files_inner(&path, repo, out);
        } else {
            out.push(path);
        }
    }
}

/// Reports whether the repository's ignore rules apply to `path`.
///
/// Returns `false` when there is no repository or when the ignore lookup
/// itself fails.
fn is_ignored(repo: &Option<git2::Repository>, path: &Path) -> bool {
    match repo {
        Some(repository) => repository.status_should_ignore(path).unwrap_or(false),
        None => false,
    }
}

/// Computes a 64-bit fingerprint of a file list.
///
/// For each file, in order, the separator-normalized path is mixed in; when
/// metadata is available the file length and the modification time (as
/// nanoseconds since the Unix epoch, or 0 when unavailable) follow. File
/// contents are not read.
fn content_hash(files: &[PathBuf]) -> u64 {
    files.iter().fold(0xcbf29ce484222325u64, |acc, file| {
        let acc = fnv_mix(acc, normalize_separators(file).as_bytes());
        let Ok(meta) = file.metadata() else {
            // No metadata: only the path contributes for this file.
            return acc;
        };
        let acc = fnv_mix(acc, &meta.len().to_le_bytes());
        let mtime_nanos: u128 = match meta.modified() {
            Ok(stamp) => stamp
                .duration_since(UNIX_EPOCH)
                .map(|elapsed| elapsed.as_nanos())
                .unwrap_or_default(),
            Err(_) => 0,
        };
        fnv_mix(acc, &mtime_nanos.to_le_bytes())
    })
}

/// Folds `bytes` into `hash` one byte at a time: XOR the byte in, then
/// multiply (wrapping) by the 64-bit FNV prime. Returns the updated state, so
/// calls can be chained to hash several fields in sequence.
fn fnv_mix(hash: u64, bytes: &[u8]) -> u64 {
    const FNV_PRIME: u64 = 0x100000001b3;
    bytes
        .iter()
        .fold(hash, |state, &byte| (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME))
}

/// Maps a file's extension to a language label and the tree-sitter grammar
/// used to parse it. Returns `None` for files with no extension, a non-UTF-8
/// extension, or an extension that is not listed below.
///
/// NOTE(review): `.ts`/`.tsx` are labelled "typescript" but parsed with the
/// JavaScript grammar — presumably a deliberate approximation; confirm.
fn language_for_path(path: &Path) -> Option<(&'static str, Language)> {
    let extension = path.extension()?.to_str()?;
    let entry: (&'static str, Language) = match extension {
        "rs" => ("rust", tree_sitter_rust::LANGUAGE.into()),
        "py" => ("python", tree_sitter_python::LANGUAGE.into()),
        "js" | "jsx" => ("javascript", tree_sitter_javascript::LANGUAGE.into()),
        "ts" | "tsx" => ("typescript", tree_sitter_javascript::LANGUAGE.into()),
        _ => return None,
    };
    Some(entry)
}

/// Builds the tree-sitter query that finds symbol definitions for the given
/// language label.
///
/// Every pattern captures the symbol's name node as `@name` and the whole
/// definition node under a label (`@fn`, `@struct`, `@enum`, `@trait`,
/// `@impl`, `@class`, `@method`) that `kind_from_capture` maps to a
/// `SymbolKind`. "javascript" and "typescript" share one query.
///
/// Returns `None` for an unknown label or when the query fails to compile
/// against `language`.
fn query_for_language(language_name: &str, language: &Language) -> Option<Query> {
    let source = match language_name {
        "rust" => {
            r#"
            (function_item name: (identifier) @name) @fn
            (struct_item name: (type_identifier) @name) @struct
            (enum_item name: (type_identifier) @name) @enum
            (trait_item name: (type_identifier) @name) @trait
            (impl_item type: (_) @name) @impl
            "#
        }
        "python" => {
            r#"
            (function_definition name: (identifier) @name) @fn
            (class_definition name: (identifier) @name) @class
            "#
        }
        "javascript" | "typescript" => {
            r#"
            (function_declaration name: (identifier) @name) @fn
            (method_definition name: (property_identifier) @name) @method
            (class_declaration name: (identifier) @name) @class
            (lexical_declaration
              (variable_declarator
                name: (identifier) @name
                value: [(arrow_function) (function_expression)])) @fn
            "#
        }
        _ => return None,
    };
    // A query that does not compile for this grammar is treated as "no query".
    Query::new(language, source).ok()
}

/// Parses `content` (the text of `file`) and returns one `SymbolDef` per query
/// match that yields both a name and a recognised definition kind.
///
/// The language and query are chosen from the file's extension. An empty
/// vector is returned when the extension is unsupported, the query cannot be
/// built, the parser rejects the language, or parsing produces no tree.
/// Recorded file paths are made relative to `root` when possible.
fn parse_file(root: &Path, file: &Path, content: &str) -> Vec<SymbolDef> {
    let Some((language_name, language)) = language_for_path(file) else {
        return Vec::new();
    };
    let Some(query) = query_for_language(language_name, &language) else {
        return Vec::new();
    };
    let mut parser = Parser::new();
    if parser.set_language(&language).is_err() {
        return Vec::new();
    }
    let Some(tree) = parser.parse(content, None) else {
        return Vec::new();
    };

    let source = content.as_bytes();
    // Same for every symbol in this file, so resolve it once.
    let relative = file.strip_prefix(root).unwrap_or(file);
    let capture_names = query.capture_names();
    let mut cursor = QueryCursor::new();
    let mut symbols = Vec::new();

    for found in cursor.matches(&query, tree.root_node(), source) {
        // `@name` marks the identifier; any other capture marks the
        // definition node, and its label decides the kind.
        let mut name_capture = None;
        let mut definition = None;
        for capture in found.captures {
            match capture_names[capture.index as usize] {
                "name" => name_capture = Some(capture.node),
                label => definition = Some((capture.node, kind_from_capture(label))),
            }
        }
        let (Some(name_node), Some((node, Some(kind)))) = (name_capture, definition) else {
            continue;
        };

        let name = node_text(name_node, source);
        if name.is_empty() {
            continue;
        }
        symbols.push(SymbolDef {
            name: clean_symbol_name(&name),
            kind,
            file: relative.to_path_buf(),
            // tree-sitter rows are 0-based.
            line: node.start_position().row + 1,
            signature: signature_for_node(node, content),
        });
    }

    symbols
}

/// Translates a query capture label into the matching `SymbolKind`, or `None`
/// when the label is not one of the known definition labels.
fn kind_from_capture(capture: &str) -> Option<SymbolKind> {
    const LABELS: &[(&str, SymbolKind)] = &[
        ("fn", SymbolKind::Fn),
        ("struct", SymbolKind::Struct),
        ("enum", SymbolKind::Enum),
        ("trait", SymbolKind::Trait),
        ("impl", SymbolKind::Impl),
        ("class", SymbolKind::Class),
        ("method", SymbolKind::Method),
    ];
    LABELS
        .iter()
        .find(|(label, _)| *label == capture)
        .map(|(_, kind)| kind.clone())
}

/// Returns the source text covered by `node`, trimmed of surrounding
/// whitespace; yields an empty string when the text cannot be read as UTF-8.
fn node_text(node: Node<'_>, bytes: &[u8]) -> String {
    match node.utf8_text(bytes) {
        Ok(text) => text.trim().to_owned(),
        Err(_) => String::new(),
    }
}

/// Reduces captured name text to the symbol name that is stored in the index.
///
/// The last whitespace-separated token is kept, so leading modifiers such as
/// `&'a mut` or `dyn` are dropped, and any `{`, `(`, `)` or `;` at either end
/// of that token is stripped.
///
/// Whitespace inside `<...>`, `(...)` or `[...]` does not split tokens.
/// Without this, an impl target such as `HashMap<K, V>` would be cut down to
/// the fragment `V>` and `[u8; 4]` to `4]`. Text with no whitespace, or with
/// whitespace only outside brackets, gives the same result as a plain
/// last-token split.
fn clean_symbol_name(name: &str) -> String {
    let mut depth = 0usize;
    let mut token_start: Option<usize> = None;
    let mut last_token: Option<&str> = None;

    for (idx, ch) in name.char_indices() {
        if depth == 0 && ch.is_whitespace() {
            // Top-level whitespace closes the token in progress, if any.
            if let Some(start) = token_start.take() {
                last_token = Some(&name[start..idx]);
            }
            continue;
        }
        if token_start.is_none() {
            token_start = Some(idx);
        }
        match ch {
            '<' | '(' | '[' => depth += 1,
            // Saturating so stray closers (e.g. the `>` of `->`) cannot underflow.
            '>' | ')' | ']' => depth = depth.saturating_sub(1),
            _ => {}
        }
    }
    if let Some(start) = token_start {
        last_token = Some(&name[start..]);
    }

    last_token
        .unwrap_or(name)
        .trim_matches(|c: char| c == '{' || c == '(' || c == ')' || c == ';')
        .to_string()
}

/// Produces a one-line signature for `node`: the first line of the node's
/// source text, trimmed, with trailing `{` characters removed and trimmed
/// again. Byte offsets are clamped to the length of `content`.
fn signature_for_node(node: Node<'_>, content: &str) -> String {
    let limit = content.len();
    let span = node.start_byte().min(limit)..node.end_byte().min(limit);
    let first_line = content[span].lines().next().unwrap_or("");
    first_line.trim().trim_end_matches('{').trim().to_string()
}