arbor-watcher 2.0.0

File watching and incremental indexing for Arbor
Documentation
//! Directory indexing.
//!
//! Walks directories to find and parse source files, building
//! the initial code graph.

use arbor_core::{parse_file, CodeNode};
use arbor_graph::{ArborGraph, GraphBuilder, GraphStore};
use ignore::WalkBuilder;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::time::Instant;
use tracing::{debug, info, warn};

/// Result of indexing a directory.
///
/// Returned by `index_directory`. The counters describe how the walk went;
/// per-file parse failures are collected in `errors` rather than aborting
/// the whole run.
pub struct IndexResult {
    /// The built graph, containing the nodes of every file that was either
    /// parsed successfully or loaded from the cache.
    pub graph: ArborGraph,

    /// Number of files processed (parsed fresh).
    ///
    /// Only successful parses are counted; cache hits and files that failed
    /// to parse are not included.
    pub files_indexed: usize,

    /// Number of files loaded from cache.
    pub cache_hits: usize,

    /// Number of nodes extracted.
    ///
    /// This is the total across freshly parsed files and cache hits.
    pub nodes_extracted: usize,

    /// Time taken in milliseconds.
    pub duration_ms: u64,

    /// Files that failed to parse, as `(path, error message)` pairs.
    pub errors: Vec<(String, String)>,
}

/// Options for directory indexing.
///
/// `IndexOptions::default()` does not follow symlinks and disables caching.
#[derive(Debug, Clone, Default)]
pub struct IndexOptions {
    /// Follow symbolic links when walking directories.
    ///
    /// Defaults to `false`.
    pub follow_symlinks: bool,

    /// Path to cache directory (e.g., `.arbor/cache`).
    /// If None, caching is disabled.
    ///
    /// If the cache cannot be opened, indexing logs a warning and proceeds
    /// as if no cache had been configured.
    pub cache_path: Option<PathBuf>,
}

/// Indexes a directory and returns the code graph.
///
/// This walks all source files, parses them, and builds the
/// relationship graph. It respects .gitignore patterns.
///
/// If `options.cache_path` is set, files are cached with their mtimes.
/// Only files with changed mtimes are re-parsed.
///
/// # Example
///
/// ```no_run
/// use arbor_watcher::{index_directory, IndexOptions};
/// use std::path::Path;
///
/// let result = index_directory(Path::new("./src"), IndexOptions::default()).unwrap();
/// println!("Indexed {} files, {} nodes", result.files_indexed, result.nodes_extracted);
/// ```
pub fn index_directory(root: &Path, options: IndexOptions) -> Result<IndexResult, std::io::Error> {
    let start = Instant::now();
    let mut builder = GraphBuilder::new();
    let mut files_indexed = 0;
    let mut cache_hits = 0;
    let mut nodes_extracted = 0;
    let mut errors = Vec::new();

    info!("Starting index of {}", root.display());

    // Open cache if configured
    let store =
        options
            .cache_path
            .as_ref()
            .and_then(|path| match GraphStore::open_or_reset(path) {
                Ok(s) => Some(s),
                Err(e) => {
                    warn!("Failed to open cache: {}, proceeding without cache", e);
                    None
                }
            });

    // Track files we've seen (for detecting deleted files)
    let mut seen_files: HashSet<String> = HashSet::new();

    // Walk the directory, respecting .gitignore
    let walker = WalkBuilder::new(root)
        .hidden(true) // Skip hidden files
        .git_ignore(true) // Respect .gitignore
        .git_global(true)
        .git_exclude(true)
        .follow_links(options.follow_symlinks)
        .build();

    for entry in walker.filter_map(Result::ok) {
        let path = entry.path();

        // Skip directories
        if path.is_dir() {
            continue;
        }

        // Check if it's a supported file type
        let extension = match path.extension().and_then(|e| e.to_str()) {
            Some(ext) => ext,
            None => continue,
        };

        if !arbor_core::languages::is_supported(extension) {
            continue;
        }

        let path_str = path.display().to_string();
        seen_files.insert(path_str.clone());

        // Check cache
        if let Some(ref store) = store {
            // Get file mtime
            let current_mtime = match std::fs::metadata(path) {
                Ok(meta) => meta
                    .modified()
                    .ok()
                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
                    .map(|d| d.as_secs())
                    .unwrap_or(0),
                Err(_) => 0,
            };

            // Check cached mtime
            if let Ok(Some(cached_mtime)) = store.get_mtime(&path_str) {
                if cached_mtime == current_mtime {
                    // File unchanged, load from cache
                    if let Ok(Some(cached_nodes)) = store.get_file_nodes(&path_str) {
                        debug!("Cache hit: {}", path.display());
                        nodes_extracted += cached_nodes.len();
                        cache_hits += 1;
                        builder.add_nodes(cached_nodes);
                        continue;
                    }
                }
            }

            // Cache miss or stale, parse file
            debug!("Parsing (cache miss): {}", path.display());
            match parse_file(path) {
                Ok(nodes) => {
                    nodes_extracted += nodes.len();
                    files_indexed += 1;
                    // Update cache
                    if let Err(e) = store.update_file(&path_str, &nodes, current_mtime) {
                        warn!("Failed to update cache for {}: {}", path_str, e);
                    }
                    builder.add_nodes(nodes);
                }
                Err(e) => {
                    warn!("Failed to parse {}: {}", path.display(), e);
                    errors.push((path_str, e.to_string()));
                }
            }
        } else {
            // No cache, parse directly
            debug!("Parsing {}", path.display());
            match parse_file(path) {
                Ok(nodes) => {
                    nodes_extracted += nodes.len();
                    files_indexed += 1;
                    builder.add_nodes(nodes);
                }
                Err(e) => {
                    warn!("Failed to parse {}: {}", path.display(), e);
                    errors.push((path_str, e.to_string()));
                }
            }
        }
    }

    // Handle deleted files: remove from cache any files that no longer exist
    if let Some(ref store) = store {
        if let Ok(cached_files) = store.list_cached_files() {
            for cached_file in cached_files {
                if !seen_files.contains(&cached_file) {
                    debug!("Removing deleted file from cache: {}", cached_file);
                    if let Err(e) = store.remove_file(&cached_file) {
                        warn!("Failed to remove {} from cache: {}", cached_file, e);
                    }
                }
            }
        }
    }

    let graph = builder.build();
    let duration = start.elapsed();

    info!(
        "Indexed {} files, {} cache hits ({} nodes) in {:?}",
        files_indexed, cache_hits, nodes_extracted, duration
    );

    Ok(IndexResult {
        graph,
        files_indexed,
        cache_hits,
        nodes_extracted,
        duration_ms: duration.as_millis() as u64,
        errors,
    })
}

/// Parses a single file and returns its nodes.
///
/// This is a direct pass-through to `arbor_core::parse_file`: the path is
/// forwarded unchanged and the result (nodes or parse error) is returned
/// as-is. No caching is involved.
// NOTE(review): the function is `pub`, so `dead_code` is presumably allowed
// for builds where nothing in this crate calls it; confirm whether the
// attribute is still needed.
#[allow(dead_code)]
pub fn parse_single_file(path: &Path) -> Result<Vec<CodeNode>, arbor_core::ParseError> {
    parse_file(path)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Indexing a directory with no files yields zero files and zero nodes.
    #[test]
    fn test_index_empty_directory() {
        let workspace = tempdir().unwrap();

        let outcome = index_directory(workspace.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
        assert_eq!(outcome.nodes_extracted, 0);
    }

    /// A single Rust source file is parsed and produces at least one node.
    #[test]
    fn test_index_with_rust_file() {
        let workspace = tempdir().unwrap();
        let source = r#"
            pub fn hello() {
                println!("Hello!");
            }
        "#;
        fs::write(workspace.path().join("test.rs"), source).unwrap();

        let outcome = index_directory(workspace.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }

    /// Creates a directory symlink at `link` pointing to `original`, using
    /// the platform-specific API.
    ///
    /// Yields `None` when the symlink cannot be created (for example on
    /// Windows without the required privileges) or on platforms that are
    /// neither Unix nor Windows.
    fn create_dir_symlink(original: &std::path::Path, link: &std::path::Path) -> Option<()> {
        #[cfg(unix)]
        let created = std::os::unix::fs::symlink(original, link).ok();
        #[cfg(windows)]
        let created = std::os::windows::fs::symlink_dir(original, link).ok();
        #[cfg(not(any(unix, windows)))]
        let created = None;

        created
    }

    /// Builds the fixture shared by the symlink tests: an otherwise empty
    /// root directory containing a symlink named `linked` that points at a
    /// second temporary directory holding `linked.rs`.
    ///
    /// Both directory guards are returned so the caller keeps them alive for
    /// the duration of the test. Yields `None` when the symlink could not be
    /// created, in which case the test should be skipped.
    fn symlinked_workspace() -> Option<(tempfile::TempDir, tempfile::TempDir)> {
        let root = tempdir().unwrap();
        let target = tempdir().unwrap();

        // The only source file lives behind the symlink.
        fs::write(target.path().join("linked.rs"), "pub fn linked_func() {}").unwrap();

        create_dir_symlink(target.path(), &root.path().join("linked"))?;
        Some((root, target))
    }

    /// With default options the symlinked directory is not traversed, so the
    /// file behind it is never indexed.
    #[test]
    fn test_index_does_not_follow_symlinks_by_default() {
        let (root, _target) = match symlinked_workspace() {
            Some(dirs) => dirs,
            // Symlinks not supported here (e.g., Windows without privileges).
            None => return,
        };

        let outcome = index_directory(root.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
    }

    /// With `follow_symlinks` enabled the file behind the symlink is found
    /// and parsed.
    #[test]
    fn test_index_follows_symlinks_when_enabled() {
        let (root, _target) = match symlinked_workspace() {
            Some(dirs) => dirs,
            // Symlinks not supported here (e.g., Windows without privileges).
            None => return,
        };

        let follow = IndexOptions {
            follow_symlinks: true,
            ..IndexOptions::default()
        };
        let outcome = index_directory(root.path(), follow).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }
}