agent-source-repository 0.1.0

Agent Source Repository: a local context registry for coding agents
Documentation
use std::collections::HashSet;
use std::path::{Path, PathBuf};

use ignore::overrides::OverrideBuilder;
use ignore::WalkBuilder;
use once_cell::sync::Lazy;

pub(crate) use crate::language::{filter_extensions, language_for_path};

/// Directory names that `walk_source_files` always excludes, in addition to
/// any names supplied by the caller.
///
/// The set is built once, on first access, from the literal list below.
static DEFAULT_IGNORED_DIRS: Lazy<HashSet<String>> = Lazy::new(|| {
    const NAMES: &[&str] = &[
        ".git",
        ".hg",
        ".svn",
        "__pycache__",
        "node_modules",
        "target",
        ".gradle",
        ".swiftpm",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        ".cache",
        ".build",
        ".rumble",
        ".next",
        "Derived",
        "DerivedData",
        "dist",
        "build",
        "tuist-derived",
        ".eggs",
    ];

    NAMES.iter().copied().map(String::from).collect()
});

pub(crate) fn default_ignored_dirs() -> &'static HashSet<String> {
    &DEFAULT_IGNORED_DIRS
}

/// Collects the source files beneath `root` whose extension is listed in
/// `extensions`.
///
/// # Parameters
/// - `root`: directory at which the walk starts.
/// - `extensions`: accepted extensions. Each file's extension is lowercased
///   and prefixed with a dot before the lookup, so entries must look like
///   `".swift"`.
/// - `ignore_dirs`: optional extra directory names to exclude on top of
///   [`default_ignored_dirs`].
///
/// # Returns
/// The paths of all matching regular files, in the order the walker yields
/// them. Entries the walker reports as errors are skipped silently, as are
/// files rejected by `is_default_excluded_file` and files whose extension is
/// missing or not valid UTF-8.
pub(crate) fn walk_source_files(
    root: &Path,
    extensions: &HashSet<String>,
    ignore_dirs: Option<&HashSet<String>>,
) -> Vec<PathBuf> {
    // Union of the built-in names and the caller's names; the set removes
    // duplicates so each name is registered only once.
    let mut skipped_dirs: HashSet<&str> =
        default_ignored_dirs().iter().map(String::as_str).collect();
    if let Some(extra) = ignore_dirs {
        skipped_dirs.extend(extra.iter().map(String::as_str));
    }

    // Each name is registered as three `!`-prefixed override globs: the entry
    // itself at any depth, everything beneath it at any depth, and everything
    // beneath it directly under `root`.
    let mut override_builder = OverrideBuilder::new(root);
    for name in &skipped_dirs {
        for pattern in [
            format!("!**/{name}"),
            format!("!**/{name}/**"),
            format!("!{name}/**"),
        ] {
            // Best effort: a pattern the builder rejects is simply left out.
            let _ = override_builder.add(&pattern);
        }
    }
    let overrides = match override_builder.build() {
        Ok(built) => built,
        // Fall back to an override set with no globs at all.
        Err(_) => OverrideBuilder::new(root).build().unwrap(),
    };

    let mut walk_builder = WalkBuilder::new(root);
    walk_builder
        .overrides(overrides)
        .hidden(false)
        .parents(false)
        .git_ignore(true)
        .git_global(false)
        .git_exclude(false)
        .sort_by_file_name(|left, right| left.cmp(right));

    walk_builder
        .build()
        // Unreadable entries are dropped rather than aborting the walk.
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_some_and(|kind| kind.is_file()))
        .filter_map(|entry| {
            let candidate = entry.path();
            if is_default_excluded_file(candidate) {
                return None;
            }
            let raw_ext = candidate.extension()?.to_str()?;
            let lookup_key = format!(".{}", raw_ext.to_lowercase());
            extensions
                .contains(&lookup_key)
                .then(|| candidate.to_path_buf())
        })
        .collect()
}

/// Reports whether the file name of `path` carries one of the markers that
/// this module excludes by default.
///
/// The comparison is ASCII case-insensitive. A path with no file name, or
/// with a file name that is not valid UTF-8, is never excluded.
fn is_default_excluded_file(path: &Path) -> bool {
    // Markers that may appear anywhere inside the file name.
    const INFIX_MARKERS: [&str; 3] = [".generated.", "_generated.", ".pb."];
    // Markers that must terminate the file name.
    // NOTE: `.pb.rs` is already covered by the `.pb.` infix marker; it is kept
    // so the checks stay exactly as they were.
    const SUFFIX_MARKERS: [&str; 2] = [".pb.rs", ".min.js"];

    match path.file_name().and_then(|name| name.to_str()) {
        None => false,
        Some(raw_name) => {
            let lowered = raw_name.to_ascii_lowercase();
            INFIX_MARKERS
                .iter()
                .any(|marker| lowered.contains(*marker))
                || SUFFIX_MARKERS
                    .iter()
                    .any(|marker| lowered.ends_with(*marker))
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;
    use std::fs;
    use std::path::{Path, PathBuf};
    use std::time::{SystemTime, UNIX_EPOCH};

    use super::walk_source_files;

    /// Scratch directory under the system temp dir that is removed on drop.
    ///
    /// Cleaning up in `Drop` means the tree is also removed when a setup
    /// `expect` or the final assertion panics, instead of being left behind.
    struct TempRoot(PathBuf);

    impl TempRoot {
        /// Picks a fresh path for the test called `name`; the nanosecond
        /// timestamp keeps repeated runs from reusing the same directory.
        fn new(name: &str) -> Self {
            let unique = SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .expect("system time should be after unix epoch")
                .as_nanos();
            Self(std::env::temp_dir().join(format!("asr-source-files-{name}-{unique}")))
        }

        /// Root path of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempRoot {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Files inside `.build` and `Derived` directories must not be returned,
    /// while an ordinary source file next to them is.
    #[test]
    fn walk_source_files_skips_common_generated_directories() {
        // Keep the guard bound for the whole test so cleanup runs at the end.
        let temp = TempRoot::new("generated-dirs");
        let root = temp.path();

        fs::create_dir_all(root.join("Sources")).expect("source dir should be created");
        fs::create_dir_all(root.join(".build/checkouts/Dependency/Sources"))
            .expect(".build dir should be created");
        fs::create_dir_all(root.join("Projects/App/Derived/Sources"))
            .expect("Derived dir should be created");

        fs::write(root.join("Sources/App.swift"), "struct App {}\n")
            .expect("source file should be written");
        fs::write(
            root.join(".build/checkouts/Dependency/Sources/Dependency.swift"),
            "struct Dependency {}\n",
        )
        .expect(".build file should be written");
        fs::write(
            root.join("Projects/App/Derived/Sources/TuistAssets.swift"),
            "struct TuistAssets {}\n",
        )
        .expect("Derived file should be written");

        let extensions = HashSet::from([".swift".to_string()]);
        let files = walk_source_files(root, &extensions, None);
        let relative_files: Vec<_> = files
            .iter()
            .map(|path| {
                path.strip_prefix(root)
                    .expect("file should be under root")
                    .to_string_lossy()
                    // Normalize Windows separators so the expectation is portable.
                    .replace('\\', "/")
            })
            .collect();

        assert_eq!(relative_files, vec!["Sources/App.swift"]);
    }
}