git-prism 0.8.0

Agent-optimized git data MCP server — structured change manifests and full file snapshots for LLM agents
use serde::Serialize;

/// Determines whether a file is likely generated or vendored.
///
/// Uses path patterns and optional content inspection to flag files
/// that agents should typically skip (lockfiles, vendored code,
/// auto-generated artifacts).
///
/// This is a stateless unit struct: the check is exposed through the
/// associated function `is_generated`, so no instance has to be
/// constructed to use it.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct GeneratedFileDetector;

impl GeneratedFileDetector {
    /// Returns `true` when `path` or `content` suggests the file is
    /// generated or vendored.
    ///
    /// The path is checked first; `content` is only inspected when the path
    /// does not match. Passing `None` for `content` limits the check to the
    /// path alone.
    pub fn is_generated(path: &str, content: Option<&str>) -> bool {
        Self::matches_path_pattern(path) || Self::matches_content_pattern(content)
    }

    /// Returns `true` when `path` matches a known generated/vendored pattern.
    ///
    /// A path matches when any of the following holds:
    /// - it lives under a `vendor/` or `node_modules/` directory, either at
    ///   the start of the path or nested anywhere inside it;
    /// - its file name (the part after the last `/`) is a known lockfile;
    /// - its file name ends with a known generated-artifact suffix.
    fn matches_path_pattern(path: &str) -> bool {
        // Each entry pairs the form that matches at the start of the path
        // with the form that matches a nested directory. Requiring the
        // slashes keeps names such as `vendor_utils.rs` from matching.
        const DIR_PATTERNS: &[(&str, &str)] =
            &[("vendor/", "/vendor/"), ("node_modules/", "/node_modules/")];

        const EXACT_NAMES: &[&str] = &[
            "Cargo.lock",
            "package-lock.json",
            "poetry.lock",
            "go.sum",
            "yarn.lock",
            "pnpm-lock.yaml",
            "Gemfile.lock",
            "composer.lock",
        ];

        const SUFFIXES: &[&str] = &[".min.js", ".min.css", ".pb.go", "_generated.go"];

        // A path without any `/` is itself the file name.
        let filename = path.rsplit_once('/').map_or(path, |(_, name)| name);

        let in_skipped_dir = DIR_PATTERNS
            .iter()
            .any(|(prefix, nested)| path.starts_with(prefix) || path.contains(nested));

        in_skipped_dir
            || EXACT_NAMES.contains(&filename)
            || SUFFIXES.iter().any(|suffix| filename.ends_with(suffix))
    }

    /// Returns `true` when the beginning of `content` contains a
    /// "generated file" marker, compared case-insensitively.
    ///
    /// Only the first `HEADER_BYTES` bytes are inspected. `None` never
    /// matches.
    fn matches_content_pattern(content: Option<&str>) -> bool {
        const MARKERS: &[&str] = &["DO NOT EDIT", "generated by", "auto-generated"];

        /// Upper bound, in bytes, on how much of the content is scanned.
        const HEADER_BYTES: usize = 1024;

        let Some(text) = content else {
            return false;
        };

        // Slicing a `str` at an offset inside a multi-byte character panics,
        // so back the cut-off down to the nearest character boundary. Offset
        // 0 is always a boundary, which guarantees the loop terminates.
        let mut end = text.len().min(HEADER_BYTES);
        while !text.is_char_boundary(end) {
            end -= 1;
        }

        let lowered = text[..end].to_lowercase();
        MARKERS
            .iter()
            .any(|marker| lowered.contains(&marker.to_lowercase()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the detector on a path alone, with no file content supplied.
    fn flagged_by_path(path: &str) -> bool {
        GeneratedFileDetector::is_generated(path, None)
    }

    /// Runs the detector on a path together with the file's content.
    fn flagged_with_content(path: &str, content: &str) -> bool {
        GeneratedFileDetector::is_generated(path, Some(content))
    }

    #[test]
    fn it_detects_vendor_directory_as_generated() {
        assert!(flagged_by_path("vendor/github.com/foo/bar.go"));
    }

    #[test]
    fn it_detects_node_modules_as_generated() {
        assert!(flagged_by_path("node_modules/lodash/index.js"));
    }

    #[test]
    fn it_detects_lockfiles_as_generated() {
        for lockfile in ["Cargo.lock", "package-lock.json", "poetry.lock", "go.sum"] {
            assert!(flagged_by_path(lockfile), "{lockfile} should be flagged");
        }
    }

    #[test]
    fn it_detects_minified_files_as_generated() {
        for minified in ["dist/app.min.js", "styles.min.css"] {
            assert!(flagged_by_path(minified), "{minified} should be flagged");
        }
    }

    #[test]
    fn it_detects_protobuf_generated_go_as_generated() {
        for go_file in ["api/v1/service.pb.go", "internal/types_generated.go"] {
            assert!(flagged_by_path(go_file), "{go_file} should be flagged");
        }
    }

    #[test]
    fn it_detects_additional_lockfiles_as_generated() {
        for lockfile in ["yarn.lock", "pnpm-lock.yaml", "Gemfile.lock", "composer.lock"] {
            assert!(flagged_by_path(lockfile), "{lockfile} should be flagged");
        }
    }

    #[test]
    fn it_detects_content_with_do_not_edit_marker() {
        let body = "// DO NOT EDIT - generated by protoc\npackage api\n";
        assert!(flagged_with_content("api.go", body));
    }

    #[test]
    fn it_detects_content_with_generated_by_marker() {
        let body = "# This file was generated by tool v1.2\nfoo = bar\n";
        assert!(flagged_with_content("config.py", body));
    }

    #[test]
    fn it_detects_content_with_auto_generated_marker() {
        let body = "/* auto-generated */\nconst x = 1;\n";
        assert!(flagged_with_content("out.js", body));
    }

    #[test]
    fn it_does_not_flag_normal_content() {
        let body = "fn main() {\n    println!(\"hello\");\n}\n";
        assert!(!flagged_with_content("src/main.rs", body));
    }

    #[test]
    fn it_detects_nested_node_modules_as_generated() {
        assert!(flagged_by_path("packages/ui/node_modules/react/index.js"));
    }

    #[test]
    fn it_detects_nested_vendor_directory_as_generated() {
        assert!(flagged_by_path("apps/backend/vendor/github.com/foo/bar.go"));
    }

    #[test]
    fn it_does_not_flag_vendor_as_substring_in_filename() {
        // The file name merely starts with "vendor"; no directory is named `vendor`.
        assert!(!flagged_by_path("src/vendor_utils.rs"));
    }

    #[test]
    fn it_does_not_flag_node_modules_as_substring_in_filename() {
        assert!(!flagged_by_path("docs/node_modules_guide.md"));
    }

    #[test]
    fn it_does_not_flag_normal_source_files() {
        for source in ["src/main.rs", "lib/utils.py", "cmd/server/main.go"] {
            assert!(!flagged_by_path(source), "{source} should not be flagged");
        }
    }
}