vsec 0.0.1

Detect secrets in Rust codebases
Documentation
// src/discovery/walker.rs

use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;

use dashmap::DashMap;
use glob::Pattern;

use crate::discovery::PreFilter;
use crate::error::{Result, SecretraceError};

/// Thread-safe AST cache: a `DashMap` behind an `Arc` so it can be shared
/// between threads.
///
/// Key: file path. Value: `(modification time in whole seconds since the Unix
/// epoch, parsed AST)`.
///
/// NOTE(review): `DashMap` is a sharded, lock-based concurrent map rather than
/// a lock-free one; mutating the map while holding a reference into it can
/// deadlock — confirm against the `dashmap` version in use.
pub type AstCache = Arc<DashMap<PathBuf, (u64, syn::File)>>;

/// Builds an empty [`AstCache`] that is ready to be shared across threads.
pub fn new_ast_cache() -> AstCache {
    let entries = DashMap::new();
    Arc::new(entries)
}

/// File walker for discovering Rust files in a directory.
/// Uses jwalk for parallel directory traversal.
///
/// Construct it with `FileWalker::new` and adjust it through the `with_*`
/// builder methods; `walk` then returns the `.rs` files that survive every
/// configured filter.
pub struct FileWalker {
    /// Root path to walk (canonicalized at the start of each `walk` call)
    root: PathBuf,

    /// Glob patterns to ignore: a file whose path matches any of them is skipped
    ignore_patterns: Vec<Pattern>,

    /// Patterns to exclusively include (if any): when non-empty, a file must
    /// match at least one of them to be kept
    only_patterns: Vec<Pattern>,

    /// Whether to include test files (defaults to `false`)
    include_tests: bool,

    /// Whether to include example files (defaults to `false`)
    include_examples: bool,

    /// Whether to follow symlinks during traversal (defaults to `false`)
    follow_symlinks: bool,

    /// Pre-filter for quick rejection, consulted through `should_skip_path`
    pre_filter: PreFilter,
}

impl FileWalker {
    /// Creates a walker rooted at `root` with the default configuration: no
    /// ignore/only patterns, test and example files excluded, symlinks not
    /// followed, and a `PreFilter::new()` pre-filter.
    pub fn new(root: impl AsRef<Path>) -> Self {
        Self {
            root: root.as_ref().to_path_buf(),
            ignore_patterns: Vec::new(),
            only_patterns: Vec::new(),
            include_tests: false,
            include_examples: false,
            follow_symlinks: false,
            pre_filter: PreFilter::new(),
        }
    }

    /// Add ignore patterns (glob patterns).
    ///
    /// A file whose full path matches any ignore pattern is skipped. Patterns
    /// that fail to compile are dropped without reporting an error, because
    /// this builder has no error channel.
    pub fn with_ignore_patterns(mut self, patterns: &[String]) -> Self {
        self.ignore_patterns
            .extend(patterns.iter().filter_map(|raw| Pattern::new(raw).ok()));
        self
    }

    /// Add only patterns (glob patterns) - only match these.
    ///
    /// Once at least one only-pattern is registered, a file must match one of
    /// them to be kept. Patterns that fail to compile are dropped without
    /// reporting an error; if every supplied pattern is invalid the list stays
    /// empty and no only-filtering takes place.
    pub fn with_only_patterns(mut self, patterns: &[String]) -> Self {
        self.only_patterns
            .extend(patterns.iter().filter_map(|raw| Pattern::new(raw).ok()));
        self
    }

    /// Include test files
    pub fn with_include_tests(mut self, include: bool) -> Self {
        self.include_tests = include;
        self
    }

    /// Include example files
    pub fn with_include_examples(mut self, include: bool) -> Self {
        self.include_examples = include;
        self
    }

    /// Follow symlinks
    pub fn with_follow_symlinks(mut self, follow: bool) -> Self {
        self.follow_symlinks = follow;
        self
    }

    /// Set custom pre-filter
    pub fn with_pre_filter(mut self, filter: PreFilter) -> Self {
        self.pre_filter = filter;
        self
    }

    /// Walk the directory and return all matching files.
    ///
    /// The root is canonicalized first, so the returned paths are absolute.
    /// Directory entries that cannot be read are skipped rather than reported.
    ///
    /// # Errors
    ///
    /// Returns `SecretraceError::FileRead` when the root path cannot be
    /// canonicalized (for example because it does not exist).
    pub fn walk(&self) -> Result<Vec<PathBuf>> {
        let root = self.root.canonicalize().map_err(|e| SecretraceError::FileRead {
            path: self.root.clone(),
            source: e,
        })?;

        let files: Vec<PathBuf> = jwalk::WalkDir::new(&root)
            .follow_links(self.follow_symlinks)
            .into_iter()
            // Best effort: unreadable entries are dropped, not surfaced.
            .filter_map(|entry| entry.ok())
            .filter(|entry| entry.file_type().is_file())
            .map(|entry| entry.path())
            .filter(|path| self.should_include(&root, path))
            .collect();

        Ok(files)
    }

    /// Check if a path should be included.
    ///
    /// `root` is the canonicalized walk root and `path` a file found beneath
    /// it. Glob patterns and the pre-filter see the full path; the test/example
    /// classification only looks at the part of the path below `root`.
    fn should_include(&self, root: &Path, path: &Path) -> bool {
        // Must be a Rust file
        if path.extension().map_or(true, |ext| ext != "rs") {
            return false;
        }

        // Check pre-filter (size, generated code)
        if self.pre_filter.should_skip_path(path) {
            return false;
        }

        let path_str = path.to_string_lossy();

        // Check ignore patterns
        if self.ignore_patterns.iter().any(|p| p.matches(&path_str)) {
            return false;
        }

        // Check only patterns (if specified)
        if !self.only_patterns.is_empty()
            && !self.only_patterns.iter().any(|p| p.matches(&path_str))
        {
            return false;
        }

        // Classify relative to the walk root: a project that merely lives
        // under an ancestor directory called `tests` or `examples` must not
        // have all of its files discarded. When the root is itself the file
        // (empty remainder) or the prefix does not apply, use the full path.
        let relative = match path.strip_prefix(root) {
            Ok(rel) if rel.as_os_str().is_empty() => path,
            Ok(rel) => rel,
            Err(_) => path,
        };

        // Check test/example exclusions
        if !self.include_tests && self.is_test_path(relative) {
            return false;
        }

        if !self.include_examples && self.is_example_path(relative) {
            return false;
        }

        true
    }

    /// Returns `true` when any directory component of `path` (everything
    /// before the file name) equals one of `names`.
    ///
    /// Works on path components instead of a hard-coded `/`, so the result
    /// does not depend on the platform's path separator.
    fn has_dir_named(path: &Path, names: &[&str]) -> bool {
        path.parent().map_or(false, |dir| {
            dir.iter()
                .any(|component| names.iter().any(|name| component == *name))
        })
    }

    /// Check if path is a test file/directory: it sits inside a `tests` or
    /// `test` directory, or its file name ends in `_test.rs` / `_tests.rs`.
    fn is_test_path(&self, path: &Path) -> bool {
        if Self::has_dir_named(path, &["tests", "test"]) {
            return true;
        }

        path.file_name().map_or(false, |name| {
            let name = name.to_string_lossy();
            name.ends_with("_test.rs") || name.ends_with("_tests.rs")
        })
    }

    /// Check if path is an example file/directory: it sits inside an
    /// `examples` or `example` directory.
    fn is_example_path(&self, path: &Path) -> bool {
        Self::has_dir_named(path, &["examples", "example"])
    }
}

/// Get or parse an AST from the cache.
///
/// The cached AST is returned only when the file's current modification time
/// (whole seconds since the Unix epoch) equals the time stored with the entry.
/// Otherwise the file is read, parsed with `syn::parse_file`, stored in the
/// cache, and returned.
///
/// When the modification time cannot be determined, the cache is bypassed
/// entirely: nothing is served from it and nothing is stored, because such an
/// entry could never be recognised as stale later.
///
/// Timestamps are compared at one-second granularity, so an edit that lands in
/// the same second as the cached parse is not detected.
///
/// # Errors
///
/// Returns `SecretraceError::FileRead` when the file cannot be read and
/// `SecretraceError::Parse` when its contents are not a valid Rust file.
pub fn get_or_parse(
    cache: &AstCache,
    path: &Path,
) -> Result<syn::File> {
    // Read the mtime *before* the contents: if the file changes in between,
    // the entry is stored under the older time and re-validated on next use.
    let modified: Option<u64> = std::fs::metadata(path)
        .and_then(|m| m.modified())
        .ok()
        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
        .map(|d| d.as_secs());

    // Check cache. The guard returned by `get` is released at the end of this
    // block, before the `insert` below touches the map again.
    if let Some(mtime) = modified {
        if let Some(entry) = cache.get(path) {
            let (cached_time, ast) = &*entry;
            if *cached_time == mtime {
                return Ok(ast.clone());
            }
        }
    }

    // Parse and cache
    let content = std::fs::read_to_string(path).map_err(|e| SecretraceError::FileRead {
        path: path.to_path_buf(),
        source: e,
    })?;

    let ast = syn::parse_file(&content).map_err(|e| SecretraceError::Parse {
        path: path.to_path_buf(),
        source: e,
    })?;

    if let Some(mtime) = modified {
        cache.insert(path.to_path_buf(), (mtime, ast.clone()));
    }

    Ok(ast)
}

/// Clear expired entries from the cache
pub fn clear_expired(cache: &AstCache) {
    let mut to_remove = Vec::new();

    for entry in cache.iter() {
        let path = entry.key();
        let (cached_time, _) = entry.value();

        // Check if file still exists and hasn't changed
        let current_time = std::fs::metadata(path)
            .and_then(|m| m.modified())
            .and_then(|t| Ok(t.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0)))
            .unwrap_or(0);

        if current_time != *cached_time {
            to_remove.push(path.clone());
        }
    }

    for path in to_remove {
        cache.remove(&path);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a throwaway crate-like tree with `src/`, `tests/`, `examples/`
    /// and `target/debug/` directories, each holding at least one `.rs` file.
    fn create_test_tree() -> TempDir {
        let dir = TempDir::new().unwrap();

        // Create src directory with files
        std::fs::create_dir_all(dir.path().join("src")).unwrap();
        std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();
        std::fs::write(dir.path().join("src/lib.rs"), "pub fn foo() {}").unwrap();

        // Create tests directory
        std::fs::create_dir_all(dir.path().join("tests")).unwrap();
        std::fs::write(dir.path().join("tests/integration.rs"), "#[test] fn t() {}").unwrap();

        // Create examples directory
        std::fs::create_dir_all(dir.path().join("examples")).unwrap();
        std::fs::write(dir.path().join("examples/basic.rs"), "fn main() {}").unwrap();

        // Create target directory (should be skipped)
        std::fs::create_dir_all(dir.path().join("target/debug")).unwrap();
        std::fs::write(dir.path().join("target/debug/build.rs"), "// generated").unwrap();

        dir
    }

    /// Renders `files` relative to the canonicalized temp root, so substring
    /// assertions cannot be affected by where the temp directory lives.
    fn relative_paths(dir: &TempDir, files: &[PathBuf]) -> Vec<String> {
        let root = dir.path().canonicalize().unwrap();
        files
            .iter()
            .map(|file| {
                file.strip_prefix(&root)
                    .unwrap_or(file.as_path())
                    .to_string_lossy()
                    .into_owned()
            })
            .collect()
    }

    #[test]
    fn test_walk_basic() {
        let dir = create_test_tree();
        let walker = FileWalker::new(dir.path());
        let files = walker.walk().unwrap();
        let relative = relative_paths(&dir, &files);

        // Should find src files but not test/example/target
        assert!(files.iter().any(|p| p.ends_with("main.rs")));
        assert!(files.iter().any(|p| p.ends_with("lib.rs")));
        for excluded in &["tests", "examples", "target"] {
            assert!(
                !relative.iter().any(|p| p.contains(*excluded)),
                "unexpected file under `{}`: {:?}",
                excluded,
                relative
            );
        }
    }

    #[test]
    fn test_walk_with_tests() {
        let dir = create_test_tree();
        let walker = FileWalker::new(dir.path()).with_include_tests(true);
        let files = walker.walk().unwrap();

        assert!(relative_paths(&dir, &files)
            .iter()
            .any(|p| p.contains("tests")));
    }

    #[test]
    fn test_walk_with_examples() {
        let dir = create_test_tree();
        let walker = FileWalker::new(dir.path()).with_include_examples(true);
        let files = walker.walk().unwrap();

        assert!(relative_paths(&dir, &files)
            .iter()
            .any(|p| p.contains("examples")));
    }

    #[test]
    fn test_ignore_patterns() {
        let dir = create_test_tree();
        let walker = FileWalker::new(dir.path())
            .with_ignore_patterns(&["**/lib.rs".to_string()]);
        let files = walker.walk().unwrap();

        assert!(files.iter().any(|p| p.ends_with("main.rs")));
        assert!(!files.iter().any(|p| p.ends_with("lib.rs")));
    }

    #[test]
    fn test_only_patterns() {
        let dir = create_test_tree();
        let walker = FileWalker::new(dir.path())
            .with_only_patterns(&["**/main.rs".to_string()]);
        let files = walker.walk().unwrap();

        assert!(files.iter().any(|p| p.ends_with("main.rs")));
        assert!(!files.iter().any(|p| p.ends_with("lib.rs")));
    }

    #[test]
    fn test_ast_cache() {
        let dir = TempDir::new().unwrap();
        let file_path = dir.path().join("test.rs");
        std::fs::write(&file_path, "fn foo() {}").unwrap();

        let cache = new_ast_cache();

        // First parse - should cache
        let ast1 = get_or_parse(&cache, &file_path).unwrap();
        assert_eq!(cache.len(), 1);

        // Second parse - should hit cache
        let ast2 = get_or_parse(&cache, &file_path).unwrap();
        assert_eq!(cache.len(), 1);

        // Both should have the same number of items
        assert_eq!(ast1.items.len(), ast2.items.len());
    }
}