Skip to main content

rma_parser/
walker.rs

1//! File system walker for discovering source files
2
3use anyhow::Result;
4use ignore::WalkBuilder;
5use rma_common::{Language, RmaConfig};
6use std::path::{Path, PathBuf};
7use tracing::{debug, trace};
8
9/// Collect all source files from a directory tree
10pub fn collect_files(root: &Path, config: &RmaConfig) -> Result<Vec<PathBuf>> {
11    let mut files = Vec::new();
12
13    let walker = WalkBuilder::new(root)
14        .hidden(true) // Skip hidden files by default
15        .git_ignore(true) // Respect .gitignore
16        .git_global(true)
17        .git_exclude(true)
18        .follow_links(false)
19        .build();
20
21    let supported_extensions: Vec<&str> = if config.languages.is_empty() {
22        // All supported languages
23        vec![
24            "rs", "js", "mjs", "cjs", "ts", "tsx", "py", "pyi", "go", "java",
25        ]
26    } else {
27        config
28            .languages
29            .iter()
30            .flat_map(|l| l.extensions().iter().copied())
31            .collect()
32    };
33
34    for entry in walker.filter_map(|e| e.ok()) {
35        let path = entry.path();
36
37        // Skip directories
38        if path.is_dir() {
39            continue;
40        }
41
42        // Check extension
43        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
44
45        if !supported_extensions.contains(&ext) {
46            trace!("Skipping unsupported extension: {}", path.display());
47            continue;
48        }
49
50        // Check exclude patterns
51        let path_str = path.to_string_lossy();
52        let excluded = config.exclude_patterns.iter().any(|pattern| {
53            glob::Pattern::new(pattern)
54                .map(|p| p.matches(&path_str))
55                .unwrap_or(false)
56        });
57
58        if excluded {
59            debug!("Excluded by pattern: {}", path.display());
60            continue;
61        }
62
63        files.push(path.to_path_buf());
64    }
65
66    // Sort for deterministic ordering
67    files.sort();
68
69    Ok(files)
70}
71
72/// Check if a path should be excluded based on patterns
73pub fn is_excluded(path: &Path, patterns: &[String]) -> bool {
74    let path_str = path.to_string_lossy();
75    patterns.iter().any(|pattern| {
76        glob::Pattern::new(pattern)
77            .map(|p| p.matches(&path_str))
78            .unwrap_or(false)
79    })
80}
81
82/// Get language stats from a list of files
83pub fn language_stats(files: &[PathBuf]) -> std::collections::HashMap<Language, usize> {
84    let mut stats = std::collections::HashMap::new();
85
86    for file in files {
87        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
88        let lang = Language::from_extension(ext);
89        *stats.entry(lang).or_insert(0) += 1;
90    }
91
92    stats
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// With the default configuration, only the `.rs` and `.py` fixtures are
    /// collected; the `.txt` file is left out.
    #[test]
    fn test_collect_files() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        // Fixture table: (file name, contents).
        for (name, contents) in [
            ("test.rs", "fn main() {}"),
            ("test.py", "def main(): pass"),
            ("test.txt", "ignored"),
        ] {
            fs::write(root.join(name), contents).unwrap();
        }

        let found = collect_files(root, &RmaConfig::default()).unwrap();

        assert_eq!(found.len(), 2);
        for wanted in ["rs", "py"] {
            assert!(found.iter().any(|p| p.extension().unwrap() == wanted));
        }
    }

    /// Files are tallied per language according to their extension.
    #[test]
    fn test_language_stats() {
        let files: Vec<PathBuf> = ["a.rs", "b.rs", "c.py", "d.js"]
            .iter()
            .map(PathBuf::from)
            .collect();

        let stats = language_stats(&files);

        let expected = [
            (Language::Rust, 2),
            (Language::Python, 1),
            (Language::JavaScript, 1),
        ];
        for (language, count) in expected {
            assert_eq!(stats.get(&language), Some(&count));
        }
    }
}