Skip to main content

rma_parser/
walker.rs

1//! File system walker for discovering source files
2
3use anyhow::Result;
4use ignore::WalkBuilder;
5use rma_common::{Language, RmaConfig};
6use std::path::{Path, PathBuf};
7use tracing::{debug, trace};
8
9/// Collect all source files from a directory tree
10pub fn collect_files(root: &Path, config: &RmaConfig) -> Result<Vec<PathBuf>> {
11    let mut files = Vec::new();
12
13    let walker = WalkBuilder::new(root)
14        .hidden(true) // Skip hidden files by default
15        .git_ignore(true) // Respect .gitignore
16        .git_global(true)
17        .git_exclude(true)
18        .follow_links(false)
19        .build();
20
21    let supported_extensions: Vec<&str> = if config.languages.is_empty() {
22        // All supported languages
23        vec![
24            "rs", "js", "mjs", "cjs", "ts", "tsx", "py", "pyi", "go", "java",
25        ]
26    } else {
27        config
28            .languages
29            .iter()
30            .flat_map(|l| l.extensions().iter().copied())
31            .collect()
32    };
33
34    for entry in walker.filter_map(|e| e.ok()) {
35        let path = entry.path();
36
37        // Skip directories
38        if path.is_dir() {
39            continue;
40        }
41
42        // Check extension
43        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
44
45        if !supported_extensions.contains(&ext) {
46            trace!("Skipping unsupported extension: {}", path.display());
47            continue;
48        }
49
50        // Check exclude patterns
51        let path_str = path.to_string_lossy();
52        let excluded = config
53            .exclude_patterns
54            .iter()
55            .any(|pattern| matches_exclude(pattern, &path_str));
56
57        if excluded {
58            debug!("Excluded by pattern: {}", path.display());
59            continue;
60        }
61
62        files.push(path.to_path_buf());
63    }
64
65    // Sort for deterministic ordering
66    files.sort();
67
68    Ok(files)
69}
70
71/// Check if a path should be excluded based on patterns
72pub fn is_excluded(path: &Path, patterns: &[String]) -> bool {
73    let path_str = path.to_string_lossy();
74    patterns
75        .iter()
76        .any(|pattern| matches_exclude(pattern, &path_str))
77}
78
79/// Match an exclude pattern against a path string.
80///
81/// Supports `**` for recursive directory matching (which `glob::Pattern` does not).
82/// Patterns like `foo/**` become a prefix check on `foo/`.
83/// Patterns like `**/foo` become a suffix/contains check on `/foo`.
84fn matches_exclude(pattern: &str, path: &str) -> bool {
85    if pattern.contains("**") {
86        // "dir/**" → anything under dir/
87        if let Some(prefix) = pattern.strip_suffix("/**") {
88            return path.contains(&format!("{prefix}/"));
89        }
90        // "**/suffix" → anything ending with /suffix or matching suffix
91        if let Some(suffix) = pattern.strip_prefix("**/") {
92            return path.ends_with(suffix) || path.contains(&format!("/{suffix}"));
93        }
94        // General ** — split and check segments
95        let parts: Vec<&str> = pattern.split("**").collect();
96        if parts.len() == 2 {
97            return path.contains(parts[0]) && path.contains(parts[1]);
98        }
99    }
100
101    // Fall back to glob::Pattern for simple patterns
102    glob::Pattern::new(pattern)
103        .map(|p| {
104            p.matches_with(
105                path,
106                glob::MatchOptions {
107                    case_sensitive: true,
108                    require_literal_separator: false,
109                    require_literal_leading_dot: false,
110                },
111            )
112        })
113        .unwrap_or(false)
114}
115
116/// Get language stats from a list of files
117pub fn language_stats(files: &[PathBuf]) -> std::collections::HashMap<Language, usize> {
118    let mut stats = std::collections::HashMap::new();
119
120    for file in files {
121        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
122        let lang = Language::from_extension(ext);
123        *stats.entry(lang).or_insert(0) += 1;
124    }
125
126    stats
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// With the default config, only files with a supported extension are
    /// collected from a temporary directory.
    #[test]
    fn test_collect_files() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Two supported source files and one file that must be ignored.
        for (name, contents) in [
            ("test.rs", "fn main() {}"),
            ("test.py", "def main(): pass"),
            ("test.txt", "ignored"),
        ] {
            fs::write(root.join(name), contents).unwrap();
        }

        let found = collect_files(root, &RmaConfig::default()).unwrap();

        assert_eq!(found.len(), 2);
        for wanted in ["rs", "py"] {
            assert!(found.iter().any(|p| p.extension().unwrap() == wanted));
        }
    }

    /// Files are tallied per language according to their extension.
    #[test]
    fn test_language_stats() {
        let files: Vec<PathBuf> = ["a.rs", "b.rs", "c.py", "d.js"]
            .iter()
            .map(|name| PathBuf::from(name))
            .collect();

        let stats = language_stats(&files);

        let expected = [
            (Language::Rust, 2),
            (Language::Python, 1),
            (Language::JavaScript, 1),
        ];
        for (lang, count) in expected {
            assert_eq!(stats.get(&lang), Some(&count));
        }
    }
}