1use anyhow::Result;
4use ignore::WalkBuilder;
5use rma_common::{Language, RmaConfig};
6use std::path::{Path, PathBuf};
7use tracing::{debug, trace};
8
9pub fn collect_files(root: &Path, config: &RmaConfig) -> Result<Vec<PathBuf>> {
11 let mut files = Vec::new();
12
13 let walker = WalkBuilder::new(root)
14 .hidden(true) .git_ignore(true) .git_global(true)
17 .git_exclude(true)
18 .follow_links(false)
19 .build();
20
21 let supported_extensions: Vec<&str> = if config.languages.is_empty() {
22 vec![
24 "rs", "js", "mjs", "cjs", "ts", "tsx", "py", "pyi", "go", "java",
25 ]
26 } else {
27 config
28 .languages
29 .iter()
30 .flat_map(|l| l.extensions().iter().copied())
31 .collect()
32 };
33
34 for entry in walker.filter_map(|e| e.ok()) {
35 let path = entry.path();
36
37 if path.is_dir() {
39 continue;
40 }
41
42 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
44
45 if !supported_extensions.contains(&ext) {
46 trace!("Skipping unsupported extension: {}", path.display());
47 continue;
48 }
49
50 let path_str = path.to_string_lossy();
52 let excluded = config
53 .exclude_patterns
54 .iter()
55 .any(|pattern| matches_exclude(pattern, &path_str));
56
57 if excluded {
58 debug!("Excluded by pattern: {}", path.display());
59 continue;
60 }
61
62 files.push(path.to_path_buf());
63 }
64
65 files.sort();
67
68 Ok(files)
69}
70
71pub fn is_excluded(path: &Path, patterns: &[String]) -> bool {
73 let path_str = path.to_string_lossy();
74 patterns
75 .iter()
76 .any(|pattern| matches_exclude(pattern, &path_str))
77}
78
79fn matches_exclude(pattern: &str, path: &str) -> bool {
85 if pattern.contains("**") {
86 if let Some(prefix) = pattern.strip_suffix("/**") {
88 return path.contains(&format!("{prefix}/"));
89 }
90 if let Some(suffix) = pattern.strip_prefix("**/") {
92 return path.ends_with(suffix) || path.contains(&format!("/{suffix}"));
93 }
94 let parts: Vec<&str> = pattern.split("**").collect();
96 if parts.len() == 2 {
97 return path.contains(parts[0]) && path.contains(parts[1]);
98 }
99 }
100
101 glob::Pattern::new(pattern)
103 .map(|p| {
104 p.matches_with(
105 path,
106 glob::MatchOptions {
107 case_sensitive: true,
108 require_literal_separator: false,
109 require_literal_leading_dot: false,
110 },
111 )
112 })
113 .unwrap_or(false)
114}
115
116pub fn language_stats(files: &[PathBuf]) -> std::collections::HashMap<Language, usize> {
118 let mut stats = std::collections::HashMap::new();
119
120 for file in files {
121 let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
122 let lang = Language::from_extension(ext);
123 *stats.entry(lang).or_insert(0) += 1;
124 }
125
126 stats
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Only files with a supported extension are collected from a directory.
    #[test]
    fn test_collect_files() {
        let dir = TempDir::new().unwrap();

        let fixtures = [
            ("test.rs", "fn main() {}"),
            ("test.py", "def main(): pass"),
            ("test.txt", "ignored"),
        ];
        for (name, body) in fixtures {
            fs::write(dir.path().join(name), body).unwrap();
        }

        let found = collect_files(dir.path(), &RmaConfig::default()).unwrap();

        assert_eq!(found.len(), 2);
        for wanted in ["rs", "py"] {
            assert!(found.iter().any(|p| p.extension().unwrap() == wanted));
        }
    }

    /// Files are tallied per language according to their extension.
    #[test]
    fn test_language_stats() {
        let files: Vec<PathBuf> = ["a.rs", "b.rs", "c.py", "d.js"]
            .iter()
            .map(|name| PathBuf::from(*name))
            .collect();

        let stats = language_stats(&files);

        let expected = [
            (Language::Rust, 2),
            (Language::Python, 1),
            (Language::JavaScript, 1),
        ];
        for (lang, count) in expected {
            assert_eq!(stats.get(&lang), Some(&count));
        }
    }
}