newt_coder/
workspace_scan.rs1use std::path::{Path, PathBuf};
21
22use crate::error::{CoderError, Result};
23
/// File extensions (without the leading dot) that the workspace scan accepts.
///
/// The walk compares a file's extension against this list verbatim, so the
/// match is case-sensitive.
const SCAN_EXTENSIONS: &[&str] = &[
    "rs", "toml", //
    "py", //
    "js", "ts", //
    "go", "java", //
    "c", "h", "cpp", "hpp", //
    "md",
];
30
/// Entry names that the walk never descends into or records.
///
/// Names beginning with `.` are skipped separately by the walk itself.
const SKIP_DIRS: &[&str] = &[
    "target",
    "node_modules",
];
34
35pub fn scan_workspace_for_files(workspace: &Path, task: &str) -> Result<Vec<PathBuf>> {
37 let all = scan_all_source_files(workspace)?;
38 let mentioned = filter_mentioned(&all, task);
39 if !mentioned.is_empty() {
40 Ok(mentioned)
41 } else {
42 Ok(all)
43 }
44}
45
/// Returns the entries of `all` that `task` appears to mention.
///
/// A path counts as mentioned when its displayed form, its file name, or its
/// file stem occurs in `task` as a whole token, i.e. the occurrence is not
/// directly adjacent to an alphanumeric character or `_`. Plain substring
/// matching would let `lib.rs` claim any task containing "library" and
/// `main.rs` any task containing "maintain", narrowing the scan to the wrong
/// files.
///
/// The relative order of `all` is preserved. An empty result means nothing
/// was mentioned.
fn filter_mentioned(all: &[PathBuf], task: &str) -> Vec<PathBuf> {
    /// Reports whether `needle` occurs in `haystack` delimited on both sides
    /// by a non-word character or by the start/end of the text.
    fn mentions(haystack: &str, needle: &str) -> bool {
        if needle.is_empty() {
            return false;
        }
        let is_word = |c: char| c.is_alphanumeric() || c == '_';
        let mut from = 0;
        while let Some(offset) = haystack[from..].find(needle) {
            let start = from + offset;
            let end = start + needle.len();
            let clear_before = haystack[..start]
                .chars()
                .next_back()
                .map_or(true, |c| !is_word(c));
            let clear_after = haystack[end..]
                .chars()
                .next()
                .map_or(true, |c| !is_word(c));
            if clear_before && clear_after {
                return true;
            }
            // Advance by a single character (not the whole match) so that an
            // overlapping occurrence with valid boundaries is still examined.
            from = start + haystack[start..].chars().next().map_or(1, char::len_utf8);
        }
        false
    }

    all.iter()
        .filter(|path| {
            let rel = path.display().to_string();
            let fname = path.file_name().and_then(|s| s.to_str());
            let stem = path.file_stem().and_then(|s| s.to_str());
            mentions(task, &rel)
                || fname.map_or(false, |name| mentions(task, name))
                || stem.map_or(false, |name| mentions(task, name))
        })
        .cloned()
        .collect()
}
65
66fn scan_all_source_files(workspace: &Path) -> Result<Vec<PathBuf>> {
69 let mut out = Vec::new();
70 walk(workspace, workspace, &mut out)?;
71 out.sort();
72 Ok(out)
73}
74
75fn walk(root: &Path, dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
76 let entries = std::fs::read_dir(dir)
77 .map_err(|e| CoderError::Workspace(format!("read_dir {}: {e}", dir.display())))?;
78 for entry in entries {
79 let entry = entry?;
80 let path = entry.path();
81 let name = entry.file_name();
82 let name_str = name.to_string_lossy();
83
84 if name_str.starts_with('.') || SKIP_DIRS.contains(&name_str.as_ref()) {
85 continue;
86 }
87
88 if path.is_dir() {
89 walk(root, &path, out)?;
90 } else if path.is_file() {
91 if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
92 if SCAN_EXTENSIONS.contains(&ext) {
93 let rel = path.strip_prefix(root).map_err(|e| {
94 CoderError::Workspace(format!("strip_prefix {}: {e}", path.display()))
95 })?;
96 out.push(rel.to_path_buf());
97 }
98 }
99 }
100 }
101 Ok(())
102}
103
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates the file `rel` under `dir` with `contents`, creating any
    /// missing parent directories first.
    fn write(dir: &Path, rel: &str, contents: &str) {
        let abs = dir.join(rel);
        if let Some(parent) = abs.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(abs, contents).unwrap();
    }

    /// Membership check that compares paths component-wise, so the tests do
    /// not depend on the platform's path separator.
    fn has(files: &[PathBuf], rel: &str) -> bool {
        files.iter().any(|p| p.as_path() == Path::new(rel))
    }

    #[test]
    fn finds_rust_sources_and_skips_target() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/lib.rs", "pub fn x() {}\n");
        write(tmp.path(), "src/main.rs", "fn main() {}\n");
        write(tmp.path(), "target/debug/junk.rs", "fn junk() {}\n");

        let files = scan_all_source_files(tmp.path()).unwrap();
        assert!(has(&files, "src/lib.rs"));
        assert!(has(&files, "src/main.rs"));
        assert!(
            !files.iter().any(|p| p.starts_with("target")),
            "target/ leaked into the scan: {files:?}"
        );
    }

    #[test]
    fn skips_hidden_dirs() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/lib.rs", "pub fn x() {}\n");
        write(tmp.path(), ".git/config", "[core]\n");
        write(tmp.path(), ".hidden/file.rs", "fn x() {}\n");

        let files = scan_all_source_files(tmp.path()).unwrap();
        assert!(has(&files, "src/lib.rs"));
        assert!(
            !files
                .iter()
                .any(|p| p.starts_with(".git") || p.starts_with(".hidden")),
            "hidden dir leaked: {files:?}"
        );
    }

    #[test]
    fn scan_prefers_mentioned_files() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/lib.rs", "pub fn greet() {}\n");
        write(tmp.path(), "src/other.rs", "pub fn other() {}\n");
        write(tmp.path(), "Cargo.toml", "[package]\n");

        let hits = scan_workspace_for_files(tmp.path(), "Rename greet in src/lib.rs").unwrap();
        assert!(has(&hits, "src/lib.rs"));
        assert!(
            !has(&hits, "src/other.rs"),
            "unrelated file leaked into mentioned-only scan: {hits:?}"
        );
    }

    #[test]
    fn scan_falls_back_to_all_when_nothing_mentioned() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/lib.rs", "pub fn a() {}\n");
        write(tmp.path(), "src/other.rs", "pub fn b() {}\n");

        let hits =
            scan_workspace_for_files(tmp.path(), "Add a license header everywhere").unwrap();
        assert!(has(&hits, "src/lib.rs"));
        assert!(has(&hits, "src/other.rs"));
    }

    #[test]
    fn ignores_unknown_extensions() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/lib.rs", "pub fn x() {}\n");
        write(tmp.path(), "binary.bin", "junk");
        write(tmp.path(), "image.png", "fake png bytes");

        let files = scan_all_source_files(tmp.path()).unwrap();
        assert!(has(&files, "src/lib.rs"));
        assert!(!files.iter().any(|p| matches!(
            p.extension().and_then(|e| e.to_str()),
            Some("bin" | "png")
        )));
    }

    #[test]
    fn file_stem_match_triggers_mention() {
        let tmp = TempDir::new().unwrap();
        write(tmp.path(), "src/parser.rs", "pub fn parse() {}\n");

        let hits =
            scan_workspace_for_files(tmp.path(), "Update the parser to handle commas").unwrap();
        assert!(has(&hits, "src/parser.rs"));
    }
}