1use std::collections::HashSet;
7use std::path::{Path, PathBuf};
8
9use thiserror::Error;
10use tracing::{debug, info, warn};
11use walkdir::WalkDir;
12
13use crate::parser::SupportedLanguage;
14
15#[derive(Debug, Error)]
17pub enum DiscoveryError {
18 #[error("Root path does not exist: {0}")]
19 RootNotFound(PathBuf),
20
21 #[error("IO error: {0}")]
22 Io(#[from] std::io::Error),
23
24 #[error("No code roots found under {0}")]
25 NoRootsFound(PathBuf),
26}
27
28pub type Result<T> = std::result::Result<T, DiscoveryError>;
29
/// What kind of root a discovered directory is.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RootType {
    /// A directory that contains a `.git` entry.
    ///
    /// Every field is optional because the corresponding `git` query may fail
    /// or print nothing.
    GitRepository {
        /// Output of `git remote get-url origin`.
        remote: Option<String>,
        /// Output of `git rev-parse --abbrev-ref HEAD`.
        branch: Option<String>,
        /// Output of `git rev-parse HEAD`.
        commit: Option<String>,
    },
    /// A directory with source files that is not itself a git repository.
    CodeDirectory,
}

impl RootType {
    /// Returns `true` for [`RootType::GitRepository`] and `false` for
    /// [`RootType::CodeDirectory`].
    pub fn is_git(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly.
        match self {
            RootType::GitRepository { .. } => true,
            RootType::CodeDirectory => false,
        }
    }
}
49
50#[derive(Debug, Clone)]
52pub struct DiscoveredRoot {
53 pub path: PathBuf,
55 pub relative_path: String,
57 pub root_type: RootType,
59 pub name: String,
61}
62
63impl DiscoveredRoot {
64 pub fn is_git(&self) -> bool {
66 self.root_type.is_git()
67 }
68}
69
/// Settings that steer root discovery.
#[derive(Clone, Debug)]
pub struct DiscoveryConfig {
    /// Maximum depth handed to the directory walker.
    pub max_depth: usize,
    /// Directory names that are never descended into.
    pub exclude_dirs: HashSet<String>,
    /// Whether non-git sub-directories containing source files are reported
    /// as roots.
    pub include_code_dirs: bool,
}

impl Default for DiscoveryConfig {
    /// Depth 3, code directories included, and a stock list of dependency,
    /// build-output, cache and editor directories excluded.
    fn default() -> Self {
        const DEFAULT_EXCLUDED_DIRS: &[&str] = &[
            "node_modules",
            "target",
            "build",
            "dist",
            "__pycache__",
            ".venv",
            "venv",
            ".idea",
            ".vscode",
            "vendor",
            "bin",
            "obj",
            ".tox",
            ".mypy_cache",
            ".pytest_cache",
            ".coverage",
            "coverage",
            ".next",
            ".nuxt",
        ];

        DiscoveryConfig {
            max_depth: 3,
            exclude_dirs: DEFAULT_EXCLUDED_DIRS
                .iter()
                .map(|&dir| String::from(dir))
                .collect(),
            include_code_dirs: true,
        }
    }
}
115
116pub struct RootDiscovery {
118 config: DiscoveryConfig,
119}
120
121impl Default for RootDiscovery {
122 fn default() -> Self {
123 Self::with_defaults()
124 }
125}
126
127impl RootDiscovery {
128 pub fn new(config: DiscoveryConfig) -> Self {
130 Self { config }
131 }
132
133 pub fn with_defaults() -> Self {
135 Self::new(DiscoveryConfig::default())
136 }
137
138 pub fn with_max_depth(mut self, max_depth: usize) -> Self {
140 self.config.max_depth = max_depth;
141 self
142 }
143
144 pub fn discover(&self, root_path: &Path) -> Result<Vec<DiscoveredRoot>> {
149 let root_path = root_path
150 .canonicalize()
151 .map_err(|_| DiscoveryError::RootNotFound(root_path.to_path_buf()))?;
152
153 info!("Discovering code roots under {:?}", root_path);
154
155 if self.is_git_repo(&root_path) {
157 info!("Root is a git repository");
158 return Ok(vec![self.create_discovered_root(&root_path, &root_path)?]);
159 }
160
161 if self.has_source_files(&root_path) && !self.has_discoverable_subdirs(&root_path) {
163 info!("Root is a code directory");
164 return Ok(vec![self.create_discovered_root(&root_path, &root_path)?]);
165 }
166
167 let mut roots = Vec::new();
168 let mut discovered_paths: HashSet<PathBuf> = HashSet::new();
169
170 for entry in WalkDir::new(&root_path)
172 .max_depth(self.config.max_depth)
173 .into_iter()
174 .filter_entry(|e| {
175 if !e.file_type().is_dir() {
176 return true;
177 }
178 if e.depth() == 0 {
181 return true;
182 }
183 let name = e.file_name().to_string_lossy();
184 !name.starts_with('.') && !self.config.exclude_dirs.contains(name.as_ref())
186 })
187 {
188 let entry = match entry {
189 Ok(e) => e,
190 Err(e) => {
191 warn!("Error walking directory: {}", e);
192 continue;
193 }
194 };
195
196 if !entry.file_type().is_dir() {
197 continue;
198 }
199
200 let path = entry.path();
201
202 if path == root_path {
204 continue;
205 }
206
207 if discovered_paths.iter().any(|p| path.starts_with(p)) {
209 continue;
210 }
211
212 if self.is_git_repo(path) {
214 debug!("Found git repository: {:?}", path);
215 if let Ok(root) = self.create_discovered_root(path, &root_path) {
216 discovered_paths.insert(path.to_path_buf());
217 roots.push(root);
218 }
219 continue;
220 }
221
222 if self.config.include_code_dirs {
224 let is_nested = roots.iter().any(|r| path.starts_with(&r.path));
227 if !is_nested && self.has_source_files(path) {
228 if !self.has_git_subdirs(path) {
231 debug!("Found code directory: {:?}", path);
232 if let Ok(root) = self.create_discovered_root(path, &root_path) {
233 roots.push(root);
234 }
235 }
236 }
237 }
238 }
239
240 if roots.is_empty() && self.has_source_files(&root_path) {
242 info!("No sub-roots found, treating root as code directory");
243 roots.push(self.create_discovered_root(&root_path, &root_path)?);
244 }
245
246 if roots.is_empty() {
247 return Err(DiscoveryError::NoRootsFound(root_path));
248 }
249
250 roots.sort_by(|a, b| a.path.cmp(&b.path));
252
253 info!("Discovered {} code root(s)", roots.len());
254 for root in &roots {
255 info!(
256 " - {} ({:?}) at {}",
257 root.name,
258 if root.is_git() { "git" } else { "code" },
259 root.relative_path
260 );
261 }
262
263 Ok(roots)
264 }
265
266 fn is_git_repo(&self, path: &Path) -> bool {
268 path.join(".git").exists()
269 }
270
271 fn has_discoverable_subdirs(&self, path: &Path) -> bool {
273 if let Ok(entries) = std::fs::read_dir(path) {
274 for entry in entries.flatten() {
275 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
276 let name = entry.file_name().to_string_lossy().to_string();
277 if !name.starts_with('.') && !self.config.exclude_dirs.contains(&name) {
278 return true;
279 }
280 }
281 }
282 }
283 false
284 }
285
286 fn has_git_subdirs(&self, path: &Path) -> bool {
288 if let Ok(entries) = std::fs::read_dir(path) {
289 for entry in entries.flatten() {
290 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
291 let subpath = entry.path();
292 if self.is_git_repo(&subpath) {
293 return true;
294 }
295 }
296 }
297 }
298 false
299 }
300
301 fn has_source_files(&self, path: &Path) -> bool {
303 if let Ok(entries) = std::fs::read_dir(path) {
304 for entry in entries.flatten() {
305 let entry_path = entry.path();
306 if entry_path.is_file() && SupportedLanguage::from_path(&entry_path).is_some() {
307 return true;
308 }
309 }
310 }
311 false
312 }
313
314 fn create_discovered_root(&self, path: &Path, root_path: &Path) -> Result<DiscoveredRoot> {
316 let relative_path = path
317 .strip_prefix(root_path)
318 .map(|p| {
319 let s = p.to_string_lossy().to_string();
320 if s.is_empty() { ".".to_string() } else { s }
321 })
322 .unwrap_or_else(|_| ".".to_string());
323
324 let name = path
325 .file_name()
326 .map(|n| n.to_string_lossy().to_string())
327 .unwrap_or_else(|| {
328 root_path
330 .file_name()
331 .map(|n| n.to_string_lossy().to_string())
332 .unwrap_or_else(|| "root".to_string())
333 });
334
335 let root_type = if self.is_git_repo(path) {
336 let git_info = extract_git_metadata(path);
337 RootType::GitRepository {
338 remote: git_info.0,
339 branch: git_info.1,
340 commit: git_info.2,
341 }
342 } else {
343 RootType::CodeDirectory
344 };
345
346 Ok(DiscoveredRoot {
347 path: path.to_path_buf(),
348 relative_path,
349 root_type,
350 name,
351 })
352 }
353}
354
/// Queries the `git` command-line tool for the `origin` remote URL, the
/// current branch name and the current commit hash of `repo_path`.
///
/// Returns `(remote, branch, commit)`. All three are `None` when `repo_path`
/// has no `.git` entry; an individual element is `None` when its command
/// cannot be run, exits unsuccessfully, or prints only whitespace.
fn extract_git_metadata(repo_path: &Path) -> (Option<String>, Option<String>, Option<String>) {
    if !repo_path.join(".git").exists() {
        return (None, None, None);
    }

    // Runs `git <args>` inside the repository and returns its trimmed stdout.
    let run_git = |args: &[&str]| -> Option<String> {
        let output = std::process::Command::new("git")
            .args(args)
            .current_dir(repo_path)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
        if text.is_empty() {
            None
        } else {
            Some(text)
        }
    };

    let remote = run_git(&["remote", "get-url", "origin"]);
    let branch = run_git(&["rev-parse", "--abbrev-ref", "HEAD"]);
    let commit = run_git(&["rev-parse", "HEAD"]);

    (remote, branch, commit)
}
394
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Makes `dir` look like a git repository by creating `dir/.git`
    /// (creating `dir` itself when it does not exist yet).
    fn mark_as_git_repo(dir: &std::path::Path) {
        std::fs::create_dir_all(dir.join(".git")).unwrap();
    }

    /// Writes `contents` to `dir/file_name`.
    fn add_file(dir: &std::path::Path, file_name: &str, contents: &str) {
        std::fs::write(dir.join(file_name), contents).unwrap();
    }

    /// Runs discovery with the default configuration and unwraps the result.
    fn discover_in(dir: &std::path::Path) -> Vec<DiscoveredRoot> {
        RootDiscovery::with_defaults().discover(dir).unwrap()
    }

    #[test]
    fn test_discovery_config_defaults() {
        let DiscoveryConfig {
            max_depth,
            exclude_dirs,
            include_code_dirs,
        } = DiscoveryConfig::default();

        assert_eq!(max_depth, 3);
        for dir in ["node_modules", "target"] {
            assert!(exclude_dirs.contains(dir));
        }
        assert!(include_code_dirs);
    }

    #[test]
    fn test_is_git_repo() {
        let workspace = TempDir::new().unwrap();
        let discovery = RootDiscovery::with_defaults();

        assert!(!discovery.is_git_repo(workspace.path()));

        mark_as_git_repo(workspace.path());
        assert!(discovery.is_git_repo(workspace.path()));
    }

    #[test]
    fn test_has_source_files() {
        let workspace = TempDir::new().unwrap();
        let dir = workspace.path();
        let discovery = RootDiscovery::with_defaults();

        // Empty directory.
        assert!(!discovery.has_source_files(dir));

        // A file that is not source code does not count.
        add_file(dir, "readme.txt", "hello");
        assert!(!discovery.has_source_files(dir));

        // A recognised source file does.
        add_file(dir, "main.py", "print('hello')");
        assert!(discovery.has_source_files(dir));
    }

    #[test]
    fn test_discover_single_git_repo() {
        let workspace = TempDir::new().unwrap();
        mark_as_git_repo(workspace.path());
        add_file(workspace.path(), "main.py", "print('hello')");

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 1);
        assert!(roots[0].is_git());
        assert_eq!(roots[0].relative_path, ".");
    }

    #[test]
    fn test_discover_multiple_git_repos() {
        let workspace = TempDir::new().unwrap();

        for (repo_name, file_name, contents) in [
            ("repo-a", "main.py", "# repo a"),
            ("repo-b", "main.rs", "fn main() {}"),
        ] {
            let repo = workspace.path().join(repo_name);
            mark_as_git_repo(&repo);
            add_file(&repo, file_name, contents);
        }

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 2);
        for expected in ["repo-a", "repo-b"] {
            assert!(roots.iter().any(|r| r.name == expected));
        }
    }

    #[test]
    fn test_discover_code_directory() {
        let workspace = TempDir::new().unwrap();
        add_file(workspace.path(), "main.py", "print('hello')");

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 1);
        assert!(!roots[0].is_git());
        assert_eq!(roots[0].root_type, RootType::CodeDirectory);
    }

    #[test]
    fn test_discover_mixed_roots() {
        let workspace = TempDir::new().unwrap();

        let git_project = workspace.path().join("git-project");
        mark_as_git_repo(&git_project);
        add_file(&git_project, "main.py", "# git project");

        let scripts = workspace.path().join("scripts");
        std::fs::create_dir_all(&scripts).unwrap();
        add_file(&scripts, "util.py", "# utilities");

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 2);

        let by_name = |wanted: &str| roots.iter().find(|r| r.name == wanted).unwrap();
        assert!(by_name("git-project").is_git());
        assert!(!by_name("scripts").is_git());
    }

    #[test]
    fn test_discover_skips_nested_repos() {
        let workspace = TempDir::new().unwrap();

        // Parent repository.
        mark_as_git_repo(workspace.path());
        add_file(workspace.path(), "main.py", "# parent");

        // Repository nested inside the parent.
        let inner = workspace.path().join("nested");
        mark_as_git_repo(&inner);
        add_file(&inner, "lib.py", "# nested");

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].relative_path, ".");
    }

    #[test]
    fn test_discover_skips_excluded_dirs() {
        let workspace = TempDir::new().unwrap();

        let package_dir = workspace.path().join("node_modules").join("some-package");
        std::fs::create_dir_all(&package_dir).unwrap();
        add_file(&package_dir, "index.js", "// package");

        add_file(workspace.path(), "app.js", "// app");

        let roots = discover_in(workspace.path());

        assert_eq!(roots.len(), 1);
        assert_eq!(roots[0].relative_path, ".");
    }

    #[test]
    fn test_root_type_is_git() {
        let git = RootType::GitRepository {
            remote: Some("origin".to_string()),
            branch: Some("main".to_string()),
            commit: None,
        };
        assert!(git.is_git());

        assert!(!RootType::CodeDirectory.is_git());
    }

    #[test]
    fn test_with_max_depth() {
        let deeper = RootDiscovery::with_defaults().with_max_depth(5);
        assert_eq!(deeper.config.max_depth, 5);
    }

    #[test]
    fn test_no_roots_found_error() {
        let workspace = TempDir::new().unwrap();

        // An empty directory holds neither repositories nor source files.
        let outcome = RootDiscovery::with_defaults().discover(workspace.path());

        assert!(matches!(outcome, Err(DiscoveryError::NoRootsFound(_))));
    }
}