1use std::collections::HashSet;
4use std::io::BufRead;
5use std::path::{Path, PathBuf};
6
7use globset::{Glob, GlobSet, GlobSetBuilder};
8
9use super::error::DiscoveryError;
10
/// Maximum number of input lines `discover_from_reader` accepts before it
/// fails with `DiscoveryError::TooManyPaths`.
///
/// Every line read counts towards the limit, including blank lines and
/// `#` comments.
const MAX_STDIN_PATHS: usize = 100_000;

/// Maximum length, in bytes after trimming whitespace, of a single input line
/// in `discover_from_reader`; longer lines are skipped with a warning.
const MAX_LINE_LENGTH: usize = 4096;

/// Maximum number of entries consumed from one glob expansion before
/// `discover_glob` stops with a warning.
const MAX_GLOB_MATCHES: usize = 100_000;
19
/// Settings that control which files [`FileDiscovery`] returns.
#[derive(Debug, Clone)]
pub struct DiscoveryConfig {
    /// Glob patterns a file must match to be included; they are tested
    /// against the file name only, not the full path.
    pub include_patterns: Vec<String>,
    /// Glob patterns that reject a file; they are tested against the whole
    /// path as it was discovered.
    pub exclude_patterns: Vec<String>,
    /// Depth limit handed to the directory walker; `None` means no limit.
    pub max_depth: Option<usize>,
    /// When `false`, the directory walker skips hidden entries.
    pub include_hidden: bool,
    /// When `true`, the directory walker honours `.gitignore`, the global
    /// gitignore and git exclude files.
    pub respect_gitignore: bool,
    /// When `true`, the directory walker follows symbolic links.
    pub follow_symlinks: bool,
}
36
37impl Default for DiscoveryConfig {
38 fn default() -> Self {
39 Self {
40 include_patterns: vec!["*.yaml".into(), "*.yml".into()],
41 exclude_patterns: vec![],
42 max_depth: Some(100),
43 include_hidden: false,
44 respect_gitignore: true,
45 follow_symlinks: false,
46 }
47 }
48}
49
50impl DiscoveryConfig {
51 #[must_use]
53 pub fn new() -> Self {
54 Self::default()
55 }
56
57 #[must_use]
59 pub fn with_include_patterns(mut self, patterns: Vec<String>) -> Self {
60 self.include_patterns = patterns;
61 self
62 }
63
64 #[must_use]
66 pub fn with_exclude_patterns(mut self, patterns: Vec<String>) -> Self {
67 self.exclude_patterns = patterns;
68 self
69 }
70
71 #[must_use]
73 pub const fn with_max_depth(mut self, depth: Option<usize>) -> Self {
74 self.max_depth = depth;
75 self
76 }
77
78 #[must_use]
80 pub const fn with_unlimited_depth(mut self) -> Self {
81 self.max_depth = None;
82 self
83 }
84
85 #[must_use]
87 pub const fn with_hidden(mut self, include: bool) -> Self {
88 self.include_hidden = include;
89 self
90 }
91
92 #[must_use]
94 pub const fn with_gitignore(mut self, respect: bool) -> Self {
95 self.respect_gitignore = respect;
96 self
97 }
98
99 #[must_use]
101 pub const fn with_follow_symlinks(mut self, follow: bool) -> Self {
102 self.follow_symlinks = follow;
103 self
104 }
105}
106
/// Records which discovery route produced a [`DiscoveredFile`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiscoveryOrigin {
    /// The file was passed to `discover` as an existing file path.
    DirectPath,
    /// The file was found while walking a directory passed to `discover`.
    DirectoryWalk,
    /// The file was matched by expanding a non-existent path as a glob.
    GlobExpansion,
    /// The file was listed on a line read by `discover_from_reader`.
    StdinList,
}
119
/// A single file accepted by [`FileDiscovery`].
#[derive(Debug, Clone)]
pub struct DiscoveredFile {
    /// Canonicalized path of the file.
    pub path: PathBuf,
    /// How the file was found.
    pub origin: DiscoveryOrigin,
}
128
/// Finds files according to a [`DiscoveryConfig`], with the include and
/// exclude patterns pre-compiled into glob sets.
#[derive(Debug)]
pub struct FileDiscovery {
    /// Settings applied to directory walks.
    config: DiscoveryConfig,
    /// Compiled form of `config.include_patterns`.
    include_matcher: GlobSet,
    /// Compiled form of `config.exclude_patterns`.
    exclude_matcher: GlobSet,
}
136
137impl FileDiscovery {
138 pub fn new(config: DiscoveryConfig) -> Result<Self, DiscoveryError> {
140 let include_matcher = build_globset(&config.include_patterns)?;
141 let exclude_matcher = build_globset(&config.exclude_patterns)?;
142
143 Ok(Self {
144 config,
145 include_matcher,
146 exclude_matcher,
147 })
148 }
149
150 pub fn discover(&self, paths: &[PathBuf]) -> Result<Vec<DiscoveredFile>, DiscoveryError> {
157 let estimated_capacity = paths.len().saturating_mul(10);
159 let mut discovered = Vec::with_capacity(estimated_capacity);
160 let mut seen = HashSet::new();
161
162 for path in paths {
163 if path.exists() {
164 if path.is_file() {
165 self.discover_file(
166 path,
167 DiscoveryOrigin::DirectPath,
168 &mut discovered,
169 &mut seen,
170 )?;
171 } else if path.is_dir() {
172 self.discover_directory(path, &mut discovered, &mut seen);
173 }
174 } else {
175 self.discover_glob(&path.to_string_lossy(), &mut discovered, &mut seen);
177 }
178 }
179
180 Ok(discovered)
181 }
182
183 pub fn discover_from_stdin(&self) -> Result<Vec<DiscoveredFile>, DiscoveryError> {
185 self.discover_from_reader(std::io::stdin().lock())
186 }
187
188 pub fn discover_from_reader<R: BufRead>(
190 &self,
191 reader: R,
192 ) -> Result<Vec<DiscoveredFile>, DiscoveryError> {
193 let mut discovered = Vec::new();
194 let mut seen = HashSet::new();
195 let mut count = 0;
196
197 for line in reader.lines() {
198 let line = line.map_err(|e| DiscoveryError::StdinError { source: e })?;
199
200 count += 1;
201 if count > MAX_STDIN_PATHS {
202 return Err(DiscoveryError::TooManyPaths {
203 max: MAX_STDIN_PATHS,
204 });
205 }
206
207 let trimmed = line.trim();
208
209 if trimmed.len() > MAX_LINE_LENGTH {
210 eprintln!("Warning: skipping line {count} (exceeds {MAX_LINE_LENGTH} chars)");
211 continue;
212 }
213
214 if trimmed.is_empty() || trimmed.starts_with('#') {
216 continue;
217 }
218
219 let path = PathBuf::from(trimmed);
220 if path.is_file() {
221 self.discover_file(
222 &path,
223 DiscoveryOrigin::StdinList,
224 &mut discovered,
225 &mut seen,
226 )?;
227 }
228 }
229
230 Ok(discovered)
231 }
232
233 #[must_use]
235 pub fn should_include(&self, path: &Path) -> bool {
236 if self.exclude_matcher.is_match(path) {
238 return false;
239 }
240
241 path.file_name()
243 .is_some_and(|file_name| self.include_matcher.is_match(file_name))
244 }
245
246 fn discover_file(
247 &self,
248 path: &Path,
249 origin: DiscoveryOrigin,
250 discovered: &mut Vec<DiscoveredFile>,
251 seen: &mut HashSet<PathBuf>,
252 ) -> Result<(), DiscoveryError> {
253 if !self.should_include(path) {
254 return Ok(());
255 }
256
257 let canonical = path.canonicalize().map_err(|e| {
259 if e.kind() == std::io::ErrorKind::NotFound {
260 if path.symlink_metadata().is_ok() {
262 DiscoveryError::BrokenSymlink {
263 path: path.to_path_buf(),
264 }
265 } else {
266 DiscoveryError::PathNotFound {
267 path: path.to_path_buf(),
268 }
269 }
270 } else if e.kind() == std::io::ErrorKind::PermissionDenied {
271 DiscoveryError::PermissionDenied {
272 path: path.to_path_buf(),
273 }
274 } else {
275 DiscoveryError::IoError {
276 path: path.to_path_buf(),
277 source: e,
278 }
279 }
280 })?;
281
282 if seen.insert(canonical.clone()) {
284 discovered.push(DiscoveredFile {
285 path: canonical,
286 origin,
287 });
288 }
289
290 Ok(())
291 }
292
293 fn discover_directory(
294 &self,
295 dir: &Path,
296 discovered: &mut Vec<DiscoveredFile>,
297 seen: &mut HashSet<PathBuf>,
298 ) {
299 let mut builder = ignore::WalkBuilder::new(dir);
300 builder
301 .hidden(!self.config.include_hidden)
302 .git_ignore(self.config.respect_gitignore)
303 .git_global(self.config.respect_gitignore)
304 .git_exclude(self.config.respect_gitignore)
305 .follow_links(self.config.follow_symlinks);
306
307 if let Some(depth) = self.config.max_depth {
308 builder.max_depth(Some(depth));
309 }
310
311 for entry in builder.build() {
312 let entry = match entry {
313 Ok(e) => e,
314 Err(e) => {
315 eprintln!("Warning: failed to read entry: {e}");
317 continue;
318 }
319 };
320
321 if entry.file_type().is_some_and(|ft| ft.is_file()) {
322 let path = entry.path();
323 let _ = self.discover_file(path, DiscoveryOrigin::DirectoryWalk, discovered, seen);
325 }
326 }
327 }
328
329 fn discover_glob(
330 &self,
331 pattern: &str,
332 discovered: &mut Vec<DiscoveredFile>,
333 seen: &mut HashSet<PathBuf>,
334 ) {
335 let Ok(glob) = glob::glob(pattern) else {
336 eprintln!("Warning: invalid glob pattern: {pattern}");
337 return;
338 };
339
340 let mut match_count = 0;
341 for entry in glob {
342 match_count += 1;
343 if match_count > MAX_GLOB_MATCHES {
344 eprintln!(
345 "Warning: glob pattern '{pattern}' exceeded {MAX_GLOB_MATCHES} matches, stopping"
346 );
347 break;
348 }
349
350 match entry {
351 Ok(path) => {
352 if path.is_file() {
353 let _ = self.discover_file(
355 &path,
356 DiscoveryOrigin::GlobExpansion,
357 discovered,
358 seen,
359 );
360 }
361 }
362 Err(e) => {
363 eprintln!("Warning: glob error: {e}");
364 }
365 }
366 }
367 }
368}
369
370fn build_globset(patterns: &[String]) -> Result<GlobSet, DiscoveryError> {
371 let mut builder = GlobSetBuilder::new();
372
373 for pattern in patterns {
374 let glob = Glob::new(pattern).map_err(|e| DiscoveryError::InvalidPattern {
375 pattern: pattern.clone(),
376 source: e,
377 })?;
378 builder.add(glob);
379 }
380
381 builder.build().map_err(|e| DiscoveryError::InvalidPattern {
382 pattern: "<combined>".to_string(),
383 source: e,
384 })
385}
386
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Shorthand for the stock configuration used by most tests.
    fn default_config() -> DiscoveryConfig {
        DiscoveryConfig::new()
    }

    /// The default configuration carries the documented stock values.
    #[test]
    fn test_config_default() {
        let config = DiscoveryConfig::default();
        assert_eq!(config.include_patterns, vec!["*.yaml", "*.yml"]);
        assert!(config.exclude_patterns.is_empty());
        assert_eq!(config.max_depth, Some(100));
        assert!(!config.include_hidden);
        assert!(config.respect_gitignore);
        assert!(!config.follow_symlinks);
    }

    /// Each builder method overrides exactly its own field.
    #[test]
    fn test_config_builder() {
        let config = DiscoveryConfig::new()
            .with_include_patterns(vec!["*.yml".to_string()])
            .with_exclude_patterns(vec!["**/vendor/**".to_string()])
            .with_max_depth(Some(5))
            .with_hidden(true)
            .with_gitignore(false)
            .with_follow_symlinks(true);

        assert_eq!(config.include_patterns, vec!["*.yml"]);
        assert_eq!(config.exclude_patterns, vec!["**/vendor/**"]);
        assert_eq!(config.max_depth, Some(5));
        assert!(config.include_hidden);
        assert!(!config.respect_gitignore);
        assert!(config.follow_symlinks);
    }

    /// `*.yaml` files are included by default, with or without a directory.
    #[test]
    fn test_include_pattern_yaml() {
        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();

        assert!(discovery.should_include(Path::new("test.yaml")));
        assert!(discovery.should_include(Path::new("/path/to/test.yaml")));
    }

    /// `*.yml` files are included by default, with or without a directory.
    #[test]
    fn test_include_pattern_yml() {
        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();

        assert!(discovery.should_include(Path::new("test.yml")));
        assert!(discovery.should_include(Path::new("/path/to/test.yml")));
    }

    /// An exclude pattern rejects matching paths and leaves others alone.
    #[test]
    fn test_exclude_pattern() {
        let config = default_config().with_exclude_patterns(vec!["**/vendor/**".to_string()]);
        let discovery = FileDiscovery::new(config).unwrap();

        assert!(!discovery.should_include(Path::new("vendor/test.yaml")));
        assert!(!discovery.should_include(Path::new("path/vendor/test.yaml")));
        assert!(discovery.should_include(Path::new("test.yaml")));
    }

    /// The vendor exclusion also applies to files nested below `vendor/`.
    #[test]
    fn test_exclude_vendor() {
        let config = default_config().with_exclude_patterns(vec!["**/vendor/**".to_string()]);
        let discovery = FileDiscovery::new(config).unwrap();

        assert!(!discovery.should_include(Path::new("vendor/lib/config.yaml")));
        assert!(discovery.should_include(Path::new("src/config.yaml")));
    }

    /// A directly named file is reported with the `DirectPath` origin.
    #[test]
    fn test_discover_single_file() {
        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(std::slice::from_ref(&file)).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].origin, DiscoveryOrigin::DirectPath);
    }

    /// Walking a directory finds nested YAML files and skips other files.
    #[test]
    fn test_discover_directory() {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join("root.yaml"), "a: 1").unwrap();
        fs::create_dir(temp.path().join("subdir")).unwrap();
        fs::write(temp.path().join("subdir/nested.yaml"), "b: 2").unwrap();
        fs::write(temp.path().join("subdir/skip.txt"), "c: 3").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[temp.path().to_path_buf()]).unwrap();

        assert_eq!(files.len(), 2);
        assert!(
            files
                .iter()
                .all(|f| f.origin == DiscoveryOrigin::DirectoryWalk)
        );
    }

    /// A non-existent path is expanded as a glob pattern.
    #[test]
    fn test_discover_glob() {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join("config.yaml"), "a: 1").unwrap();
        fs::write(temp.path().join("data.yml"), "b: 2").unwrap();
        fs::write(temp.path().join("readme.md"), "# README").unwrap();

        let pattern = format!("{}/*.yaml", temp.path().display());
        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[PathBuf::from(pattern)]).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].origin, DiscoveryOrigin::GlobExpansion);
    }

    /// Files and directories can be mixed in one call, each keeping its origin.
    #[test]
    fn test_discover_mixed_paths() {
        let temp = TempDir::new().unwrap();

        let file = temp.path().join("direct.yaml");
        fs::write(&file, "a: 1").unwrap();

        let dir = temp.path().join("dir");
        fs::create_dir(&dir).unwrap();
        fs::write(dir.join("in_dir.yaml"), "b: 2").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[file, dir]).unwrap();

        assert_eq!(files.len(), 2);
        assert!(
            files
                .iter()
                .any(|f| f.origin == DiscoveryOrigin::DirectPath)
        );
        assert!(
            files
                .iter()
                .any(|f| f.origin == DiscoveryOrigin::DirectoryWalk)
        );
    }

    /// Hidden files are skipped by a directory walk under the default config.
    #[test]
    fn test_hidden_files_excluded() {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join(".hidden.yaml"), "a: 1").unwrap();
        fs::write(temp.path().join("visible.yaml"), "b: 2").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[temp.path().to_path_buf()]).unwrap();

        assert_eq!(files.len(), 1);
        assert!(files[0].path.ends_with("visible.yaml"));
    }

    /// Hidden files are returned once `with_hidden(true)` is set.
    #[test]
    fn test_hidden_files_included() {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join(".hidden.yaml"), "a: 1").unwrap();
        fs::write(temp.path().join("visible.yaml"), "b: 2").unwrap();

        let config = default_config().with_hidden(true);
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[temp.path().to_path_buf()]).unwrap();

        assert_eq!(files.len(), 2);
    }

    /// Files listed in `.gitignore` are skipped inside a git repository.
    #[test]
    fn test_gitignore_respected() {
        if std::process::Command::new("git")
            .args(["--version"])
            .output()
            .is_err()
        {
            eprintln!("Skipping test_gitignore_respected: git not available");
            return;
        }

        let temp = TempDir::new().unwrap();

        std::process::Command::new("git")
            .args(["init"])
            .current_dir(temp.path())
            .output()
            .unwrap();

        fs::write(temp.path().join(".gitignore"), "ignored.yaml\n").unwrap();
        fs::write(temp.path().join("ignored.yaml"), "a: 1").unwrap();
        fs::write(temp.path().join("included.yaml"), "b: 2").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[temp.path().to_path_buf()]).unwrap();

        assert_eq!(files.len(), 1);
        assert!(files[0].path.ends_with("included.yaml"));
    }

    /// A depth limit of one returns only files directly inside the root.
    #[test]
    fn test_max_depth() {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join("root.yaml"), "a: 1").unwrap();

        let level1 = temp.path().join("level1");
        fs::create_dir(&level1).unwrap();
        fs::write(level1.join("l1.yaml"), "b: 2").unwrap();

        let level2 = level1.join("level2");
        fs::create_dir(&level2).unwrap();
        fs::write(level2.join("l2.yaml"), "c: 3").unwrap();

        let config = default_config().with_max_depth(Some(1));
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover(&[temp.path().to_path_buf()]).unwrap();

        assert_eq!(files.len(), 1);
        assert!(files[0].path.ends_with("root.yaml"));
    }

    /// The same file passed twice is reported once.
    #[test]
    fn test_deduplication() {
        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();

        let files = discovery.discover(&[file.clone(), file]).unwrap();

        assert_eq!(files.len(), 1);
    }

    /// A malformed include pattern makes construction fail.
    #[test]
    fn test_invalid_pattern_error() {
        let config = default_config().with_include_patterns(vec!["[invalid".to_string()]);

        let result = FileDiscovery::new(config);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("invalid glob pattern")
        );
    }

    /// A path read from the reader is reported with the `StdinList` origin.
    #[test]
    fn test_discover_from_reader_valid_paths() {
        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let input = format!("{}\n", file.display());
        let reader = std::io::Cursor::new(input);

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover_from_reader(reader).unwrap();

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].origin, DiscoveryOrigin::StdinList);
    }

    /// Comment lines and blank lines in the input are ignored.
    #[test]
    fn test_discover_from_reader_comments_and_empty_lines() {
        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let input = format!("# comment\n\n{}\n# another comment\n", file.display());
        let reader = std::io::Cursor::new(input);

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover_from_reader(reader).unwrap();

        assert_eq!(files.len(), 1);
    }

    /// Feeding one line more than the limit yields a "too many paths" error.
    #[test]
    fn test_discover_from_reader_too_many_paths() {
        use std::fmt::Write;

        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let mut input = String::new();
        for _ in 0..=MAX_STDIN_PATHS {
            writeln!(&mut input, "{}", file.display()).unwrap();
        }
        let reader = std::io::Cursor::new(input);

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let result = discovery.discover_from_reader(reader);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("exceeded maximum"));
    }

    /// An over-long line is skipped and later lines are still processed.
    #[test]
    fn test_discover_from_reader_long_line_skipped() {
        let temp = TempDir::new().unwrap();
        let file = temp.path().join("test.yaml");
        fs::write(&file, "key: value").unwrap();

        let long_line = "x".repeat(MAX_LINE_LENGTH + 1);
        let input = format!("{}\n{}\n", long_line, file.display());
        let reader = std::io::Cursor::new(input);

        let config = default_config();
        let discovery = FileDiscovery::new(config).unwrap();
        let files = discovery.discover_from_reader(reader).unwrap();

        assert_eq!(files.len(), 1);
    }

    /// A subdirectory the walker cannot read must not abort discovery.
    ///
    /// The assertion only requires the readable file to be present, so the
    /// test also holds when run as root, where the permission bits are not
    /// enforced and the locked file is found as well.
    #[cfg(unix)]
    #[test]
    fn test_permission_denied_continues() {
        use std::os::unix::fs::PermissionsExt;

        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join("readable.yaml"), "a: 1").unwrap();

        let locked = temp.path().join("locked");
        fs::create_dir(&locked).unwrap();
        fs::write(locked.join("secret.yaml"), "b: 2").unwrap();
        fs::set_permissions(&locked, fs::Permissions::from_mode(0o000)).unwrap();

        let discovery = FileDiscovery::new(default_config()).unwrap();
        let result = discovery.discover(&[temp.path().to_path_buf()]);

        // Restore access before asserting so the temp dir can always be removed.
        fs::set_permissions(&locked, fs::Permissions::from_mode(0o755)).unwrap();

        let files = result.unwrap();
        assert!(files.iter().any(|f| f.path.ends_with("readable.yaml")));
    }
}