1use crate::error::CliError;
59use std::path::{Path, PathBuf};
60use walkdir::{DirEntry, WalkDir};
61
/// Options that control how [`FileDiscovery`] expands patterns and filters
/// the files it finds.
#[derive(Debug, Clone)]
pub struct DiscoveryConfig {
    /// Depth limit handed to the directory walker in recursive mode.
    /// `None` removes the limit.
    pub max_depth: Option<usize>,

    /// When set, only files whose extension equals this string are kept.
    /// The value is compared against `Path::extension`, so it carries no
    /// leading dot (e.g. `"hedl"`).
    pub extension: Option<String>,

    /// When set, files whose metadata length (in bytes) exceeds this value
    /// are skipped.
    pub max_file_size: Option<u64>,

    /// Forwarded to the directory walker's `follow_links` setting in
    /// recursive mode.
    pub follow_links: bool,

    /// When `false`, recursive walks skip entries whose own name starts
    /// with `.`.
    pub include_hidden: bool,

    /// Selects the directory-walking strategies in `FileDiscovery::discover`.
    /// When `false`, each pattern is expanded with a plain glob.
    pub recursive: bool,
}

impl Default for DiscoveryConfig {
    /// Non-recursive discovery with a depth cap of 10, no extension or size
    /// filter, symlinks not followed and hidden entries excluded.
    fn default() -> Self {
        const DEFAULT_MAX_DEPTH: usize = 10;

        DiscoveryConfig {
            recursive: false,
            include_hidden: false,
            follow_links: false,
            max_file_size: None,
            extension: None,
            max_depth: Some(DEFAULT_MAX_DEPTH),
        }
    }
}
115
/// Expands a list of glob patterns into concrete file paths, applying the
/// filters and traversal limits of a [`DiscoveryConfig`].
#[derive(Debug)]
pub struct FileDiscovery {
    /// Glob patterns supplied by the caller; processed in order.
    patterns: Vec<String>,
    /// Filters and traversal settings applied to every pattern.
    config: DiscoveryConfig,
}
125
126impl FileDiscovery {
127 #[must_use]
145 pub fn new(patterns: Vec<String>, config: DiscoveryConfig) -> Self {
146 Self { patterns, config }
147 }
148
149 pub fn validate_patterns(&self) -> Result<(), CliError> {
170 for pattern in &self.patterns {
171 if let Err(e) = glob::Pattern::new(pattern) {
172 return Err(CliError::GlobPattern {
173 pattern: pattern.clone(),
174 message: e.to_string(),
175 });
176 }
177 }
178 Ok(())
179 }
180
181 pub fn discover(&self) -> Result<Vec<PathBuf>, CliError> {
213 if self.patterns.is_empty() {
215 return Ok(Vec::new());
216 }
217
218 self.validate_patterns()?;
220
221 let mut all_files = Vec::new();
222
223 for pattern in &self.patterns {
224 let pattern_files = if self.config.recursive && pattern.contains("**") {
225 self.discover_recursive_glob(pattern)?
227 } else if self.config.recursive {
228 self.discover_recursive_simple(pattern)?
230 } else {
231 self.discover_simple_glob(pattern)?
233 };
234
235 all_files.extend(pattern_files);
236 }
237
238 let mut seen = std::collections::HashSet::new();
240 all_files.retain(|path| seen.insert(path.clone()));
241
242 if all_files.is_empty() {
243 return Err(CliError::NoFilesMatched {
244 patterns: self.patterns.clone(),
245 });
246 }
247
248 Ok(all_files)
249 }
250
251 fn discover_simple_glob(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
253 let mut files = Vec::new();
254
255 for entry in glob::glob(pattern).map_err(|e| CliError::GlobPattern {
256 pattern: pattern.to_string(),
257 message: e.to_string(),
258 })? {
259 let path = entry.map_err(|e| CliError::DirectoryTraversal {
260 path: PathBuf::from(pattern),
261 message: e.to_string(),
262 })?;
263
264 if self.should_include_file(&path)? {
265 files.push(path);
266 }
267 }
268
269 Ok(files)
270 }
271
272 fn discover_recursive_glob(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
274 let base_dir = self.extract_base_dir(pattern);
277
278 let mut files = Vec::new();
279
280 let walker = WalkDir::new(&base_dir)
281 .follow_links(self.config.follow_links)
282 .max_depth(self.config.max_depth.unwrap_or(usize::MAX));
283
284 let glob_pattern = glob::Pattern::new(pattern).map_err(|e| CliError::GlobPattern {
285 pattern: pattern.to_string(),
286 message: e.to_string(),
287 })?;
288
289 for entry in walker {
290 let entry = entry.map_err(|e| CliError::DirectoryTraversal {
291 path: base_dir.clone(),
292 message: e.to_string(),
293 })?;
294
295 if !self.should_include_entry(&entry) {
296 continue;
297 }
298
299 let path = entry.path();
300 if path.is_file()
301 && glob_pattern.matches_path(path)
302 && self.should_include_file(path)?
303 {
304 files.push(path.to_path_buf());
305 }
306 }
307
308 Ok(files)
309 }
310
311 fn discover_recursive_simple(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
313 let base_dir = self.extract_base_dir(pattern);
315 let filename_pattern = PathBuf::from(pattern)
316 .file_name()
317 .map_or_else(|| pattern.to_string(), |s| s.to_string_lossy().to_string());
318
319 let recursive_pattern = if base_dir == std::path::Path::new(".") {
320 format!("**/{filename_pattern}")
321 } else {
322 format!("{}/**/{}", base_dir.display(), filename_pattern)
323 };
324
325 self.discover_recursive_glob(&recursive_pattern)
326 }
327
328 fn extract_base_dir(&self, pattern: &str) -> PathBuf {
330 let path = PathBuf::from(pattern);
331
332 for ancestor in path.ancestors() {
334 let ancestor_str = ancestor.to_string_lossy();
335 if ancestor_str.contains('*')
337 || ancestor_str.contains('?')
338 || ancestor_str.contains('[')
339 {
340 continue;
341 }
342 if ancestor_str.is_empty() {
344 return PathBuf::from(".");
345 }
346 return ancestor.to_path_buf();
347 }
348
349 PathBuf::from(".")
351 }
352
353 fn should_include_entry(&self, entry: &DirEntry) -> bool {
355 if !self.config.include_hidden {
357 if let Some(name) = entry.file_name().to_str() {
358 if name.starts_with('.') && name != "." && name != ".." {
359 return false;
360 }
361 }
362 }
363
364 true
365 }
366
367 fn should_include_file(&self, path: &Path) -> Result<bool, CliError> {
369 if !path.is_file() {
371 return Ok(false);
372 }
373
374 if let Some(ref ext) = self.config.extension {
376 if path.extension().and_then(|s| s.to_str()) != Some(ext.as_str()) {
377 return Ok(false);
378 }
379 }
380
381 if let Some(max_size) = self.config.max_file_size {
383 let metadata = std::fs::metadata(path).map_err(|e| CliError::io_error(path, e))?;
384 if metadata.len() > max_size {
385 return Ok(false);
386 }
387 }
388
389 Ok(true)
390 }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes a small placeholder file at each relative path in `files`
    /// below `dir`, creating parent directories as needed.
    fn create_test_files(dir: &Path, files: &[&str]) -> Result<(), std::io::Error> {
        for file in files {
            let path = dir.join(file);
            if let Some(parent) = path.parent() {
                fs::create_dir_all(parent)?;
            }
            fs::write(path, "test content")?;
        }
        Ok(())
    }

    #[test]
    fn test_discovery_config_default() {
        let config = DiscoveryConfig::default();
        assert_eq!(config.max_depth, Some(10));
        assert!(config.extension.is_none());
        assert!(config.max_file_size.is_none());
        assert!(!config.follow_links);
        assert!(!config.include_hidden);
        assert!(!config.recursive);
    }

    #[test]
    fn test_validate_patterns_valid() {
        let discovery = FileDiscovery::new(
            vec!["*.hedl".to_string(), "test/*.hedl".to_string()],
            DiscoveryConfig::default(),
        );
        assert!(discovery.validate_patterns().is_ok());
    }

    #[test]
    fn test_validate_patterns_invalid() {
        let discovery =
            FileDiscovery::new(vec!["[invalid".to_string()], DiscoveryConfig::default());

        // Match exhaustively so that an unexpected error variant (or an Ok)
        // fails the test instead of passing silently.
        match discovery.validate_patterns() {
            Err(CliError::GlobPattern { pattern, .. }) => assert_eq!(pattern, "[invalid"),
            other => panic!("expected a GlobPattern error, got {other:?}"),
        }
    }

    #[test]
    fn test_discover_simple_glob() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl", "file2.hedl", "file3.txt"])?;

        let pattern = format!("{}/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(vec![pattern], DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_discover_no_matches() {
        let temp_dir = TempDir::new().unwrap();
        let pattern = format!("{}/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(vec![pattern.clone()], DiscoveryConfig::default());

        // An empty directory must produce NoFilesMatched specifically.
        match discovery.discover() {
            Err(CliError::NoFilesMatched { patterns }) => assert_eq!(patterns, vec![pattern]),
            other => panic!("expected a NoFilesMatched error, got {other:?}"),
        }
    }

    #[test]
    fn test_discover_recursive() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &[
                "file1.hedl",
                "dir1/file2.hedl",
                "dir1/dir2/file3.hedl",
                "dir1/file4.txt",
            ],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                ..Default::default()
            },
        );

        let files = discovery.discover()?;
        assert_eq!(files.len(), 3);

        Ok(())
    }

    #[test]
    fn test_discover_with_depth_limit() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &[
                "file1.hedl",
                "dir1/file2.hedl",
                "dir1/dir2/file3.hedl",
                "dir1/dir2/dir3/file4.hedl",
            ],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                max_depth: Some(2),
                ..Default::default()
            },
        );

        // The walk root is depth 0, so depth 2 reaches `file1.hedl` (depth 1)
        // and `dir1/file2.hedl` (depth 2) but not the deeper files.
        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_discover_with_extension_filter() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl", "file2.txt", "file3.hedl"])?;

        let pattern = format!("{}/*", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                extension: Some("hedl".to_string()),
                ..Default::default()
            },
        );

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);
        assert!(files.iter().all(|p| p.extension().unwrap() == "hedl"));

        Ok(())
    }

    #[test]
    fn test_discover_hidden_files() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &["file1.hedl", ".hidden.hedl", "dir/.hidden2.hedl"],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());

        // Hidden entries excluded: only `file1.hedl` remains.
        let discovery = FileDiscovery::new(
            vec![pattern.clone()],
            DiscoveryConfig {
                recursive: true,
                include_hidden: false,
                ..Default::default()
            },
        );
        let files = discovery.discover()?;
        assert_eq!(files.len(), 1);

        // Hidden entries included: all three files are found.
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                include_hidden: true,
                ..Default::default()
            },
        );
        let files = discovery.discover()?;
        assert_eq!(files.len(), 3);

        Ok(())
    }

    #[test]
    fn test_extract_base_dir() {
        let discovery = FileDiscovery::new(vec![], DiscoveryConfig::default());

        assert_eq!(discovery.extract_base_dir("*.hedl"), PathBuf::from("."));
        assert_eq!(
            discovery.extract_base_dir("dir/*.hedl"),
            PathBuf::from("dir")
        );
        assert_eq!(
            discovery.extract_base_dir("dir/subdir/*.hedl"),
            PathBuf::from("dir/subdir")
        );
        assert_eq!(
            discovery.extract_base_dir("**/file.hedl"),
            PathBuf::from(".")
        );
    }

    #[test]
    fn test_multiple_patterns() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &["dir1/file1.hedl", "dir2/file2.hedl", "file3.hedl"],
        )?;

        let patterns = vec![
            format!("{}/dir1/*.hedl", temp_dir.path().display()),
            format!("{}/dir2/*.hedl", temp_dir.path().display()),
        ];
        let discovery = FileDiscovery::new(patterns, DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_deduplicate_files() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl"])?;

        // The same file is reachable through a literal path and a wildcard.
        let file_path = format!("{}/file1.hedl", temp_dir.path().display());
        let patterns = vec![
            file_path.clone(),
            format!("{}/*.hedl", temp_dir.path().display()),
        ];
        let discovery = FileDiscovery::new(patterns, DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 1);

        Ok(())
    }
}