1use crate::error::CliError;
59use std::path::{Path, PathBuf};
60use walkdir::{DirEntry, WalkDir};
61
/// Options controlling how [`FileDiscovery`] walks directories and filters
/// the files it finds.
#[derive(Debug, Clone)]
pub struct DiscoveryConfig {
    /// Depth limit handed to `WalkDir::max_depth` during recursive discovery.
    /// `None` is translated to `usize::MAX`, i.e. effectively unlimited.
    /// Not consulted by non-recursive glob expansion.
    pub max_depth: Option<usize>,

    /// When set, only files whose `Path::extension()` equals this string are
    /// kept. The comparison is an exact, case-sensitive string match.
    pub extension: Option<String>,

    /// When set, files whose metadata length exceeds this value are dropped.
    /// Files exactly at the limit are kept.
    pub max_file_size: Option<u64>,

    /// Handed to `WalkDir::follow_links` during recursive discovery.
    pub follow_links: bool,

    /// When `false`, recursive discovery skips entries whose name starts with
    /// `.`. Not consulted by non-recursive glob expansion.
    pub include_hidden: bool,

    /// Selects directory-walking discovery instead of plain glob expansion.
    pub recursive: bool,
}
102
103impl Default for DiscoveryConfig {
104 fn default() -> Self {
105 Self {
106 max_depth: Some(10),
107 extension: None,
108 max_file_size: None,
109 follow_links: false,
110 include_hidden: false,
111 recursive: false,
112 }
113 }
114}
115
/// Expands a list of glob patterns into a de-duplicated list of files,
/// applying the filters of a [`DiscoveryConfig`].
#[derive(Debug)]
pub struct FileDiscovery {
    /// Glob patterns to expand, processed in the order given.
    patterns: Vec<String>,
    /// Traversal and filtering options applied to every pattern.
    config: DiscoveryConfig,
}
125
126impl FileDiscovery {
127 #[must_use]
145 pub fn new(patterns: Vec<String>, config: DiscoveryConfig) -> Self {
146 Self { patterns, config }
147 }
148
149 pub fn validate_patterns(&self) -> Result<(), CliError> {
170 for pattern in &self.patterns {
171 if let Err(e) = glob::Pattern::new(pattern) {
172 return Err(CliError::GlobPattern {
173 pattern: pattern.clone(),
174 message: e.to_string(),
175 });
176 }
177 }
178 Ok(())
179 }
180
181 pub fn discover(&self) -> Result<Vec<PathBuf>, CliError> {
213 if self.patterns.is_empty() {
215 return Ok(Vec::new());
216 }
217
218 self.validate_patterns()?;
220
221 let mut all_files = Vec::new();
222
223 for pattern in &self.patterns {
224 let pattern_files = if self.config.recursive && pattern.contains("**") {
225 self.discover_recursive_glob(pattern)?
227 } else if self.config.recursive {
228 self.discover_recursive_simple(pattern)?
230 } else {
231 self.discover_simple_glob(pattern)?
233 };
234
235 all_files.extend(pattern_files);
236 }
237
238 let mut seen = std::collections::HashSet::new();
240 all_files.retain(|path| seen.insert(path.clone()));
241
242 if all_files.is_empty() {
243 return Err(CliError::NoFilesMatched {
244 patterns: self.patterns.clone(),
245 });
246 }
247
248 Ok(all_files)
249 }
250
251 fn discover_simple_glob(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
253 let mut files = Vec::new();
254
255 for entry in glob::glob(pattern).map_err(|e| CliError::GlobPattern {
256 pattern: pattern.to_string(),
257 message: e.to_string(),
258 })? {
259 let path = entry.map_err(|e| CliError::DirectoryTraversal {
260 path: PathBuf::from(pattern),
261 message: e.to_string(),
262 })?;
263
264 if self.should_include_file(&path)? {
265 files.push(path);
266 }
267 }
268
269 Ok(files)
270 }
271
272 fn discover_recursive_glob(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
274 let base_dir = self.extract_base_dir(pattern);
277
278 let mut files = Vec::new();
279
280 let walker = WalkDir::new(&base_dir)
281 .follow_links(self.config.follow_links)
282 .max_depth(self.config.max_depth.unwrap_or(usize::MAX));
283
284 let glob_pattern = glob::Pattern::new(pattern).map_err(|e| CliError::GlobPattern {
285 pattern: pattern.to_string(),
286 message: e.to_string(),
287 })?;
288
289 for entry in walker {
290 let entry = entry.map_err(|e| CliError::DirectoryTraversal {
291 path: base_dir.clone(),
292 message: e.to_string(),
293 })?;
294
295 if !self.should_include_entry(&entry) {
296 continue;
297 }
298
299 let path = entry.path();
300 if path.is_file()
301 && glob_pattern.matches_path(path)
302 && self.should_include_file(path)?
303 {
304 files.push(path.to_path_buf());
305 }
306 }
307
308 Ok(files)
309 }
310
311 fn discover_recursive_simple(&self, pattern: &str) -> Result<Vec<PathBuf>, CliError> {
313 let base_dir = self.extract_base_dir(pattern);
315 let filename_pattern = PathBuf::from(pattern)
316 .file_name()
317 .map_or_else(|| pattern.to_string(), |s| s.to_string_lossy().to_string());
318
319 let recursive_pattern = if base_dir == std::path::Path::new(".") {
320 format!("**/{filename_pattern}")
321 } else {
322 format!("{}/**/{}", base_dir.display(), filename_pattern)
323 };
324
325 self.discover_recursive_glob(&recursive_pattern)
326 }
327
328 fn extract_base_dir(&self, pattern: &str) -> PathBuf {
330 let path = PathBuf::from(pattern);
331
332 for ancestor in path.ancestors() {
334 let s = ancestor.to_string_lossy();
335 if s.contains('*') || s.contains('?') || s.contains('[') {
337 continue;
338 }
339 if s.is_empty() {
341 return PathBuf::from(".");
342 }
343 return ancestor.to_path_buf();
344 }
345
346 PathBuf::from(".")
348 }
349
350 fn should_include_entry(&self, entry: &DirEntry) -> bool {
352 if !self.config.include_hidden {
354 if let Some(name) = entry.file_name().to_str() {
355 if name.starts_with('.') && name != "." && name != ".." {
356 return false;
357 }
358 }
359 }
360
361 true
362 }
363
364 fn should_include_file(&self, path: &Path) -> Result<bool, CliError> {
366 if !path.is_file() {
368 return Ok(false);
369 }
370
371 if let Some(ref ext) = self.config.extension {
373 if path.extension().and_then(|s| s.to_str()) != Some(ext.as_str()) {
374 return Ok(false);
375 }
376 }
377
378 if let Some(max_size) = self.config.max_file_size {
380 let metadata = std::fs::metadata(path).map_err(|e| CliError::io_error(path, e))?;
381 if metadata.len() > max_size {
382 return Ok(false);
383 }
384 }
385
386 Ok(true)
387 }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates every file in `files` (paths relative to `dir`), including any
    /// missing parent directories, and writes a short fixed payload to each.
    fn create_test_files(dir: &Path, files: &[&str]) -> Result<(), std::io::Error> {
        for file in files {
            let path = dir.join(file);
            if let Some(parent) = path.parent() {
                fs::create_dir_all(parent)?;
            }
            fs::write(path, "test content")?;
        }
        Ok(())
    }

    #[test]
    fn test_discovery_config_default() {
        let config = DiscoveryConfig::default();
        assert_eq!(config.max_depth, Some(10));
        assert!(config.extension.is_none());
        assert!(config.max_file_size.is_none());
        assert!(!config.follow_links);
        assert!(!config.include_hidden);
        assert!(!config.recursive);
    }

    #[test]
    fn test_validate_patterns_valid() {
        let discovery = FileDiscovery::new(
            vec!["*.hedl".to_string(), "test/*.hedl".to_string()],
            DiscoveryConfig::default(),
        );
        assert!(discovery.validate_patterns().is_ok());
    }

    #[test]
    fn test_validate_patterns_invalid() {
        let discovery =
            FileDiscovery::new(vec!["[invalid".to_string()], DiscoveryConfig::default());

        // A strict match: any other outcome (including a different error
        // variant) must fail the test instead of passing silently.
        match discovery.validate_patterns() {
            Err(CliError::GlobPattern { pattern, .. }) => assert_eq!(pattern, "[invalid"),
            other => panic!("expected a GlobPattern error, got {other:?}"),
        }
    }

    #[test]
    fn test_discover_empty_patterns() -> Result<(), Box<dyn std::error::Error>> {
        // No patterns is not an error; it simply yields nothing.
        let discovery = FileDiscovery::new(Vec::new(), DiscoveryConfig::default());
        assert!(discovery.discover()?.is_empty());

        Ok(())
    }

    #[test]
    fn test_discover_simple_glob() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl", "file2.hedl", "file3.txt"])?;

        let pattern = format!("{}/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(vec![pattern], DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_discover_no_matches() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        let pattern = format!("{}/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(vec![pattern.clone()], DiscoveryConfig::default());

        match discovery.discover() {
            Err(CliError::NoFilesMatched { patterns }) => assert_eq!(patterns, vec![pattern]),
            other => panic!("expected a NoFilesMatched error, got {other:?}"),
        }

        Ok(())
    }

    #[test]
    fn test_discover_recursive() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &[
                "file1.hedl",
                "dir1/file2.hedl",
                "dir1/dir2/file3.hedl",
                "dir1/file4.txt",
            ],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                ..Default::default()
            },
        );

        let files = discovery.discover()?;
        assert_eq!(files.len(), 3);

        Ok(())
    }

    #[test]
    fn test_discover_with_depth_limit() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &[
                "file1.hedl",
                "dir1/file2.hedl",
                "dir1/dir2/file3.hedl",
                "dir1/dir2/dir3/file4.hedl",
            ],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                max_depth: Some(2),
                ..Default::default()
            },
        );

        // The walk root is depth 0, so depth 2 reaches `file1.hedl` (depth 1)
        // and `dir1/file2.hedl` (depth 2) but nothing deeper.
        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_discover_with_extension_filter() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl", "file2.txt", "file3.hedl"])?;

        let pattern = format!("{}/*", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                extension: Some("hedl".to_string()),
                ..Default::default()
            },
        );

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);
        assert!(files
            .iter()
            .all(|p| p.extension().and_then(|ext| ext.to_str()) == Some("hedl")));

        Ok(())
    }

    #[test]
    fn test_discover_with_max_file_size() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        fs::write(temp_dir.path().join("small.hedl"), "tiny")?;
        fs::write(temp_dir.path().join("large.hedl"), "x".repeat(64))?;

        let pattern = format!("{}/*.hedl", temp_dir.path().display());
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                max_file_size: Some(16),
                ..Default::default()
            },
        );

        // Only the 4-byte file is within the 16-byte limit.
        let files = discovery.discover()?;
        assert_eq!(files.len(), 1);
        assert_eq!(
            files[0].file_name().and_then(|name| name.to_str()),
            Some("small.hedl")
        );

        Ok(())
    }

    #[test]
    fn test_discover_hidden_files() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &["file1.hedl", ".hidden.hedl", "dir/.hidden2.hedl"],
        )?;

        let pattern = format!("{}/**/*.hedl", temp_dir.path().display());

        // Hidden files excluded: only `file1.hedl` remains.
        let discovery = FileDiscovery::new(
            vec![pattern.clone()],
            DiscoveryConfig {
                recursive: true,
                include_hidden: false,
                ..Default::default()
            },
        );
        let files = discovery.discover()?;
        assert_eq!(files.len(), 1);

        // Hidden files included: all three files match `*.hedl`.
        let discovery = FileDiscovery::new(
            vec![pattern],
            DiscoveryConfig {
                recursive: true,
                include_hidden: true,
                ..Default::default()
            },
        );
        let files = discovery.discover()?;
        assert_eq!(files.len(), 3);

        Ok(())
    }

    #[test]
    fn test_extract_base_dir() {
        let discovery = FileDiscovery::new(vec![], DiscoveryConfig::default());

        assert_eq!(discovery.extract_base_dir("*.hedl"), PathBuf::from("."));
        assert_eq!(
            discovery.extract_base_dir("dir/*.hedl"),
            PathBuf::from("dir")
        );
        assert_eq!(
            discovery.extract_base_dir("dir/subdir/*.hedl"),
            PathBuf::from("dir/subdir")
        );
        assert_eq!(
            discovery.extract_base_dir("**/file.hedl"),
            PathBuf::from(".")
        );
    }

    #[test]
    fn test_multiple_patterns() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(
            temp_dir.path(),
            &["dir1/file1.hedl", "dir2/file2.hedl", "file3.hedl"],
        )?;

        let patterns = vec![
            format!("{}/dir1/*.hedl", temp_dir.path().display()),
            format!("{}/dir2/*.hedl", temp_dir.path().display()),
        ];
        let discovery = FileDiscovery::new(patterns, DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 2);

        Ok(())
    }

    #[test]
    fn test_deduplicate_files() -> Result<(), Box<dyn std::error::Error>> {
        let temp_dir = TempDir::new()?;
        create_test_files(temp_dir.path(), &["file1.hedl"])?;

        // The same file is reachable through a literal path and a wildcard.
        let file_path = format!("{}/file1.hedl", temp_dir.path().display());
        let patterns = vec![
            file_path,
            format!("{}/*.hedl", temp_dir.path().display()),
        ];
        let discovery = FileDiscovery::new(patterns, DiscoveryConfig::default());

        let files = discovery.discover()?;
        assert_eq!(files.len(), 1);

        Ok(())
    }
}