exarch_core/creation/
walker.rs

//! Directory tree walking with filtering.
//!
//! This module provides efficient directory traversal with built-in filtering
//! based on configuration options such as hidden files, exclude patterns, and
//! size limits.
6
7use crate::ExtractionError;
8use crate::Result;
9use crate::creation::config::CreationConfig;
10use crate::creation::filters;
11use std::fs::Metadata;
12use std::path::Path;
13use std::path::PathBuf;
14use walkdir::WalkDir;
15
16/// Walks a directory tree with filtering based on `CreationConfig`.
17///
18/// This walker handles:
19/// - Hidden file filtering
20/// - Pattern-based exclusion
21/// - Symlink handling (follow or store as-is)
22/// - Size limit enforcement
23/// - Archive path computation
24///
25/// # Examples
26///
27/// ```no_run
28/// use exarch_core::creation::CreationConfig;
29/// use exarch_core::creation::walker::FilteredWalker;
30/// use std::path::Path;
31///
32/// let config = CreationConfig::default();
33/// let root = Path::new("./project");
34/// let walker = FilteredWalker::new(root, &config);
35///
36/// for entry in walker.walk() {
37///     let entry = entry.unwrap();
38///     println!("Would add: {}", entry.archive_path.display());
39/// }
40/// ```
41pub struct FilteredWalker<'a> {
42    root: &'a Path,
43    config: &'a CreationConfig,
44}
45
46impl<'a> FilteredWalker<'a> {
47    /// Creates a new filtered walker for the given root directory.
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// use exarch_core::creation::CreationConfig;
53    /// use exarch_core::creation::walker::FilteredWalker;
54    /// use std::path::Path;
55    ///
56    /// let config = CreationConfig::default();
57    /// let walker = FilteredWalker::new(Path::new("."), &config);
58    /// ```
59    #[must_use]
60    pub fn new(root: &'a Path, config: &'a CreationConfig) -> Self {
61        Self { root, config }
62    }
63
64    /// Returns an iterator over filtered directory entries.
65    ///
66    /// The iterator:
67    /// - Skips entries based on configuration (hidden files, patterns, size)
68    /// - Handles symlinks according to `follow_symlinks` setting
69    /// - Computes archive paths using `strip_prefix` if configured
70    /// - Returns errors for inaccessible files/directories
71    ///
72    /// # Errors
73    ///
74    /// Entries may error if:
75    /// - File metadata cannot be read
76    /// - Path is not valid UTF-8 (platform-specific)
77    /// - Symlink target cannot be read
78    pub fn walk(&self) -> impl Iterator<Item = Result<FilteredEntry>> + '_ {
79        let walker = WalkDir::new(self.root)
80            .follow_links(self.config.follow_symlinks)
81            .into_iter();
82
83        walker.filter_map(move |entry| {
84            match entry {
85                Ok(entry) => {
86                    let path = entry.path();
87
88                    // Skip if matches filter rules
89                    if filters::should_skip(path, self.config) {
90                        return None;
91                    }
92
93                    // Build FilteredEntry
94                    match self.build_filtered_entry(&entry) {
95                        Ok(Some(filtered)) => Some(Ok(filtered)),
96                        Ok(None) => None, // Filtered out (e.g., size limit)
97                        Err(e) => Some(Err(e)),
98                    }
99                }
100                Err(e) => {
101                    // Convert walkdir error to ExtractionError
102                    Some(Err(ExtractionError::Io(std::io::Error::other(format!(
103                        "walkdir error: {e}"
104                    )))))
105                }
106            }
107        })
108    }
109
110    /// Builds a `FilteredEntry` from a `walkdir::DirEntry`.
111    ///
112    /// Returns `Ok(None)` if the entry should be filtered out (e.g., exceeds
113    /// size limit).
114    fn build_filtered_entry(&self, entry: &walkdir::DirEntry) -> Result<Option<FilteredEntry>> {
115        let path = entry.path().to_path_buf();
116        let metadata = entry.metadata().map_err(|e| {
117            ExtractionError::Io(std::io::Error::other(format!(
118                "cannot read metadata for {}: {e}",
119                path.display()
120            )))
121        })?;
122
123        // Determine entry type
124        let entry_type = if metadata.is_symlink() {
125            let target = std::fs::read_link(&path).map_err(|e| {
126                ExtractionError::Io(std::io::Error::other(format!(
127                    "cannot read symlink target for {}: {e}",
128                    path.display()
129                )))
130            })?;
131            EntryType::Symlink { target }
132        } else if metadata.is_dir() {
133            EntryType::Directory
134        } else {
135            EntryType::File
136        };
137
138        // Check file size limit (only for regular files)
139        let size = get_file_size(&metadata);
140        if entry_type == EntryType::File
141            && let Some(max_size) = self.config.max_file_size
142            && size > max_size
143        {
144            return Ok(None); // Filter out
145        }
146
147        // Compute archive path
148        let archive_path = filters::compute_archive_path(&path, self.root, self.config)?;
149
150        Ok(Some(FilteredEntry {
151            path,
152            archive_path,
153            entry_type,
154            size,
155        }))
156    }
157}
158
159/// A filtered directory entry with computed archive path.
160///
161/// Represents a file, directory, or symlink that passed all filtering rules
162/// and is ready to be added to an archive.
163#[derive(Debug, Clone, PartialEq, Eq)]
164pub struct FilteredEntry {
165    /// Full filesystem path to the entry.
166    pub path: PathBuf,
167
168    /// Path to use in the archive (relative, with `strip_prefix` applied).
169    pub archive_path: PathBuf,
170
171    /// Type of entry (file, directory, or symlink).
172    pub entry_type: EntryType,
173
174    /// Size in bytes (0 for directories).
175    pub size: u64,
176}
177
/// Kind of filesystem object a [`FilteredEntry`] refers to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EntryType {
    /// A regular file.
    File,

    /// A directory.
    Directory,

    /// A symbolic link, stored together with the path it points to.
    Symlink {
        /// Path the link resolves to, as returned by `read_link`.
        target: PathBuf,
    },
}
193
194/// Collects all entries from sources into a vector for single-pass processing.
195///
196/// This function performs a single directory traversal and collects all
197/// filtered entries into memory, avoiding the need to traverse the directory
198/// tree twice (once for counting, once for processing).
199///
200/// # Examples
201///
202/// ```no_run
203/// use exarch_core::creation::CreationConfig;
204/// use exarch_core::creation::walker::collect_entries;
205/// use std::path::Path;
206///
207/// let config = CreationConfig::default();
208/// let sources = [Path::new("./src")];
209/// let entries = collect_entries(&sources, &config)?;
210/// println!("Total entries: {}", entries.len());
211/// # Ok::<(), exarch_core::ExtractionError>(())
212/// ```
213///
214/// # Errors
215///
216/// Returns an error if:
217/// - Source path does not exist
218/// - Directory traversal fails
219/// - File metadata cannot be read
220pub fn collect_entries<P: AsRef<Path>>(
221    sources: &[P],
222    config: &CreationConfig,
223) -> Result<Vec<FilteredEntry>> {
224    let mut entries = Vec::new();
225
226    for source in sources {
227        let path = source.as_ref();
228
229        if !path.exists() {
230            return Err(ExtractionError::SourceNotFound {
231                path: path.to_path_buf(),
232            });
233        }
234
235        if path.is_dir() {
236            let walker = FilteredWalker::new(path, config);
237            for entry in walker.walk() {
238                entries.push(entry?);
239            }
240        } else {
241            // For single files, we need to create a FilteredEntry manually
242            let metadata = std::fs::metadata(path)?;
243            let size = if metadata.is_file() {
244                metadata.len()
245            } else {
246                0
247            };
248
249            let entry_type = if metadata.is_symlink() {
250                let target = std::fs::read_link(path)?;
251                EntryType::Symlink { target }
252            } else if metadata.is_dir() {
253                EntryType::Directory
254            } else {
255                EntryType::File
256            };
257
258            let archive_path = if let Some(parent) = path.parent() {
259                filters::compute_archive_path(path, parent, config)?
260            } else {
261                path.file_name()
262                    .ok_or_else(|| {
263                        ExtractionError::Io(std::io::Error::other(format!(
264                            "cannot determine filename for {}",
265                            path.display()
266                        )))
267                    })?
268                    .into()
269            };
270
271            entries.push(FilteredEntry {
272                path: path.to_path_buf(),
273                archive_path,
274                entry_type,
275                size,
276            });
277        }
278    }
279
280    Ok(entries)
281}
282
/// Returns the size in bytes recorded in `metadata`.
///
/// `Metadata::len` is available on every platform and, on Unix, reports the
/// same value as `MetadataExt::size`, so a single implementation serves all
/// targets.
fn get_file_size(metadata: &Metadata) -> u64 {
    metadata.len()
}
294
#[cfg(test)]
#[allow(clippy::unwrap_used)] // Allow unwrap in tests for brevity
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Walks `root` with `config` and unwraps every yielded entry.
    fn walk_all(root: &Path, config: &CreationConfig) -> Vec<FilteredEntry> {
        FilteredWalker::new(root, config)
            .walk()
            .collect::<Result<Vec<_>>>()
            .unwrap()
    }

    /// Archive paths of `entries` as UTF-8 string slices.
    fn archive_paths(entries: &[FilteredEntry]) -> Vec<&str> {
        entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect()
    }

    /// Whether any path in `paths` contains `needle` as a substring.
    fn mentions(paths: &[&str], needle: &str) -> bool {
        paths.iter().any(|p| p.contains(needle))
    }

    /// First entry whose archive path contains `needle`, if any.
    fn find_entry<'e>(entries: &'e [FilteredEntry], needle: &str) -> Option<&'e FilteredEntry> {
        entries
            .iter()
            .find(|e| e.archive_path.to_str().unwrap().contains(needle))
    }

    #[test]
    fn test_walker_basic_directory() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Lay out two files, one subdirectory and one nested file.
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.rs"), "content2").unwrap();
        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default()
            .with_include_hidden(true)
            .with_exclude_patterns(vec![]);

        let entries = walk_all(root, &config);

        // Should find exactly: root dir, file1, file2, subdir, file3 = 5 entries
        assert_eq!(entries.len(), 5, "expected exactly 5 entries");

        let paths = archive_paths(&entries);
        for needle in ["file1.txt", "file2.rs", "subdir", "file3.txt"] {
            assert!(mentions(&paths, needle));
        }
    }

    #[test]
    fn test_walker_skips_hidden_files() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        // include_hidden = false by default
        let config = CreationConfig::default();
        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(mentions(&paths, "visible.txt"));
        assert!(!mentions(&paths, ".hidden"));
    }

    #[test]
    fn test_walker_includes_hidden_when_configured() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(mentions(&paths, "visible.txt"));
        assert!(mentions(&paths, ".hidden"));
    }

    #[test]
    fn test_walker_skips_excluded_patterns() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join("also.log"), "skip").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string(), "*.log".to_string()]);

        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(mentions(&paths, "keep.txt"));
        assert!(!mentions(&paths, "skip.tmp"));
        assert!(!mentions(&paths, "also.log"));
    }

    #[cfg(unix)]
    #[test]
    fn test_walker_handles_symlinks() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("target.txt"), "content").unwrap();
        std::os::unix::fs::symlink(root.join("target.txt"), root.join("link.txt")).unwrap();

        // Don't follow symlinks (default)
        let config = CreationConfig::default();
        let entries = walk_all(root, &config);

        let link_entry = find_entry(&entries, "link.txt");

        assert!(link_entry.is_some());
        assert!(matches!(
            link_entry.unwrap().entry_type,
            EntryType::Symlink { .. }
        ));
    }

    #[cfg(unix)]
    #[test]
    fn test_walker_detects_symlink_cycles() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("dir1")).unwrap();
        fs::create_dir(root.join("dir1/dir2")).unwrap();

        // Create symlink cycle: dir1/dir2/link -> dir1
        std::os::unix::fs::symlink(root.join("dir1"), root.join("dir1/dir2/link")).unwrap();

        // Follow symlinks - walkdir handles cycle detection
        let config = CreationConfig::default().with_follow_symlinks(true);
        let walker = FilteredWalker::new(root, &config);

        // Keep the raw results: the cycle is expected to surface as an error item.
        let results: Vec<_> = walker.walk().collect();

        // Should have some successful entries before hitting the cycle
        let ok_count = results.iter().filter(|r| r.is_ok()).count();
        assert!(ok_count > 0, "should have some entries before cycle");

        // Should detect the cycle and return an error
        let cycle_reported = results
            .iter()
            .filter_map(|r| r.as_ref().err())
            .any(|e| {
                let msg = e.to_string();
                msg.contains("File system loop") || msg.contains("walkdir error")
            });
        assert!(cycle_reported, "should detect symlink cycle");
    }

    #[test]
    fn test_walker_respects_max_file_size() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("small.txt"), "tiny").unwrap(); // 4 bytes
        fs::write(root.join("large.txt"), "a".repeat(1000)).unwrap(); // 1000 bytes

        let config = CreationConfig::default().with_max_file_size(Some(100));

        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(mentions(&paths, "small.txt"));
        assert!(!mentions(&paths, "large.txt"));
    }

    #[test]
    fn test_walker_computes_archive_paths() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "code").unwrap();

        let config = CreationConfig::default();
        let entries = walk_all(root, &config);

        let main_entry = find_entry(&entries, "main.rs");

        assert!(main_entry.is_some());
        assert_eq!(main_entry.unwrap().archive_path, Path::new("src/main.rs"));
    }

    #[test]
    fn test_walker_strip_prefix() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("project")).unwrap();
        fs::create_dir(root.join("project/src")).unwrap();
        fs::write(root.join("project/src/main.rs"), "code").unwrap();

        let config = CreationConfig::default().with_strip_prefix(Some(PathBuf::from("project")));

        let entries = walk_all(root, &config);

        let main_entry = find_entry(&entries, "main.rs");

        assert!(main_entry.is_some());
        assert_eq!(main_entry.unwrap().archive_path, Path::new("src/main.rs"));
    }

    #[test]
    fn test_filtered_entry_file() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/file.txt"),
            archive_path: PathBuf::from("file.txt"),
            entry_type: EntryType::File,
            size: 1024,
        };

        assert_eq!(entry.path, Path::new("/tmp/file.txt"));
        assert_eq!(entry.archive_path, Path::new("file.txt"));
        assert!(matches!(entry.entry_type, EntryType::File));
        assert_eq!(entry.size, 1024);
    }

    #[test]
    fn test_filtered_entry_directory() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/dir"),
            archive_path: PathBuf::from("dir"),
            entry_type: EntryType::Directory,
            size: 0,
        };

        assert!(matches!(entry.entry_type, EntryType::Directory));
        assert_eq!(entry.size, 0);
    }

    #[test]
    fn test_filtered_entry_symlink() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/link"),
            archive_path: PathBuf::from("link"),
            entry_type: EntryType::Symlink {
                target: PathBuf::from("target.txt"),
            },
            size: 0,
        };

        let EntryType::Symlink { target } = &entry.entry_type else {
            panic!("expected symlink");
        };
        assert_eq!(target, Path::new("target.txt"));
    }

    #[test]
    fn test_entry_type_equality() {
        let link_to = |target: &str| EntryType::Symlink {
            target: PathBuf::from(target),
        };

        assert_eq!(EntryType::File, EntryType::File);
        assert_eq!(EntryType::Directory, EntryType::Directory);
        assert_eq!(link_to("a"), link_to("a"));
        assert_ne!(EntryType::File, EntryType::Directory);
        assert_ne!(link_to("a"), link_to("b"));
    }

    #[test]
    fn test_collect_entries_empty_sources() {
        let config = CreationConfig::default();
        let sources: Vec<&Path> = vec![];

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 0);
    }

    #[test]
    fn test_collect_entries_nonexistent_source() {
        let config = CreationConfig::default();
        let sources = [Path::new("/nonexistent/path/that/does/not/exist")];

        let result = collect_entries(&sources, &config);

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SourceNotFound { .. }
        ));
    }

    #[test]
    fn test_collect_entries_mixed_files_and_directories() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // One standalone file plus two small directories.
        fs::write(root.join("single_file.txt"), "standalone").unwrap();
        fs::create_dir(root.join("dir1")).unwrap();
        fs::write(root.join("dir1/file1.txt"), "content1").unwrap();
        fs::write(root.join("dir1/file2.txt"), "content2").unwrap();
        fs::create_dir(root.join("dir2")).unwrap();
        fs::write(root.join("dir2/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [
            root.join("single_file.txt"),
            root.join("dir1"),
            root.join("dir2"),
        ];

        let entries = collect_entries(&sources, &config).unwrap();

        // Should have: single_file.txt (1) + dir1 entries (2 files + 1 dir = 3) + dir2
        // entries (1 file + 1 dir = 2) = 6 total
        assert!(
            entries.len() >= 5,
            "Expected at least 5 entries (files and dirs), got {}",
            entries.len()
        );

        let paths = archive_paths(&entries);
        for needle in ["single_file.txt", "file1.txt", "file2.txt", "file3.txt"] {
            assert!(mentions(&paths, needle));
        }
    }

    #[test]
    fn test_collect_entries_large_directory_count() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Populate the root and one subdirectory with a known number of files.
        for i in 0..50 {
            fs::write(root.join(format!("file_{i}.txt")), format!("content {i}")).unwrap();
        }
        fs::create_dir(root.join("subdir")).unwrap();
        for i in 0..30 {
            fs::write(
                root.join(format!("subdir/file_{i}.txt")),
                format!("sub content {i}"),
            )
            .unwrap();
        }

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [root];

        let entries = collect_entries(&sources, &config).unwrap();

        // Should have: 50 files in root + 1 subdir + 1 root dir + 30 files in subdir =
        // 82
        assert!(
            entries.len() >= 80,
            "Expected at least 80 entries, got {}",
            entries.len()
        );
    }

    #[test]
    fn test_collect_entries_single_file() {
        let temp = TempDir::new().unwrap();
        let file_path = temp.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let config = CreationConfig::default();
        let sources = [&file_path];

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].entry_type, EntryType::File);
        assert!(mentions(&archive_paths(&entries[..1]), "test.txt"));
    }

    #[test]
    fn test_collect_entries_respects_filters() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join(".hidden"), "hidden").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string()])
            .with_include_hidden(false);

        let sources = [root];
        let entries = collect_entries(&sources, &config).unwrap();
        let paths = archive_paths(&entries);

        assert!(mentions(&paths, "keep.txt"));
        assert!(!mentions(&paths, "skip.tmp"));
        assert!(!mentions(&paths, ".hidden"));
    }
}