1use crate::ExtractionError;
8use crate::Result;
9use crate::creation::config::CreationConfig;
10use crate::creation::filters;
11use std::fs::Metadata;
12use std::path::Path;
13use std::path::PathBuf;
14use walkdir::WalkDir;
15
16pub struct FilteredWalker<'a> {
42 root: &'a Path,
43 config: &'a CreationConfig,
44}
45
46impl<'a> FilteredWalker<'a> {
47 #[must_use]
60 pub fn new(root: &'a Path, config: &'a CreationConfig) -> Self {
61 Self { root, config }
62 }
63
64 pub fn walk(&self) -> impl Iterator<Item = Result<FilteredEntry>> + '_ {
79 let walker = WalkDir::new(self.root)
80 .follow_links(self.config.follow_symlinks)
81 .into_iter();
82
83 walker.filter_map(move |entry| {
84 match entry {
85 Ok(entry) => {
86 let path = entry.path();
87
88 if filters::should_skip(path, self.config) {
90 return None;
91 }
92
93 match self.build_filtered_entry(&entry) {
95 Ok(Some(filtered)) => Some(Ok(filtered)),
96 Ok(None) => None, Err(e) => Some(Err(e)),
98 }
99 }
100 Err(e) => {
101 Some(Err(ExtractionError::Io(std::io::Error::other(format!(
103 "walkdir error: {e}"
104 )))))
105 }
106 }
107 })
108 }
109
110 fn build_filtered_entry(&self, entry: &walkdir::DirEntry) -> Result<Option<FilteredEntry>> {
115 let path = entry.path().to_path_buf();
116 let metadata = entry.metadata().map_err(|e| {
117 ExtractionError::Io(std::io::Error::other(format!(
118 "cannot read metadata for {}: {e}",
119 path.display()
120 )))
121 })?;
122
123 let entry_type = if metadata.is_symlink() {
125 let target = std::fs::read_link(&path).map_err(|e| {
126 ExtractionError::Io(std::io::Error::other(format!(
127 "cannot read symlink target for {}: {e}",
128 path.display()
129 )))
130 })?;
131 EntryType::Symlink { target }
132 } else if metadata.is_dir() {
133 EntryType::Directory
134 } else {
135 EntryType::File
136 };
137
138 let size = get_file_size(&metadata);
140 if entry_type == EntryType::File
141 && let Some(max_size) = self.config.max_file_size
142 && size > max_size
143 {
144 return Ok(None); }
146
147 let archive_path = filters::compute_archive_path(&path, self.root, self.config)?;
149
150 Ok(Some(FilteredEntry {
151 path,
152 archive_path,
153 entry_type,
154 size,
155 }))
156 }
157}
158
159#[derive(Debug, Clone, PartialEq, Eq)]
164pub struct FilteredEntry {
165 pub path: PathBuf,
167
168 pub archive_path: PathBuf,
170
171 pub entry_type: EntryType,
173
174 pub size: u64,
176}
177
/// Kind of filesystem object a [`FilteredEntry`] refers to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EntryType {
    /// Anything that is neither a symlink nor a directory.
    File,

    /// A directory.
    Directory,

    /// A symbolic link.
    Symlink {
        /// Link target as returned by `std::fs::read_link`.
        target: PathBuf,
    },
}
193
194pub fn collect_entries<P: AsRef<Path>>(
221 sources: &[P],
222 config: &CreationConfig,
223) -> Result<Vec<FilteredEntry>> {
224 let mut entries = Vec::new();
225
226 for source in sources {
227 let path = source.as_ref();
228
229 if !path.exists() {
230 return Err(ExtractionError::SourceNotFound {
231 path: path.to_path_buf(),
232 });
233 }
234
235 if path.is_dir() {
236 let walker = FilteredWalker::new(path, config);
237 for entry in walker.walk() {
238 entries.push(entry?);
239 }
240 } else {
241 let metadata = std::fs::metadata(path)?;
243 let size = if metadata.is_file() {
244 metadata.len()
245 } else {
246 0
247 };
248
249 let entry_type = if metadata.is_symlink() {
250 let target = std::fs::read_link(path)?;
251 EntryType::Symlink { target }
252 } else if metadata.is_dir() {
253 EntryType::Directory
254 } else {
255 EntryType::File
256 };
257
258 let archive_path = if let Some(parent) = path.parent() {
259 filters::compute_archive_path(path, parent, config)?
260 } else {
261 path.file_name()
262 .ok_or_else(|| {
263 ExtractionError::Io(std::io::Error::other(format!(
264 "cannot determine filename for {}",
265 path.display()
266 )))
267 })?
268 .into()
269 };
270
271 entries.push(FilteredEntry {
272 path: path.to_path_buf(),
273 archive_path,
274 entry_type,
275 size,
276 });
277 }
278 }
279
280 Ok(entries)
281}
282
/// Returns the size in bytes recorded in `metadata`, read through the Unix
/// `MetadataExt` extension trait.
#[cfg(unix)]
fn get_file_size(metadata: &Metadata) -> u64 {
    std::os::unix::fs::MetadataExt::size(metadata)
}
289
/// Returns the size in bytes recorded in `metadata` on non-Unix targets.
#[cfg(not(unix))]
fn get_file_size(metadata: &Metadata) -> u64 {
    Metadata::len(metadata)
}
294
#[cfg(test)]
// Test fixtures unwrap freely: a failed setup step should abort the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `contents` to `relative` under `root`.
    fn write_file(root: &Path, relative: &str, contents: impl AsRef<[u8]>) {
        fs::write(root.join(relative), contents).unwrap();
    }

    /// Walks `root` with `config` and unwraps the collected entries.
    fn walk_all(root: &Path, config: &CreationConfig) -> Vec<FilteredEntry> {
        FilteredWalker::new(root, config)
            .walk()
            .collect::<Result<Vec<_>>>()
            .unwrap()
    }

    /// Archive paths of `entries` as string slices.
    fn archive_paths(entries: &[FilteredEntry]) -> Vec<&str> {
        entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect()
    }

    /// Whether any of `paths` contains `needle` as a substring.
    fn has_path(paths: &[&str], needle: &str) -> bool {
        paths.iter().any(|p| p.contains(needle))
    }

    /// First entry whose archive path contains `needle`.
    fn find_entry<'e>(entries: &'e [FilteredEntry], needle: &str) -> Option<&'e FilteredEntry> {
        entries
            .iter()
            .find(|e| e.archive_path.to_str().unwrap().contains(needle))
    }

    #[test]
    fn test_walker_basic_directory() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "file1.txt", "content1");
        write_file(root, "file2.rs", "content2");
        fs::create_dir(root.join("subdir")).unwrap();
        write_file(root, "subdir/file3.txt", "content3");

        let config = CreationConfig::default()
            .with_include_hidden(true)
            .with_exclude_patterns(vec![]);

        let entries = walk_all(root, &config);
        assert_eq!(entries.len(), 5, "expected exactly 5 entries");

        let paths = archive_paths(&entries);
        assert!(has_path(&paths, "file1.txt"));
        assert!(has_path(&paths, "file2.rs"));
        assert!(has_path(&paths, "subdir"));
        assert!(has_path(&paths, "file3.txt"));
    }

    #[test]
    fn test_walker_skips_hidden_files() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "visible.txt", "content");
        write_file(root, ".hidden", "secret");

        let entries = walk_all(root, &CreationConfig::default());
        let paths = archive_paths(&entries);

        assert!(has_path(&paths, "visible.txt"));
        assert!(!has_path(&paths, ".hidden"));
    }

    #[test]
    fn test_walker_includes_hidden_when_configured() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "visible.txt", "content");
        write_file(root, ".hidden", "secret");

        let config = CreationConfig::default().with_include_hidden(true);
        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(has_path(&paths, "visible.txt"));
        assert!(has_path(&paths, ".hidden"));
    }

    #[test]
    fn test_walker_skips_excluded_patterns() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "keep.txt", "keep");
        write_file(root, "skip.tmp", "skip");
        write_file(root, "also.log", "skip");

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string(), "*.log".to_string()]);

        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(has_path(&paths, "keep.txt"));
        assert!(!has_path(&paths, "skip.tmp"));
        assert!(!has_path(&paths, "also.log"));
    }

    #[cfg(unix)]
    #[test]
    fn test_walker_handles_symlinks() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "target.txt", "content");
        std::os::unix::fs::symlink(root.join("target.txt"), root.join("link.txt")).unwrap();

        let entries = walk_all(root, &CreationConfig::default());

        // The link must be present and must be reported as a symlink.
        assert!(
            find_entry(&entries, "link.txt")
                .is_some_and(|e| matches!(e.entry_type, EntryType::Symlink { .. }))
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_walker_detects_symlink_cycles() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("dir1")).unwrap();
        fs::create_dir(root.join("dir1/dir2")).unwrap();

        // dir1/dir2/link points back at dir1, forming a loop once links are followed.
        std::os::unix::fs::symlink(root.join("dir1"), root.join("dir1/dir2/link")).unwrap();

        let config = CreationConfig::default().with_follow_symlinks(true);
        let results: Vec<_> = FilteredWalker::new(root, &config).walk().collect();

        let successes = results.iter().filter(|r| r.is_ok()).count();
        assert!(successes > 0, "should have some entries before cycle");

        let has_cycle_error = results
            .iter()
            .filter_map(|r| r.as_ref().err())
            .map(|e| e.to_string())
            .any(|msg| msg.contains("File system loop") || msg.contains("walkdir error"));
        assert!(has_cycle_error, "should detect symlink cycle");
    }

    #[test]
    fn test_walker_respects_max_file_size() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "small.txt", "tiny");
        write_file(root, "large.txt", "a".repeat(1000));

        let config = CreationConfig::default().with_max_file_size(Some(100));
        let entries = walk_all(root, &config);
        let paths = archive_paths(&entries);

        assert!(has_path(&paths, "small.txt"));
        assert!(!has_path(&paths, "large.txt"));
    }

    #[test]
    fn test_walker_computes_archive_paths() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("src")).unwrap();
        write_file(root, "src/main.rs", "code");

        let entries = walk_all(root, &CreationConfig::default());

        let main_path = find_entry(&entries, "main.rs").map(|e| e.archive_path.as_path());
        assert_eq!(main_path, Some(Path::new("src/main.rs")));
    }

    #[test]
    fn test_walker_strip_prefix() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("project")).unwrap();
        fs::create_dir(root.join("project/src")).unwrap();
        write_file(root, "project/src/main.rs", "code");

        let config = CreationConfig::default().with_strip_prefix(Some(PathBuf::from("project")));
        let entries = walk_all(root, &config);

        let main_path = find_entry(&entries, "main.rs").map(|e| e.archive_path.as_path());
        assert_eq!(main_path, Some(Path::new("src/main.rs")));
    }

    #[test]
    fn test_filtered_entry_file() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/file.txt"),
            archive_path: PathBuf::from("file.txt"),
            entry_type: EntryType::File,
            size: 1024,
        };

        assert_eq!(entry.path.as_path(), Path::new("/tmp/file.txt"));
        assert_eq!(entry.archive_path.as_path(), Path::new("file.txt"));
        assert_eq!(entry.entry_type, EntryType::File);
        assert_eq!(entry.size, 1024);
    }

    #[test]
    fn test_filtered_entry_directory() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/dir"),
            archive_path: PathBuf::from("dir"),
            entry_type: EntryType::Directory,
            size: 0,
        };

        assert_eq!(entry.entry_type, EntryType::Directory);
        assert_eq!(entry.size, 0);
    }

    #[test]
    fn test_filtered_entry_symlink() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/link"),
            archive_path: PathBuf::from("link"),
            entry_type: EntryType::Symlink {
                target: PathBuf::from("target.txt"),
            },
            size: 0,
        };

        let EntryType::Symlink { target } = &entry.entry_type else {
            panic!("expected symlink");
        };
        assert_eq!(target.as_path(), Path::new("target.txt"));
    }

    #[test]
    fn test_entry_type_equality() {
        let link = |name: &str| EntryType::Symlink {
            target: PathBuf::from(name),
        };

        assert_eq!(EntryType::File, EntryType::File);
        assert_eq!(EntryType::Directory, EntryType::Directory);
        assert_eq!(link("a"), link("a"));
        assert_ne!(EntryType::File, EntryType::Directory);
        assert_ne!(link("a"), link("b"));
    }

    #[test]
    fn test_collect_entries_empty_sources() {
        let config = CreationConfig::default();
        let sources: Vec<&Path> = vec![];

        let entries = collect_entries(&sources, &config).unwrap();

        assert!(entries.is_empty());
    }

    #[test]
    fn test_collect_entries_nonexistent_source() {
        let config = CreationConfig::default();
        let sources = [Path::new("/nonexistent/path/that/does/not/exist")];

        let result = collect_entries(&sources, &config);

        assert!(matches!(
            result,
            Err(ExtractionError::SourceNotFound { .. })
        ));
    }

    #[test]
    fn test_collect_entries_mixed_files_and_directories() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "single_file.txt", "standalone");
        fs::create_dir(root.join("dir1")).unwrap();
        write_file(root, "dir1/file1.txt", "content1");
        write_file(root, "dir1/file2.txt", "content2");
        fs::create_dir(root.join("dir2")).unwrap();
        write_file(root, "dir2/file3.txt", "content3");

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [
            root.join("single_file.txt"),
            root.join("dir1"),
            root.join("dir2"),
        ];

        let entries = collect_entries(&sources, &config).unwrap();

        assert!(
            entries.len() >= 5,
            "Expected at least 5 entries (files and dirs), got {}",
            entries.len()
        );

        let paths = archive_paths(&entries);
        assert!(has_path(&paths, "single_file.txt"));
        assert!(has_path(&paths, "file1.txt"));
        assert!(has_path(&paths, "file2.txt"));
        assert!(has_path(&paths, "file3.txt"));
    }

    #[test]
    fn test_collect_entries_large_directory_count() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        for i in 0..50 {
            write_file(root, &format!("file_{i}.txt"), format!("content {i}"));
        }
        fs::create_dir(root.join("subdir")).unwrap();
        for i in 0..30 {
            write_file(
                root,
                &format!("subdir/file_{i}.txt"),
                format!("sub content {i}"),
            );
        }

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [root];

        let entries = collect_entries(&sources, &config).unwrap();

        assert!(
            entries.len() >= 80,
            "Expected at least 80 entries, got {}",
            entries.len()
        );
    }

    #[test]
    fn test_collect_entries_single_file() {
        let temp = TempDir::new().unwrap();
        let file_path = temp.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let config = CreationConfig::default();
        let sources = [&file_path];

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].entry_type, EntryType::File);
        assert!(has_path(&archive_paths(&entries[..1]), "test.txt"));
    }

    #[test]
    fn test_collect_entries_respects_filters() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        write_file(root, "keep.txt", "keep");
        write_file(root, "skip.tmp", "skip");
        write_file(root, ".hidden", "hidden");

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string()])
            .with_include_hidden(false);

        let sources = [root];
        let entries = collect_entries(&sources, &config).unwrap();
        let paths = archive_paths(&entries);

        assert!(has_path(&paths, "keep.txt"));
        assert!(!has_path(&paths, "skip.tmp"));
        assert!(!has_path(&paths, ".hidden"));
    }
}