1use crate::ExtractionError;
8use crate::Result;
9use crate::creation::config::CreationConfig;
10use crate::creation::filters;
11use std::fs::Metadata;
12use std::path::Path;
13use std::path::PathBuf;
14use walkdir::WalkDir;
15
/// Directory walker that applies the creation filters to each entry it visits.
///
/// The walker only borrows its inputs; both references must outlive it (`'a`).
pub struct FilteredWalker<'a> {
    /// Directory at which the traversal starts; also passed to
    /// `filters::compute_archive_path` as the base for archive paths.
    root: &'a Path,
    /// Creation options consulted during the walk (`follow_symlinks`,
    /// `max_file_size`, plus whatever `filters` reads from it).
    config: &'a CreationConfig,
}
45
46impl<'a> FilteredWalker<'a> {
47 #[must_use]
60 pub fn new(root: &'a Path, config: &'a CreationConfig) -> Self {
61 Self { root, config }
62 }
63
64 pub fn walk(&self) -> impl Iterator<Item = Result<FilteredEntry>> + '_ {
79 let walker = WalkDir::new(self.root)
80 .follow_links(self.config.follow_symlinks)
81 .into_iter();
82
83 walker.filter_map(move |entry| {
84 match entry {
85 Ok(entry) => {
86 let path = entry.path();
87
88 if filters::should_skip(path, self.config) {
90 return None;
91 }
92
93 match self.build_filtered_entry(&entry) {
95 Ok(Some(filtered)) => Some(Ok(filtered)),
96 Ok(None) => None, Err(e) => Some(Err(e)),
98 }
99 }
100 Err(e) => {
101 Some(Err(ExtractionError::Io(std::io::Error::other(format!(
103 "walkdir error: {e}"
104 )))))
105 }
106 }
107 })
108 }
109
110 fn build_filtered_entry(&self, entry: &walkdir::DirEntry) -> Result<Option<FilteredEntry>> {
115 let path = entry.path().to_path_buf();
116 let metadata = entry.metadata().map_err(|e| {
117 ExtractionError::Io(std::io::Error::other(format!(
118 "cannot read metadata for {}: {e}",
119 path.display()
120 )))
121 })?;
122
123 let entry_type = if metadata.is_symlink() {
125 let target = std::fs::read_link(&path).map_err(|e| {
126 ExtractionError::Io(std::io::Error::other(format!(
127 "cannot read symlink target for {}: {e}",
128 path.display()
129 )))
130 })?;
131 EntryType::Symlink { target }
132 } else if metadata.is_dir() {
133 EntryType::Directory
134 } else {
135 EntryType::File
136 };
137
138 let size = get_file_size(&metadata);
140 if entry_type == EntryType::File
141 && let Some(max_size) = self.config.max_file_size
142 && size > max_size
143 {
144 return Ok(None); }
146
147 let archive_path = filters::compute_archive_path(&path, self.root, self.config)?;
149
150 Ok(Some(FilteredEntry {
151 path,
152 archive_path,
153 entry_type,
154 size,
155 }))
156 }
157}
158
/// A filesystem entry that passed filtering and is ready to be archived.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilteredEntry {
    /// Path of the entry on disk, as yielded by the walker or as given by the
    /// caller of `collect_entries`.
    pub path: PathBuf,

    /// Path under which the entry is stored in the archive (the result of
    /// `filters::compute_archive_path`).
    pub archive_path: PathBuf,

    /// Whether the entry is a file, a directory, or a symlink.
    pub entry_type: EntryType,

    /// Size in bytes taken from the entry's metadata. `collect_entries`
    /// stores `0` for explicitly listed sources that are not regular files;
    /// the walker stores the metadata size for every kind of entry.
    pub size: u64,
}
177
/// Kind of filesystem object a [`FilteredEntry`] represents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EntryType {
    /// Anything that is neither a symlink nor a directory.
    File,

    /// A directory.
    Directory,

    /// A symbolic link that is recorded as a link rather than followed.
    Symlink {
        /// Link target exactly as returned by `std::fs::read_link`.
        target: PathBuf,
    },
}
193
194pub fn collect_entries<P: AsRef<Path>>(
221 sources: &[P],
222 config: &CreationConfig,
223) -> Result<Vec<FilteredEntry>> {
224 let mut entries = Vec::new();
225
226 for source in sources {
227 let path = source.as_ref();
228
229 if !path.exists() {
230 return Err(ExtractionError::SourceNotFound {
231 path: path.to_path_buf(),
232 });
233 }
234
235 if path.is_dir() {
236 let walker = FilteredWalker::new(path, config);
237 for entry in walker.walk() {
238 entries.push(entry?);
239 }
240 } else {
241 let metadata = std::fs::metadata(path)?;
244 let size = if metadata.is_file() {
245 metadata.len()
246 } else {
247 0
248 };
249
250 let entry_type = if metadata.is_symlink() {
251 let target = std::fs::read_link(path)?;
252 EntryType::Symlink { target }
253 } else if metadata.is_dir() {
254 EntryType::Directory
255 } else {
256 EntryType::File
257 };
258
259 let archive_path = if let Some(parent) = path.parent() {
261 filters::compute_archive_path(path, parent, config)?
262 } else {
263 path.file_name()
264 .ok_or_else(|| {
265 ExtractionError::Io(std::io::Error::other(format!(
266 "cannot determine filename for {}",
267 path.display()
268 )))
269 })?
270 .into()
271 };
272
273 entries.push(FilteredEntry {
274 path: path.to_path_buf(),
275 archive_path,
276 entry_type,
277 size,
278 });
279 }
280 }
281
282 Ok(entries)
283}
284
/// Returns the size in bytes recorded in `metadata`.
///
/// `Metadata::len` is portable and reports the same value as the Unix-only
/// `MetadataExt::size`, so no platform-specific variant is needed.
fn get_file_size(metadata: &Metadata) -> u64 {
    metadata.len()
}
296
// Unit tests for the walker, the entry types and `collect_entries`.
// Filesystem fixtures live in a fresh `TempDir` per test.
#[cfg(test)]
// `unwrap` is acceptable here: a failed unwrap simply fails the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Walking a small tree with hidden files included and no exclude
    /// patterns yields every file and directory.
    #[test]
    fn test_walker_basic_directory() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Fixture: two files at the top level plus one file in a subdirectory.
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.rs"), "content2").unwrap();
        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default()
            .with_include_hidden(true)
            .with_exclude_patterns(vec![]);

        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        // Three files and one subdirectory were created; the fifth entry is
        // presumably the root directory itself.
        assert_eq!(entries.len(), 5, "expected exactly 5 entries");

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("file1.txt")));
        assert!(paths.iter().any(|p| p.contains("file2.rs")));
        assert!(paths.iter().any(|p| p.contains("subdir")));
        assert!(paths.iter().any(|p| p.contains("file3.txt")));
    }

    /// With the default configuration a dot-file is not reported.
    #[test]
    fn test_walker_skips_hidden_files() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        // Default config: this test relies on hidden files being excluded by default.
        let config = CreationConfig::default();
        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("visible.txt")));
        assert!(!paths.iter().any(|p| p.contains(".hidden")));
    }

    /// `with_include_hidden(true)` makes the dot-file show up.
    #[test]
    fn test_walker_includes_hidden_when_configured() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("visible.txt")));
        assert!(paths.iter().any(|p| p.contains(".hidden")));
    }

    /// Files matching any configured exclude pattern are dropped.
    #[test]
    fn test_walker_skips_excluded_patterns() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join("also.log"), "skip").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string(), "*.log".to_string()]);

        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("keep.txt")));
        assert!(!paths.iter().any(|p| p.contains("skip.tmp")));
        assert!(!paths.iter().any(|p| p.contains("also.log")));
    }

    /// With the default configuration a symlink is reported as
    /// `EntryType::Symlink` rather than as the file it points to.
    #[cfg(unix)]
    #[test]
    fn test_walker_handles_symlinks() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("target.txt"), "content").unwrap();
        std::os::unix::fs::symlink(root.join("target.txt"), root.join("link.txt")).unwrap();

        // Default config: this test relies on symlinks not being followed by default.
        let config = CreationConfig::default();
        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let link_entry = entries
            .iter()
            .find(|e| e.archive_path.to_str().unwrap().contains("link.txt"));

        assert!(link_entry.is_some());
        if let Some(entry) = link_entry {
            assert!(matches!(entry.entry_type, EntryType::Symlink { .. }));
        }
    }

    /// When symlinks are followed, a link that points back at one of its own
    /// ancestors must surface as an error item instead of looping forever.
    #[cfg(unix)]
    #[test]
    fn test_walker_detects_symlink_cycles() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("dir1")).unwrap();
        fs::create_dir(root.join("dir1/dir2")).unwrap();

        // dir1/dir2/link -> dir1 closes the loop.
        std::os::unix::fs::symlink(root.join("dir1"), root.join("dir1/dir2/link")).unwrap();

        // Following symlinks is what makes the cycle reachable.
        let config = CreationConfig::default().with_follow_symlinks(true);
        let walker = FilteredWalker::new(root, &config);

        // Collect the raw results; an `Err` item is expected here.
        let results: Vec<_> = walker.walk().collect();

        // Entries visited before the loop is hit must still be reported.
        let successes = results.iter().filter(|r| r.is_ok()).count();
        assert!(successes > 0, "should have some entries before cycle");

        // Accept either the underlying loop message or the `walkdir error`
        // prefix added by `FilteredWalker::walk`.
        let has_cycle_error = results.iter().any(|r| {
            if let Err(e) = r {
                e.to_string().contains("File system loop")
                    || e.to_string().contains("walkdir error")
            } else {
                false
            }
        });
        assert!(has_cycle_error, "should detect symlink cycle");
    }

    /// Files larger than `max_file_size` are omitted; smaller ones are kept.
    #[test]
    fn test_walker_respects_max_file_size() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // 4 bytes (under the limit) and 1000 bytes (over the 100-byte limit).
        fs::write(root.join("small.txt"), "tiny").unwrap();
        fs::write(root.join("large.txt"), "a".repeat(1000)).unwrap();
        let config = CreationConfig::default().with_max_file_size(Some(100));

        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("small.txt")));
        assert!(!paths.iter().any(|p| p.contains("large.txt")));
    }

    /// Archive paths are relative to the walk root.
    #[test]
    fn test_walker_computes_archive_paths() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "code").unwrap();

        let config = CreationConfig::default();
        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let main_entry = entries
            .iter()
            .find(|e| e.archive_path.to_str().unwrap().contains("main.rs"));

        assert!(main_entry.is_some());
        if let Some(entry) = main_entry {
            assert_eq!(entry.archive_path, Path::new("src/main.rs"));
        }
    }

    /// A configured strip prefix is removed from the front of archive paths.
    #[test]
    fn test_walker_strip_prefix() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::create_dir(root.join("project")).unwrap();
        fs::create_dir(root.join("project/src")).unwrap();
        fs::write(root.join("project/src/main.rs"), "code").unwrap();

        let config = CreationConfig::default().with_strip_prefix(Some(PathBuf::from("project")));

        let walker = FilteredWalker::new(root, &config);
        let entries: Vec<_> = walker.walk().collect::<Result<Vec<_>>>().unwrap();

        let main_entry = entries
            .iter()
            .find(|e| e.archive_path.to_str().unwrap().contains("main.rs"));

        assert!(main_entry.is_some());
        if let Some(entry) = main_entry {
            // `project/src/main.rs` with the `project` prefix stripped.
            assert_eq!(entry.archive_path, Path::new("src/main.rs"));
        }
    }

    /// A hand-built file entry exposes the values it was constructed with.
    #[test]
    fn test_filtered_entry_file() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/file.txt"),
            archive_path: PathBuf::from("file.txt"),
            entry_type: EntryType::File,
            size: 1024,
        };

        assert_eq!(entry.path, Path::new("/tmp/file.txt"));
        assert_eq!(entry.archive_path, Path::new("file.txt"));
        assert!(matches!(entry.entry_type, EntryType::File));
        assert_eq!(entry.size, 1024);
    }

    /// A hand-built directory entry keeps its type and size.
    #[test]
    fn test_filtered_entry_directory() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/dir"),
            archive_path: PathBuf::from("dir"),
            entry_type: EntryType::Directory,
            size: 0,
        };

        assert!(matches!(entry.entry_type, EntryType::Directory));
        assert_eq!(entry.size, 0);
    }

    /// A hand-built symlink entry exposes its target.
    #[test]
    fn test_filtered_entry_symlink() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/link"),
            archive_path: PathBuf::from("link"),
            entry_type: EntryType::Symlink {
                target: PathBuf::from("target.txt"),
            },
            size: 0,
        };

        match &entry.entry_type {
            EntryType::Symlink { target } => {
                assert_eq!(target, Path::new("target.txt"));
            }
            _ => panic!("expected symlink"),
        }
    }

    /// `EntryType` equality compares the variant and, for symlinks, the target.
    #[test]
    fn test_entry_type_equality() {
        assert_eq!(EntryType::File, EntryType::File);
        assert_eq!(EntryType::Directory, EntryType::Directory);
        assert_eq!(
            EntryType::Symlink {
                target: PathBuf::from("a")
            },
            EntryType::Symlink {
                target: PathBuf::from("a")
            }
        );
        assert_ne!(EntryType::File, EntryType::Directory);
        assert_ne!(
            EntryType::Symlink {
                target: PathBuf::from("a")
            },
            EntryType::Symlink {
                target: PathBuf::from("b")
            }
        );
    }

    /// An empty source list produces an empty result, not an error.
    #[test]
    fn test_collect_entries_empty_sources() {
        let config = CreationConfig::default();
        let sources: Vec<&Path> = vec![];

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 0);
    }

    /// A missing source is reported as `SourceNotFound`.
    #[test]
    fn test_collect_entries_nonexistent_source() {
        let config = CreationConfig::default();
        let sources = [Path::new("/nonexistent/path/that/does/not/exist")];

        let result = collect_entries(&sources, &config);

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SourceNotFound { .. }
        ));
    }

    /// File and directory sources can be mixed in a single call.
    #[test]
    fn test_collect_entries_mixed_files_and_directories() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // One standalone file plus two directories holding three files in total.
        fs::write(root.join("single_file.txt"), "standalone").unwrap();
        fs::create_dir(root.join("dir1")).unwrap();
        fs::write(root.join("dir1/file1.txt"), "content1").unwrap();
        fs::write(root.join("dir1/file2.txt"), "content2").unwrap();
        fs::create_dir(root.join("dir2")).unwrap();
        fs::write(root.join("dir2/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [
            root.join("single_file.txt"),
            root.join("dir1"),
            root.join("dir2"),
        ];

        let entries = collect_entries(&sources, &config).unwrap();

        // Lower bound only: the four files are checked by name below, and
        // directory entries account for the rest.
        assert!(
            entries.len() >= 5,
            "Expected at least 5 entries (files and dirs), got {}",
            entries.len()
        );

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("single_file.txt")));
        assert!(paths.iter().any(|p| p.contains("file1.txt")));
        assert!(paths.iter().any(|p| p.contains("file2.txt")));
        assert!(paths.iter().any(|p| p.contains("file3.txt")));
    }

    /// A directory with many files is collected completely.
    #[test]
    fn test_collect_entries_large_directory_count() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // 50 files at the top level and 30 more in `subdir`.
        for i in 0..50 {
            fs::write(root.join(format!("file_{i}.txt")), format!("content {i}")).unwrap();
        }
        fs::create_dir(root.join("subdir")).unwrap();
        for i in 0..30 {
            fs::write(
                root.join(format!("subdir/file_{i}.txt")),
                format!("sub content {i}"),
            )
            .unwrap();
        }

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [root];

        let entries = collect_entries(&sources, &config).unwrap();

        // 80 files were created; directory entries may add to the count.
        assert!(
            entries.len() >= 80,
            "Expected at least 80 entries, got {}",
            entries.len()
        );
    }

    /// A single regular file source produces exactly one `File` entry.
    #[test]
    fn test_collect_entries_single_file() {
        let temp = TempDir::new().unwrap();
        let file_path = temp.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let config = CreationConfig::default();
        let sources = [&file_path];

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].entry_type, EntryType::File);
        assert!(
            entries[0]
                .archive_path
                .to_str()
                .unwrap()
                .contains("test.txt")
        );
    }

    /// Exclude patterns and hidden-file filtering apply to directory sources.
    #[test]
    fn test_collect_entries_respects_filters() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join(".hidden"), "hidden").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string()])
            .with_include_hidden(false);

        let sources = [root];
        let entries = collect_entries(&sources, &config).unwrap();

        let paths: Vec<_> = entries
            .iter()
            .map(|e| e.archive_path.to_str().unwrap())
            .collect();

        assert!(paths.iter().any(|p| p.contains("keep.txt")));
        assert!(!paths.iter().any(|p| p.contains("skip.tmp")));
        assert!(!paths.iter().any(|p| p.contains(".hidden")));
    }
}