use crate::ExtractionError;
use crate::Result;
use crate::creation::config::CreationConfig;
use crate::creation::filters;
use std::fs::Metadata;
use std::path::Path;
use std::path::PathBuf;
use walkdir::WalkDir;
/// Walks a directory tree and yields only the entries allowed by a [`CreationConfig`].
///
/// Both the root path and the configuration are borrowed for the lifetime `'a`.
pub struct FilteredWalker<'a> {
/// Directory where the walk starts; also the base handed to `filters::compute_archive_path`.
root: &'a Path,
/// Settings read during the walk (`follow_symlinks`, `max_file_size`) and passed to `filters`.
config: &'a CreationConfig,
}
impl<'a> FilteredWalker<'a> {
#[must_use]
pub fn new(root: &'a Path, config: &'a CreationConfig) -> Self {
Self { root, config }
}
pub fn walk(&self) -> impl Iterator<Item = Result<FilteredEntry>> + '_ {
let walker = WalkDir::new(self.root)
.follow_links(self.config.follow_symlinks)
.into_iter();
walker.filter_map(move |entry| {
match entry {
Ok(entry) => {
let path = entry.path();
if filters::should_skip(path, self.config) {
return None;
}
match self.build_filtered_entry(&entry) {
Ok(Some(filtered)) => Some(Ok(filtered)),
Ok(None) => None, Err(e) => Some(Err(e)),
}
}
Err(e) => {
Some(Err(ExtractionError::Io(std::io::Error::other(format!(
"walkdir error: {e}"
)))))
}
}
})
}
fn build_filtered_entry(&self, entry: &walkdir::DirEntry) -> Result<Option<FilteredEntry>> {
let path = entry.path().to_path_buf();
let metadata = entry.metadata().map_err(|e| {
ExtractionError::Io(std::io::Error::other(format!(
"cannot read metadata for {}: {e}",
path.display()
)))
})?;
let entry_type = if metadata.is_symlink() {
let target = std::fs::read_link(&path).map_err(|e| {
ExtractionError::Io(std::io::Error::other(format!(
"cannot read symlink target for {}: {e}",
path.display()
)))
})?;
EntryType::Symlink { target }
} else if metadata.is_dir() {
EntryType::Directory
} else {
EntryType::File
};
let size = get_file_size(&metadata);
if entry_type == EntryType::File
&& let Some(max_size) = self.config.max_file_size
&& size > max_size
{
return Ok(None); }
let archive_path = filters::compute_archive_path(&path, self.root, self.config)?;
Ok(Some(FilteredEntry {
path,
archive_path,
entry_type,
size,
}))
}
}
/// A filesystem entry selected by [`FilteredWalker`] or [`collect_entries`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilteredEntry {
/// Path of the entry on disk, as reported by the walker or given as a source.
pub path: PathBuf,
/// Path produced by `filters::compute_archive_path` (or the bare file name for a source
/// without a parent); presumably the name the entry gets inside the archive.
pub archive_path: PathBuf,
/// Whether the entry is a regular file, a directory, or a symlink.
pub entry_type: EntryType,
/// Size taken from the entry's metadata. `collect_entries` stores 0 for non-file sources.
pub size: u64,
}
/// Kind of filesystem object a [`FilteredEntry`] refers to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EntryType {
/// Anything whose metadata reports neither a symlink nor a directory.
File,
/// A directory.
Directory,
/// A symbolic link.
Symlink {
/// Link target exactly as returned by `std::fs::read_link`.
target: PathBuf,
},
}
/// Collects the archive entries for every path in `sources`, in order.
///
/// A source that is a directory is walked recursively with [`FilteredWalker`], so the skip
/// filters and the size limit from `config` apply to everything beneath it. Any other source
/// is added as a single entry without consulting those filters; its archive path is computed
/// relative to its parent directory.
///
/// When `config.follow_symlinks` is `false`, a source that is itself a symlink to a
/// non-directory is recorded as [`EntryType::Symlink`] with size 0 instead of being reported
/// as the file it points to.
///
/// # Errors
///
/// * `ExtractionError::SourceNotFound` if a source does not exist. Existence is checked with
///   `Path::exists`, which follows symlinks, so a dangling symlink counts as missing.
/// * Any error yielded by the walker, by reading metadata or a symlink target, or by
///   `filters::compute_archive_path`.
/// * `ExtractionError::Io` if a source has neither a parent nor a file name.
pub fn collect_entries<P: AsRef<Path>>(
    sources: &[P],
    config: &CreationConfig,
) -> Result<Vec<FilteredEntry>> {
    let mut entries = Vec::new();
    for source in sources {
        let path = source.as_ref();
        if !path.exists() {
            return Err(ExtractionError::SourceNotFound {
                path: path.to_path_buf(),
            });
        }

        if path.is_dir() {
            let walker = FilteredWalker::new(path, config);
            for entry in walker.walk() {
                entries.push(entry?);
            }
            continue;
        }

        // `fs::metadata` resolves symlinks and therefore can never report one. When links are
        // not being followed, inspect the link itself so a symlink source is detected.
        let metadata = if config.follow_symlinks {
            std::fs::metadata(path)?
        } else {
            std::fs::symlink_metadata(path)?
        };

        let entry_type = if metadata.is_symlink() {
            let target = std::fs::read_link(path)?;
            EntryType::Symlink { target }
        } else if metadata.is_dir() {
            // Only reachable if the path changed type after the `is_dir` check above.
            EntryType::Directory
        } else {
            EntryType::File
        };

        // Only regular files carry a payload size.
        let size = if metadata.is_file() {
            metadata.len()
        } else {
            0
        };

        let archive_path = if let Some(parent) = path.parent() {
            filters::compute_archive_path(path, parent, config)?
        } else {
            path.file_name()
                .ok_or_else(|| {
                    ExtractionError::Io(std::io::Error::other(format!(
                        "cannot determine filename for {}",
                        path.display()
                    )))
                })?
                .into()
        };

        entries.push(FilteredEntry {
            path: path.to_path_buf(),
            archive_path,
            entry_type,
            size,
        });
    }
    Ok(entries)
}
/// Returns the size in bytes recorded in `metadata`, via the Unix `MetadataExt::size` accessor.
#[cfg(unix)]
fn get_file_size(metadata: &Metadata) -> u64 {
    std::os::unix::fs::MetadataExt::size(metadata)
}

/// Returns the size in bytes recorded in `metadata`, via the portable `Metadata::len`.
#[cfg(not(unix))]
fn get_file_size(metadata: &Metadata) -> u64 {
    Metadata::len(metadata)
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Walks `root` with `config` and unwraps every yielded entry.
    fn walk_all(root: &Path, config: &CreationConfig) -> Vec<FilteredEntry> {
        let walker = FilteredWalker::new(root, config);
        let collected: Result<Vec<_>> = walker.walk().collect();
        collected.unwrap()
    }

    /// Returns every entry's archive path as `&str`, panicking on non-UTF-8 paths.
    fn archive_paths(entries: &[FilteredEntry]) -> Vec<&str> {
        entries
            .iter()
            .map(|entry| entry.archive_path.to_str().unwrap())
            .collect()
    }

    /// True when at least one of `paths` contains `needle` as a substring.
    fn any_contains(paths: &[&str], needle: &str) -> bool {
        paths.iter().any(|candidate| candidate.contains(needle))
    }

    /// First entry whose archive path contains `needle`.
    fn find_by_fragment<'e>(
        entries: &'e [FilteredEntry],
        needle: &str,
    ) -> Option<&'e FilteredEntry> {
        entries
            .iter()
            .find(|entry| entry.archive_path.to_str().unwrap().contains(needle))
    }

    // A plain tree yields the root, both files, the subdirectory and the nested file.
    #[test]
    fn test_walker_basic_directory() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.rs"), "content2").unwrap();
        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default()
            .with_include_hidden(true)
            .with_exclude_patterns(vec![]);
        let entries = walk_all(root, &config);

        assert_eq!(entries.len(), 5, "expected exactly 5 entries");
        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "file1.txt"));
        assert!(any_contains(&paths, "file2.rs"));
        assert!(any_contains(&paths, "subdir"));
        assert!(any_contains(&paths, "file3.txt"));
    }

    // With the default configuration dot-files are left out.
    #[test]
    fn test_walker_skips_hidden_files() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        let config = CreationConfig::default();
        let entries = walk_all(root, &config);

        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "visible.txt"));
        assert!(!any_contains(&paths, ".hidden"));
    }

    // Enabling `include_hidden` brings dot-files back.
    #[test]
    fn test_walker_includes_hidden_when_configured() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("visible.txt"), "content").unwrap();
        fs::write(root.join(".hidden"), "secret").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let entries = walk_all(root, &config);

        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "visible.txt"));
        assert!(any_contains(&paths, ".hidden"));
    }

    // Files matching an exclude pattern are dropped.
    #[test]
    fn test_walker_skips_excluded_patterns() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join("also.log"), "skip").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string(), "*.log".to_string()]);
        let entries = walk_all(root, &config);

        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "keep.txt"));
        assert!(!any_contains(&paths, "skip.tmp"));
        assert!(!any_contains(&paths, "also.log"));
    }

    // A symlink inside the tree is reported with the `Symlink` entry type.
    #[cfg(unix)]
    #[test]
    fn test_walker_handles_symlinks() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("target.txt"), "content").unwrap();
        std::os::unix::fs::symlink(root.join("target.txt"), root.join("link.txt")).unwrap();

        let config = CreationConfig::default();
        let entries = walk_all(root, &config);

        let link_entry = find_by_fragment(&entries, "link.txt");
        assert!(link_entry.is_some());
        assert!(matches!(
            link_entry.unwrap().entry_type,
            EntryType::Symlink { .. }
        ));
    }

    // Following symlinks into a cycle surfaces an error item instead of looping forever.
    #[cfg(unix)]
    #[test]
    fn test_walker_detects_symlink_cycles() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::create_dir(root.join("dir1")).unwrap();
        fs::create_dir(root.join("dir1/dir2")).unwrap();
        std::os::unix::fs::symlink(root.join("dir1"), root.join("dir1/dir2/link")).unwrap();

        let config = CreationConfig::default().with_follow_symlinks(true);
        let walker = FilteredWalker::new(root, &config);
        let results: Vec<_> = walker.walk().collect();

        let successes = results.iter().filter(|outcome| outcome.is_ok()).count();
        assert!(successes > 0, "should have some entries before cycle");

        let has_cycle_error = results
            .iter()
            .filter_map(|outcome| outcome.as_ref().err())
            .any(|error| {
                let message = error.to_string();
                message.contains("File system loop") || message.contains("walkdir error")
            });
        assert!(has_cycle_error, "should detect symlink cycle");
    }

    // Files above `max_file_size` are filtered out; smaller ones stay.
    #[test]
    fn test_walker_respects_max_file_size() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("small.txt"), "tiny").unwrap();
        fs::write(root.join("large.txt"), "a".repeat(1000)).unwrap();

        let config = CreationConfig::default().with_max_file_size(Some(100));
        let entries = walk_all(root, &config);

        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "small.txt"));
        assert!(!any_contains(&paths, "large.txt"));
    }

    // Archive paths are relative to the walk root.
    #[test]
    fn test_walker_computes_archive_paths() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::create_dir(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "code").unwrap();

        let config = CreationConfig::default();
        let entries = walk_all(root, &config);

        let main_entry = find_by_fragment(&entries, "main.rs");
        assert!(main_entry.is_some());
        assert_eq!(main_entry.unwrap().archive_path, Path::new("src/main.rs"));
    }

    // A configured strip prefix is removed from the archive path.
    #[test]
    fn test_walker_strip_prefix() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::create_dir(root.join("project")).unwrap();
        fs::create_dir(root.join("project/src")).unwrap();
        fs::write(root.join("project/src/main.rs"), "code").unwrap();

        let config = CreationConfig::default().with_strip_prefix(Some(PathBuf::from("project")));
        let entries = walk_all(root, &config);

        let main_entry = find_by_fragment(&entries, "main.rs");
        assert!(main_entry.is_some());
        assert_eq!(main_entry.unwrap().archive_path, Path::new("src/main.rs"));
    }

    // Field round-trip for a file entry.
    #[test]
    fn test_filtered_entry_file() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/file.txt"),
            archive_path: PathBuf::from("file.txt"),
            entry_type: EntryType::File,
            size: 1024,
        };

        assert_eq!(entry.path, Path::new("/tmp/file.txt"));
        assert_eq!(entry.archive_path, Path::new("file.txt"));
        assert!(matches!(entry.entry_type, EntryType::File));
        assert_eq!(entry.size, 1024);
    }

    // Field round-trip for a directory entry.
    #[test]
    fn test_filtered_entry_directory() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/dir"),
            archive_path: PathBuf::from("dir"),
            entry_type: EntryType::Directory,
            size: 0,
        };

        assert!(matches!(entry.entry_type, EntryType::Directory));
        assert_eq!(entry.size, 0);
    }

    // A symlink entry keeps the target it was built with.
    #[test]
    fn test_filtered_entry_symlink() {
        let entry = FilteredEntry {
            path: PathBuf::from("/tmp/link"),
            archive_path: PathBuf::from("link"),
            entry_type: EntryType::Symlink {
                target: PathBuf::from("target.txt"),
            },
            size: 0,
        };

        let EntryType::Symlink { target } = &entry.entry_type else {
            panic!("expected symlink");
        };
        assert_eq!(target, Path::new("target.txt"));
    }

    // Equality distinguishes variants and symlink targets.
    #[test]
    fn test_entry_type_equality() {
        let symlink_to = |target: &str| EntryType::Symlink {
            target: PathBuf::from(target),
        };

        assert_eq!(EntryType::File, EntryType::File);
        assert_eq!(EntryType::Directory, EntryType::Directory);
        assert_eq!(symlink_to("a"), symlink_to("a"));
        assert_ne!(EntryType::File, EntryType::Directory);
        assert_ne!(symlink_to("a"), symlink_to("b"));
    }

    // No sources means no entries and no error.
    #[test]
    fn test_collect_entries_empty_sources() {
        let config = CreationConfig::default();
        let sources = Vec::<&Path>::new();

        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 0);
    }

    // A missing source is reported as `SourceNotFound`.
    #[test]
    fn test_collect_entries_nonexistent_source() {
        let config = CreationConfig::default();
        let sources = [Path::new("/nonexistent/path/that/does/not/exist")];

        let result = collect_entries(&sources, &config);

        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(ExtractionError::SourceNotFound { .. })
        ));
    }

    // File and directory sources can be mixed in one call.
    #[test]
    fn test_collect_entries_mixed_files_and_directories() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("single_file.txt"), "standalone").unwrap();
        fs::create_dir(root.join("dir1")).unwrap();
        fs::write(root.join("dir1/file1.txt"), "content1").unwrap();
        fs::write(root.join("dir1/file2.txt"), "content2").unwrap();
        fs::create_dir(root.join("dir2")).unwrap();
        fs::write(root.join("dir2/file3.txt"), "content3").unwrap();

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [
            root.join("single_file.txt"),
            root.join("dir1"),
            root.join("dir2"),
        ];
        let entries = collect_entries(&sources, &config).unwrap();

        assert!(
            entries.len() >= 5,
            "Expected at least 5 entries (files and dirs), got {}",
            entries.len()
        );
        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "single_file.txt"));
        assert!(any_contains(&paths, "file1.txt"));
        assert!(any_contains(&paths, "file2.txt"));
        assert!(any_contains(&paths, "file3.txt"));
    }

    // Eighty files across two levels are all collected.
    #[test]
    fn test_collect_entries_large_directory_count() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        for i in 0..50 {
            fs::write(root.join(format!("file_{i}.txt")), format!("content {i}")).unwrap();
        }
        fs::create_dir(root.join("subdir")).unwrap();
        for i in 0..30 {
            fs::write(
                root.join(format!("subdir/file_{i}.txt")),
                format!("sub content {i}"),
            )
            .unwrap();
        }

        let config = CreationConfig::default().with_include_hidden(true);
        let sources = [root];
        let entries = collect_entries(&sources, &config).unwrap();

        assert!(
            entries.len() >= 80,
            "Expected at least 80 entries, got {}",
            entries.len()
        );
    }

    // A single file source produces exactly one `File` entry.
    #[test]
    fn test_collect_entries_single_file() {
        let temp = TempDir::new().unwrap();
        let file_path = temp.path().join("test.txt");
        fs::write(&file_path, "content").unwrap();

        let config = CreationConfig::default();
        let sources = [&file_path];
        let entries = collect_entries(&sources, &config).unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].entry_type, EntryType::File);
        let archive_name = entries[0].archive_path.to_str().unwrap();
        assert!(archive_name.contains("test.txt"));
    }

    // Directory sources go through the same exclude and hidden filters as the walker.
    #[test]
    fn test_collect_entries_respects_filters() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("keep.txt"), "keep").unwrap();
        fs::write(root.join("skip.tmp"), "skip").unwrap();
        fs::write(root.join(".hidden"), "hidden").unwrap();

        let config = CreationConfig::default()
            .with_exclude_patterns(vec!["*.tmp".to_string()])
            .with_include_hidden(false);
        let sources = [root];
        let entries = collect_entries(&sources, &config).unwrap();

        let paths = archive_paths(&entries);
        assert!(any_contains(&paths, "keep.txt"));
        assert!(!any_contains(&paths, "skip.tmp"));
        assert!(!any_contains(&paths, ".hidden"));
    }
}