use crate::error::StorageError;
use std::path::PathBuf;
use walkdir::{DirEntry, WalkDir};
/// A single filesystem item reported by a directory walk.
///
/// `PartialEq`/`Eq` are derived so that entries can be compared directly
/// (for example in assertions) instead of being destructured first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Entry {
    /// A file together with the length reported by its metadata.
    File { path: PathBuf, size: u64 },
    /// A directory.
    Directory { path: PathBuf },
}

impl Entry {
    /// Returns the path of this entry regardless of its variant.
    ///
    /// Saves callers from repeating the same two-arm `match` whenever only
    /// the location of the entry matters.
    pub fn path(&self) -> &std::path::Path {
        match self {
            Entry::File { path, .. } | Entry::Directory { path } => path,
        }
    }
}
/// Options that control how a directory tree is traversed.
#[derive(Debug, Clone)]
pub struct WalkerConfig {
    /// Whether symbolic links are followed during traversal.
    pub follow_symlinks: bool,
    /// Plain-string patterns (not globs) used to decide which entries are
    /// skipped.
    pub ignore_patterns: Vec<String>,
    /// Maximum traversal depth; `None` places no limit on the depth.
    pub max_depth: Option<usize>,
}

impl Default for WalkerConfig {
    /// Builds the default configuration: symlinks are not followed, the
    /// depth is unlimited, and common VCS / build / dependency directories
    /// are on the ignore list.
    fn default() -> Self {
        let ignore_patterns = [".git", "target", "node_modules", ".cargo"]
            .iter()
            .copied()
            .map(String::from)
            .collect();

        Self {
            follow_symlinks: false,
            ignore_patterns,
            max_depth: None,
        }
    }
}
/// Collects the files and directories found beneath a root path.
pub struct Walker {
/// Path the traversal starts from; the root itself is not reported.
root: PathBuf,
/// Traversal options (symlink handling, ignore patterns, depth limit).
config: WalkerConfig,
}
impl Walker {
pub fn new(root: PathBuf) -> Self {
Self {
root,
config: WalkerConfig::default(),
}
}
pub fn with_config(root: PathBuf, config: WalkerConfig) -> Self {
Self { root, config }
}
pub fn walk(&self) -> Result<Vec<Entry>, StorageError> {
let mut entries = Vec::new();
let walker = WalkDir::new(&self.root)
.follow_links(self.config.follow_symlinks)
.max_depth(self.config.max_depth.unwrap_or(usize::MAX));
for entry in walker {
let entry = entry.map_err(|e| {
StorageError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to walk directory: {}", e),
))
})?;
if self.should_ignore(&entry) {
continue;
}
let path = entry.path().to_path_buf();
if path == self.root {
continue;
}
let metadata = entry.metadata().map_err(|e| {
StorageError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Failed to read metadata for {:?}: {}", path, e),
))
})?;
if metadata.is_file() {
entries.push(Entry::File {
path,
size: metadata.len(),
});
} else if metadata.is_dir() {
entries.push(Entry::Directory { path });
}
}
entries.sort_by(|a, b| {
let path_a = match a {
Entry::File { path, .. } | Entry::Directory { path } => path,
};
let path_b = match b {
Entry::File { path, .. } | Entry::Directory { path } => path,
};
path_a.cmp(path_b)
});
Ok(entries)
}
fn should_ignore(&self, entry: &DirEntry) -> bool {
let path = entry.path();
if path.file_name() == Some(std::ffi::OsStr::new(".gitignore")) {
return false;
}
let path_str = path.to_string_lossy();
for pattern in &self.config.ignore_patterns {
if path_str.contains(pattern) {
return true;
}
for component in path.components() {
if let std::path::Component::Normal(name) = component {
if name.to_string_lossy() == pattern.as_str() {
return true;
}
}
}
}
false
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;
    use tempfile::TempDir;

    /// Returns the path carried by either `Entry` variant.
    fn path_of(entry: &Entry) -> &Path {
        match entry {
            Entry::File { path, .. } | Entry::Directory { path } => path,
        }
    }

    /// Collects the paths of all entries, preserving the order of `entries`.
    fn all_paths(entries: &[Entry]) -> Vec<PathBuf> {
        entries.iter().map(|e| path_of(e).to_path_buf()).collect()
    }

    #[test]
    fn test_walker_collects_files() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();
        fs::write(root.join("file1.txt"), "content1").unwrap();
        fs::write(root.join("file2.txt"), "content2").unwrap();

        let entries = Walker::new(root.clone()).walk().unwrap();

        let files: Vec<(PathBuf, u64)> = entries
            .iter()
            .filter_map(|e| match e {
                Entry::File { path, size } => Some((path.clone(), *size)),
                Entry::Directory { .. } => None,
            })
            .collect();

        assert_eq!(entries.len(), 2);
        // Both fixture files hold eight bytes of content.
        assert_eq!(
            files,
            vec![(root.join("file1.txt"), 8), (root.join("file2.txt"), 8)]
        );
    }

    #[test]
    fn test_walker_collects_directories() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();
        fs::create_dir(root.join("dir1")).unwrap();
        fs::create_dir(root.join("dir2")).unwrap();
        fs::write(root.join("dir1").join("file.txt"), "content").unwrap();

        let entries = Walker::new(root.clone()).walk().unwrap();

        let dirs: Vec<PathBuf> = entries
            .iter()
            .filter_map(|e| match e {
                Entry::Directory { path } => Some(path.clone()),
                Entry::File { .. } => None,
            })
            .collect();

        // Both directories must be reported, including the empty one.
        assert_eq!(dirs, vec![root.join("dir1"), root.join("dir2")]);
        assert_eq!(
            all_paths(&entries),
            vec![
                root.join("dir1"),
                root.join("dir1").join("file.txt"),
                root.join("dir2"),
            ]
        );
    }

    #[test]
    fn test_walker_ignores_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();
        fs::write(root.join("file.txt"), "content").unwrap();

        // Fixture creation must succeed; otherwise the test would pass
        // without ever exercising the ignore logic.
        let git_dir = root.join(".git");
        fs::create_dir(&git_dir).unwrap();
        fs::write(git_dir.join("config"), "git config").unwrap();

        let target_dir = root.join("target");
        fs::create_dir(&target_dir).unwrap();
        fs::write(target_dir.join("file.rs"), "rust code").unwrap();

        let entries = Walker::new(root.clone()).walk().unwrap();

        // Comparing against the exact expected list keeps the assertion
        // independent of where the temporary directory happens to live.
        assert_eq!(all_paths(&entries), vec![root.join("file.txt")]);
    }

    #[test]
    fn test_walker_keeps_gitignore_file() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();
        fs::write(root.join(".gitignore"), "target\n").unwrap();
        fs::write(root.join("file.txt"), "content").unwrap();

        let entries = Walker::new(root.clone()).walk().unwrap();

        // `.gitignore` merely starts with `.git`; it must not be dropped.
        assert_eq!(
            all_paths(&entries),
            vec![root.join(".gitignore"), root.join("file.txt")]
        );
    }

    #[test]
    fn test_walker_deterministic_ordering() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();
        fs::write(root.join("z_file.txt"), "content").unwrap();
        fs::write(root.join("a_file.txt"), "content").unwrap();
        fs::write(root.join("m_file.txt"), "content").unwrap();

        let walker = Walker::new(root.clone());
        let first = all_paths(&walker.walk().unwrap());
        let second = all_paths(&walker.walk().unwrap());

        // Repeated walks agree with each other...
        assert_eq!(first, second);
        // ...and the result is sorted by path.
        assert_eq!(
            first,
            vec![
                root.join("a_file.txt"),
                root.join("m_file.txt"),
                root.join("z_file.txt"),
            ]
        );
    }
}