use crate::{FileHashCache, FileHasher};
use globset::GlobSet;
use std::path::PathBuf;
use std::sync::mpsc;
use walkdir::WalkDir;
/// Iterator over the regular files found beneath a root path.
///
/// Each item is a `(relative_path, full_path)` pair, where the relative path
/// is the entry's path with the root prefix stripped via `crate::strip_prefix`.
pub(crate) struct FileIterator<'a> {
    /// Underlying `walkdir` traversal, created with entries sorted by file name.
    iter: walkdir::IntoIter,
    /// Root path handed to `new`; used as the prefix to strip from each entry.
    dir: PathBuf,
    /// Optional hasher. When set, every visited sub-directory (other than the
    /// root) that contains a `FileHashCache::FILE_NAME` file has that cache
    /// merged into the hasher.
    pub(crate) hasher: Option<&'a FileHasher>,
    /// Optional glob set matched against each entry's file name. Matching
    /// entries are not yielded, and matching directories are not descended into.
    pub(crate) exclude: Option<&'a GlobSet>,
}
impl<'a> FileIterator<'a> {
    /// Creates an iterator rooted at `dir`.
    ///
    /// The traversal visits entries sorted by file name. No hasher and no
    /// exclusion globs are configured; callers set those fields afterwards.
    pub(crate) fn new(dir: PathBuf) -> Self {
        log::info!("Scanning directory: {:?}", dir);
        let walker = WalkDir::new(&dir).sort_by_file_name();
        Self {
            iter: walker.into_iter(),
            dir,
            hasher: None,
            exclude: None,
        }
    }

    /// Drains this iterator on a thread spawned in `scope`, sending every
    /// item through `tx`.
    ///
    /// The thread stops at the first failed send and logs an error once.
    pub(crate) fn spawn_in_scope_with_sender<'scope>(
        self,
        scope: &'scope std::thread::Scope<'scope, '_>,
        tx: mpsc::Sender<(PathBuf, PathBuf)>,
    ) where
        'a: 'scope,
    {
        scope.spawn(move || {
            let mut files = self;
            // `try_for_each` short-circuits on the first `Err`, so nothing
            // further is pulled from the walk once a send fails.
            let outcome = files.try_for_each(|item| tx.send(item));
            if outcome.is_err() {
                log::error!("Send failed");
            }
        });
    }

    /// Creates a fresh channel, starts draining this iterator into it on a
    /// thread spawned in `scope`, and returns the receiving end.
    pub(crate) fn spawn_in_scope<'scope>(
        self,
        scope: &'scope std::thread::Scope<'scope, '_>,
    ) -> mpsc::Receiver<(PathBuf, PathBuf)>
    where
        'a: 'scope,
    {
        let (sender, receiver) = mpsc::channel();
        self.spawn_in_scope_with_sender(scope, sender);
        receiver
    }
}
impl<'a> Iterator for FileIterator<'a> {
    /// `(path relative to the root, full path)` of a regular file.
    type Item = (PathBuf, PathBuf);

    /// Advances the walk until the next regular file is found.
    ///
    /// Walk errors are logged and skipped. Entries whose file name matches
    /// `exclude` are skipped, and excluded directories are not descended
    /// into. Directories are never yielded; when a hasher is configured they
    /// are probed for a hash cache file to merge.
    ///
    /// # Panics
    ///
    /// Panics if `crate::strip_prefix` fails for a yielded file.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends the iteration as soon as the underlying walk is exhausted.
            let entry = match self.iter.next()? {
                Ok(entry) => entry,
                Err(error) => {
                    log::error!("Error while walking directory: {}", error);
                    continue;
                }
            };
            let kind = entry.file_type();

            let excluded = match self.exclude {
                Some(globs) => globs.is_match(entry.file_name()),
                None => false,
            };
            if excluded {
                if kind.is_dir() {
                    // Prune the whole subtree of an excluded directory.
                    self.iter.skip_current_dir();
                }
                continue;
            }

            if kind.is_file() {
                let full_path = entry.path();
                let rel_path = crate::strip_prefix(full_path, &self.dir).unwrap();
                return Some((rel_path.to_path_buf(), full_path.to_path_buf()));
            }

            // Only directories matter from here on, and only when a hasher
            // is available to receive their cached hashes.
            if !kind.is_dir() {
                continue;
            }
            let Some(hasher) = self.hasher else {
                continue;
            };
            let dir = entry.path();
            if dir == self.dir {
                // The root directory itself is never probed for a cache file.
                continue;
            }
            if dir.join(FileHashCache::FILE_NAME).is_file() {
                let child_cache = FileHashCache::new(dir);
                hasher.merge_cache(&child_cache);
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Runs a default-configured `FileIterator` over `root` and collects its items.
    fn scan(root: PathBuf) -> Vec<(PathBuf, PathBuf)> {
        FileIterator::new(root).collect()
    }

    /// Symlinks to a file and to a directory inside the scanned tree must not
    /// produce any items; only the real file is expected.
    #[test]
    fn symbolic_links_are_skipped() -> anyhow::Result<()> {
        let scan_root = tempdir()?;
        let real_file = scan_root.path().join("real_file.txt");
        fs::write(&real_file, "content")?;

        // The link targets live in a separate temporary directory, outside
        // the tree being scanned.
        let outside = tempdir()?;
        let linked_dir = outside.path().join("target_dir");
        fs::create_dir(&linked_dir)?;
        let linked_file = linked_dir.join("file_in_dir.txt");
        fs::write(&linked_file, "content")?;

        let file_link = scan_root.path().join("symlink.txt");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&linked_file, &file_link)?;
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&linked_file, &file_link)?;

        let dir_link = scan_root.path().join("dir_symlink");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&linked_dir, &dir_link)?;
        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(&linked_dir, &dir_link)?;

        let found = scan(scan_root.path().to_path_buf());
        assert_eq!(found, vec![(PathBuf::from("real_file.txt"), real_file)]);
        Ok(())
    }

    /// When the root is itself a file, exactly that file is yielded, with an
    /// empty relative path; its sibling is not visited.
    #[test]
    fn single_file_path() -> anyhow::Result<()> {
        let parent = tempdir()?;
        let first = parent.path().join("file1.txt");
        fs::write(&first, "content1")?;
        let second = parent.path().join("file2.txt");
        fs::write(&second, "content2")?;

        let found = scan(first.clone());
        assert_eq!(found, vec![(PathBuf::from(""), first)]);
        Ok(())
    }
}