compare-dir 0.8.0

A high-performance directory comparison tool and library
Documentation
use crate::{FileHashCache, FileHasher};
use globset::GlobSet;
use std::path::PathBuf;
use std::sync::mpsc;
use walkdir::WalkDir;

/// Iterator over the regular files below a root path.
///
/// Each item is a `(relative_path, full_path)` pair, where the relative path
/// is the entry path with the root (`dir`) stripped off.
pub(crate) struct FileIterator<'a> {
    /// Underlying `walkdir` traversal, created with entries sorted by file name.
    iter: walkdir::IntoIter,
    /// Root path handed to `new`; used as the prefix to strip from entry paths.
    dir: PathBuf,
    /// Optional hasher. When set, a hash cache file found in a visited
    /// sub-directory is merged into it during iteration.
    pub(crate) hasher: Option<&'a FileHasher>,
    /// Optional glob set tested against each entry's file name. Matching
    /// entries are skipped, and matching directories are not descended into.
    pub(crate) exclude: Option<&'a GlobSet>,
}

impl<'a> FileIterator<'a> {
    /// Creates an iterator rooted at `dir`, walking entries in file-name order.
    ///
    /// The returned iterator has neither a hasher nor an exclusion set; assign
    /// the `hasher` / `exclude` fields afterwards to enable them.
    pub(crate) fn new(dir: PathBuf) -> Self {
        log::info!("Scanning directory: {:?}", dir);
        Self {
            // The walker copies the path it needs, so `dir` can still be
            // moved into the struct on the next line.
            iter: WalkDir::new(&dir).sort_by_file_name().into_iter(),
            dir,
            hasher: None,
            exclude: None,
        }
    }

    /// Spawns a scoped thread that drains this iterator into `tx`.
    ///
    /// The thread stops at the first failed send, logging an error once.
    pub(crate) fn spawn_in_scope_with_sender<'scope>(
        self,
        scope: &'scope std::thread::Scope<'scope, '_>,
        tx: mpsc::Sender<(PathBuf, PathBuf)>,
    ) where
        'a: 'scope,
    {
        scope.spawn(move || {
            let mut files = self;
            // `try_for_each` short-circuits on the first `Err`, so nothing
            // more is walked once a send has failed.
            if files.try_for_each(|pair| tx.send(pair)).is_err() {
                log::error!("Send failed");
            }
        });
    }

    /// Spawns a scoped thread that drains this iterator and returns the
    /// receiving end of the channel it feeds.
    pub(crate) fn spawn_in_scope<'scope>(
        self,
        scope: &'scope std::thread::Scope<'scope, '_>,
    ) -> mpsc::Receiver<(PathBuf, PathBuf)>
    where
        'a: 'scope,
    {
        let (sender, receiver) = mpsc::channel();
        self.spawn_in_scope_with_sender(scope, sender);
        receiver
    }
}

/// Yields every regular file reached by the walk as
/// `(path relative to the root, full path)`.
///
/// Entries whose file name matches `exclude` are skipped, and excluded
/// directories are not descended into. The root entry itself is never
/// excluded. Walk errors are logged and the walk continues.
impl<'a> Iterator for FileIterator<'a> {
    type Item = (PathBuf, PathBuf);

    fn next(&mut self) -> Option<Self::Item> {
        // `while let` instead of `for`: the body needs to call
        // `skip_current_dir` on the very iterator being advanced.
        while let Some(entry) = self.iter.next() {
            let entry = match entry {
                Ok(entry) => entry,
                Err(error) => {
                    log::error!("Error while walking directory: {}", error);
                    continue;
                }
            };

            // The root (depth 0) was named explicitly by the caller. Testing
            // it against the exclusion set would drop the whole tree whenever
            // its own name happens to match (e.g. scanning `.` with `.*`).
            let is_root = entry.depth() == 0;
            if !is_root && self.exclude.is_some_and(|set| set.is_match(entry.file_name())) {
                if entry.file_type().is_dir() {
                    self.iter.skip_current_dir();
                }
                continue;
            }

            if entry.file_type().is_file() {
                let rel_path = crate::strip_prefix(entry.path(), &self.dir)
                    .expect("walked entries are always located under the root directory");
                return Some((rel_path.to_path_buf(), entry.path().to_path_buf()));
            } else if entry.file_type().is_dir()
                && let Some(hasher) = self.hasher
            {
                // If there's a hash cache in the directory, merge it.
                // The root directory is skipped here; `WalkDir` emits it first.
                let dir = entry.path();
                if dir != self.dir {
                    let cache_path = dir.join(FileHashCache::FILE_NAME);
                    if cache_path.is_file() {
                        let child_cache = FileHashCache::new(dir);
                        hasher.merge_cache(&child_cache);
                    }
                }
            }
        }
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Symlinks inside the scanned tree, whether to a file or to a directory,
    /// must not show up in the results.
    #[test]
    fn symbolic_links_are_skipped() -> anyhow::Result<()> {
        // The scanned tree holds exactly one regular file.
        let scanned = tempdir()?;
        let root = scanned.path();
        let real_file = root.join("real_file.txt");
        fs::write(&real_file, "content")?;

        // The link targets live OUTSIDE the scanned tree.
        let elsewhere = tempdir()?;
        let linked_dir = elsewhere.path().join("target_dir");
        fs::create_dir(&linked_dir)?;
        let linked_file = linked_dir.join("file_in_dir.txt");
        fs::write(&linked_file, "content")?;

        // One symlink to the outside file, one to the outside directory.
        let file_link = root.join("symlink.txt");
        let dir_link = root.join("dir_symlink");
        #[cfg(unix)]
        {
            std::os::unix::fs::symlink(&linked_file, &file_link)?;
            std::os::unix::fs::symlink(&linked_dir, &dir_link)?;
        }
        #[cfg(windows)]
        {
            std::os::windows::fs::symlink_file(&linked_file, &file_link)?;
            std::os::windows::fs::symlink_dir(&linked_dir, &dir_link)?;
        }

        // Only the real file is reported, never the symlinks.
        let found: Vec<_> = FileIterator::new(root.to_path_buf()).collect();
        assert_eq!(found, vec![(PathBuf::from("real_file.txt"), real_file)]);

        Ok(())
    }

    /// Pointing the iterator at a single file yields just that file, with an
    /// empty relative path.
    #[test]
    fn single_file_path() -> anyhow::Result<()> {
        let scanned = tempdir()?;
        let root = scanned.path();
        let first = root.join("file1.txt");
        fs::write(&first, "content1")?;
        // A sibling that must NOT be reported.
        let second = root.join("file2.txt");
        fs::write(&second, "content2")?;

        // The root is file1.txt itself, not its parent directory.
        let found: Vec<_> = FileIterator::new(first.clone()).collect();
        assert_eq!(found, vec![(PathBuf::new(), first)]);

        Ok(())
    }
}