use crate::error::{Result, SyncError};
use crossbeam_channel::{Receiver, bounded};
use ignore::{WalkBuilder, WalkState};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
#[cfg(unix)]
use std::os::unix::fs::MetadataExt;
#[cfg(target_os = "macos")]
use std::os::darwin::fs::MetadataExt as DarwinMetadataExt;
/// Metadata captured for one filesystem entry produced by a scan.
///
/// Values are taken from `symlink_metadata`, so they describe a symlink itself
/// rather than whatever it points to.
#[derive(Debug, Clone)]
pub struct FileEntry {
    /// Path of the entry as reported by the directory walker.
    pub path: Arc<PathBuf>,
    /// `path` with the scan root stripped from the front.
    pub relative_path: Arc<PathBuf>,
    /// Length in bytes reported by the entry's metadata.
    pub size: u64,
    /// Last-modification timestamp.
    pub modified: SystemTime,
    /// Whether the entry is a directory.
    pub is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub is_symlink: bool,
    /// Link target for symlinks; `None` for other entries or when the link could not be read.
    pub symlink_target: Option<Arc<PathBuf>>,
    /// Result of the sparse-file heuristic; always `false` for directories and symlinks.
    #[allow(dead_code)]
    pub is_sparse: bool,
    /// Bytes allocated on disk as computed by the sparse-file probe; `0` for directories and symlinks.
    #[allow(dead_code)]
    pub allocated_size: u64,
    /// Extended attributes keyed by name, or `None` when none could be read.
    pub xattrs: Option<HashMap<String, Vec<u8>>>,
    /// Inode number where the platform exposes one.
    pub inode: Option<u64>,
    /// Hard-link count (reported as `1` on platforms without link information).
    pub nlink: u64,
    /// ACL entries rendered as newline-separated text bytes, when available.
    pub acls: Option<Vec<u8>>,
    /// BSD file flags; only populated on macOS.
    #[cfg_attr(not(target_os = "macos"), allow(dead_code))]
    pub bsd_flags: Option<u32>,
}
/// Heuristically decides whether a file is sparse.
///
/// Returns `(is_sparse, allocated_bytes)`, where `allocated_bytes` is the
/// metadata block count multiplied by 512. A file counts as sparse only when
/// it is larger than 4096 bytes and its allocation falls short of its length
/// by more than 4096 bytes.
#[cfg(unix)]
fn detect_sparse_file(_path: &Path, metadata: &std::fs::Metadata) -> (bool, u64) {
    const BLOCK_BYTES: u64 = 512;
    const SLACK_BYTES: u64 = 4096;

    let logical_len = metadata.len();
    let on_disk = metadata.blocks() * BLOCK_BYTES;
    let sparse = logical_len > SLACK_BYTES && on_disk < logical_len.saturating_sub(SLACK_BYTES);
    (sparse, on_disk)
}

/// Fallback for platforms without block counts: never reports a file as sparse
/// and uses the file length as the allocated size.
#[cfg(not(unix))]
fn detect_sparse_file(_path: &Path, metadata: &std::fs::Metadata) -> (bool, u64) {
    (false, metadata.len())
}
/// Returns `(inode, hard_link_count)` taken straight from the Unix metadata.
#[cfg(unix)]
fn detect_hardlink_info(metadata: &std::fs::Metadata) -> (Option<u64>, u64) {
    (Some(metadata.ino()), metadata.nlink())
}

/// Fallback for non-Unix targets: no inode and a link count of one.
#[cfg(not(unix))]
fn detect_hardlink_info(_metadata: &std::fs::Metadata) -> (Option<u64>, u64) {
    let inode = None;
    let link_count = 1;
    (inode, link_count)
}
#[cfg(unix)]
fn read_xattrs(path: &Path) -> Option<HashMap<String, Vec<u8>>> {
let mut xattrs = HashMap::new();
let names = match xattr::list(path) {
Ok(names) => names,
Err(_) => return None, };
for name in names {
if let Ok(Some(value)) = xattr::get(path, &name)
&& let Some(name_str) = name.to_str()
{
xattrs.insert(name_str.to_string(), value);
}
}
if xattrs.is_empty() { None } else { Some(xattrs) }
}
#[cfg(not(unix))]
fn read_xattrs(_path: &Path) -> Option<HashMap<String, Vec<u8>>> {
None
}
/// Reads the ACL of `path` and renders it as newline-separated text bytes.
///
/// Best effort: returns `None` when the ACL cannot be read, has no entries, or
/// renders to an empty string.
#[cfg(all(unix, feature = "acl"))]
fn read_acls(path: &Path) -> Option<Vec<u8>> {
    let entries: Vec<_> = exacl::getfacl(path, None).ok()?.into_iter().collect();
    if entries.is_empty() {
        return None;
    }
    let rendered = entries
        .iter()
        .map(|entry| entry.to_string())
        .collect::<Vec<String>>()
        .join("\n");
    if rendered.is_empty() {
        None
    } else {
        Some(rendered.into_bytes())
    }
}

/// ACLs are only read on Unix builds with the `acl` feature enabled.
#[cfg(not(all(unix, feature = "acl")))]
fn read_acls(_path: &Path) -> Option<Vec<u8>> {
    None
}
/// Returns the BSD file flags (`st_flags`) recorded in `metadata`.
#[cfg(target_os = "macos")]
fn read_bsd_flags(metadata: &std::fs::Metadata) -> Option<u32> {
    let flags = metadata.st_flags();
    Some(flags)
}

/// BSD file flags are only collected on macOS; every other target reports `None`.
#[cfg(not(target_os = "macos"))]
fn read_bsd_flags(_metadata: &std::fs::Metadata) -> Option<u32> {
    Option::None
}
/// Switches that control which entries a scan reports.
#[derive(Debug, Clone, Copy)]
pub struct ScanOptions {
    /// Apply gitignore rules (per-directory, global and exclude files) while walking.
    pub respect_gitignore: bool,
    /// Report entries named `.git`; when `false` they are filtered out of the walk.
    pub include_git_dir: bool,
}

impl Default for ScanOptions {
    /// Defaults to ignoring gitignore rules and keeping `.git` entries.
    fn default() -> Self {
        ScanOptions {
            include_git_dir: true,
            respect_gitignore: false,
        }
    }
}
/// Default worker count for a scan: the number of CPUs, capped at four.
fn optimal_thread_count() -> usize {
    const MAX_SCAN_THREADS: usize = 4;
    num_cpus::get().min(MAX_SCAN_THREADS)
}
/// A root must have MORE than this many immediate subdirectories before the
/// parallel walker is chosen automatically.
const PARALLEL_SUBDIR_THRESHOLD: usize = 30;

/// Reports whether `root` has enough immediate subdirectories to make a
/// parallel walk worthwhile.
///
/// Only the top level is inspected, and counting stops as soon as the
/// threshold is exceeded. An unreadable root, unreadable children and children
/// whose type cannot be determined all count as "not a directory".
fn should_use_parallel(root: &Path) -> bool {
    let Ok(children) = std::fs::read_dir(root) else {
        return false;
    };
    let subdirs_seen = children
        .flatten()
        .filter(|child| child.file_type().is_ok_and(|kind| kind.is_dir()))
        // One past the threshold is all that is needed to decide.
        .take(PARALLEL_SUBDIR_THRESHOLD + 1)
        .count();
    subdirs_seen > PARALLEL_SUBDIR_THRESHOLD
}
/// Turns one walker entry into a [`FileEntry`] by collecting its metadata.
///
/// Metadata is read without following symlinks, so a symlink is described as
/// the link itself. Symlink target, extended attributes and ACLs are best
/// effort and become `None` when they cannot be read.
///
/// # Errors
///
/// * `SyncError::ReadDirError` when the metadata or the modification time
///   cannot be read.
/// * `SyncError::InvalidPath` when the entry's path does not start with `root`.
fn process_dir_entry(root: &Path, entry: ignore::DirEntry) -> Result<FileEntry> {
    let path = entry.into_path();

    let metadata = match std::fs::symlink_metadata(&path) {
        Ok(found) => found,
        Err(source) => {
            return Err(SyncError::ReadDirError { path: path.clone(), source });
        }
    };
    let Ok(relative_path) = path.strip_prefix(root).map(Path::to_path_buf) else {
        return Err(SyncError::InvalidPath { path: path.clone() });
    };

    let is_dir = metadata.is_dir();
    let is_symlink = metadata.is_symlink();
    let symlink_target = if is_symlink {
        std::fs::read_link(&path).ok().map(Arc::new)
    } else {
        None
    };

    // The sparse probe only makes sense for regular file content.
    let (is_sparse, allocated_size) = if is_dir || is_symlink {
        (false, 0)
    } else {
        detect_sparse_file(&path, &metadata)
    };
    let (inode, nlink) = detect_hardlink_info(&metadata);
    let xattrs = read_xattrs(&path);
    let acls = read_acls(&path);
    let bsd_flags = read_bsd_flags(&metadata);

    let modified = match metadata.modified() {
        Ok(stamp) => stamp,
        Err(source) => {
            return Err(SyncError::ReadDirError { path: path.clone(), source });
        }
    };

    Ok(FileEntry {
        size: metadata.len(),
        path: Arc::new(path),
        relative_path: Arc::new(relative_path),
        modified,
        is_dir,
        is_symlink,
        symlink_target,
        is_sparse,
        allocated_size,
        xattrs,
        inode,
        nlink,
        acls,
        bsd_flags,
    })
}
/// Configurable directory scanner that yields a [`FileEntry`] for everything
/// found beneath `root` (the root itself is not reported).
pub struct Scanner {
// Directory the walk starts from; reported relative paths are relative to it.
root: PathBuf,
// Thread count handed to the walker; values of 0 or 1 select the sequential walker.
threads: usize,
// Whether the walker follows symbolic links.
follow_links: bool,
// Gitignore / `.git` filtering switches.
options: ScanOptions,
// When true, the parallel walker is used only if the root has many
// immediate subdirectories; when false, `threads` alone decides.
auto_select: bool,
}
impl Scanner {
pub fn new(root: impl Into<PathBuf>) -> Self {
Self {
root: root.into(),
threads: optimal_thread_count(),
follow_links: false,
options: ScanOptions::default(),
auto_select: true,
}
}
#[allow(dead_code)] pub fn with_threads(root: impl Into<PathBuf>, threads: usize) -> Self {
Self { root: root.into(), threads, follow_links: false, options: ScanOptions::default(), auto_select: false }
}
#[allow(dead_code)] pub fn follow_links(mut self, follow: bool) -> Self {
self.follow_links = follow;
self
}
pub fn with_options(mut self, options: ScanOptions) -> Self {
self.options = options;
self
}
#[allow(dead_code)] pub fn respect_gitignore(mut self, respect: bool) -> Self {
self.options.respect_gitignore = respect;
self
}
#[allow(dead_code)] pub fn include_git_dir(mut self, include: bool) -> Self {
self.options.include_git_dir = include;
self
}
pub fn scan(&self) -> Result<Vec<FileEntry>> {
self.scan_streaming()?.collect()
}
pub fn scan_streaming(&self) -> Result<Box<dyn Iterator<Item = Result<FileEntry>> + Send>> {
let mut walker = WalkBuilder::new(&self.root);
walker
.hidden(false) .git_ignore(self.options.respect_gitignore) .git_global(self.options.respect_gitignore) .git_exclude(self.options.respect_gitignore) .threads(self.threads) .follow_links(self.follow_links);
if !self.options.include_git_dir {
walker.filter_entry(|entry| {
entry.file_name() != ".git"
});
}
if self.options.respect_gitignore {
let gitignore_path = self.root.join(".gitignore");
if gitignore_path.exists() {
walker.add_ignore(&gitignore_path);
}
}
let use_parallel = if self.auto_select {
self.threads > 1 && should_use_parallel(&self.root)
} else {
self.threads > 1
};
if use_parallel {
Ok(Box::new(ParallelStreamingScanner::new(self.root.clone(), walker.build_parallel())))
} else {
Ok(Box::new(StreamingScanner { root: self.root.clone(), walker: walker.build() }))
}
}
}
/// Sequential scan iterator: pulls entries from a single-threaded walker and
/// converts each one into a [`FileEntry`] on demand.
pub struct StreamingScanner {
// Scan root; used to skip the root entry itself and to compute relative paths.
root: PathBuf,
// Underlying single-threaded directory walker.
walker: ignore::Walk,
}
impl Iterator for StreamingScanner {
type Item = Result<FileEntry>;
fn next(&mut self) -> Option<Self::Item> {
loop {
let result = self.walker.next()?;
let entry = match result {
Ok(entry) => entry,
Err(e) => return Some(Err(SyncError::Io(std::io::Error::other(e.to_string())))),
};
if entry.path() == self.root {
continue;
}
return Some(process_dir_entry(&self.root, entry));
}
}
}
// SAFETY: NOTE(review) — no justification is recorded for this impl. It asserts
// that moving the wrapped `ignore::Walk` (and the root `PathBuf`) to another
// thread is sound, which is what lets `Scanner::scan_streaming` box this type as
// `dyn Iterator + Send`. Confirm whether `ignore::Walk` is already `Send`; if it
// is, this manual impl is redundant and should be removed, and if it is not,
// the reason this is still sound needs to be written down here.
unsafe impl Send for StreamingScanner {}
/// Parallel scan iterator: a background thread runs the parallel walker and
/// sends finished entries over a bounded channel that this iterator drains.
pub struct ParallelStreamingScanner {
// Receiving end of the channel fed by the walker's worker closures.
receiver: Receiver<Result<FileEntry>>,
// Handle of the thread spawned in `new` that runs the parallel walk.
_handle: Option<std::thread::JoinHandle<()>>,
}
impl ParallelStreamingScanner {
fn new(root: PathBuf, walker: ignore::WalkParallel) -> Self {
let (sender, receiver) = bounded(1024);
let handle = std::thread::spawn(move || {
walker.run(|| {
let sender = sender.clone();
let root = root.clone();
Box::new(move |result| {
match result {
Ok(entry) => {
if entry.path() == root {
return WalkState::Continue;
}
let file_entry = process_dir_entry(&root, entry);
if sender.send(file_entry).is_err() {
return WalkState::Quit;
}
}
Err(e) => {
let err = SyncError::Io(std::io::Error::other(e.to_string()));
if sender.send(Err(err)).is_err() {
return WalkState::Quit;
}
}
}
WalkState::Continue
})
});
});
Self { receiver, _handle: Some(handle) }
}
}
impl Iterator for ParallelStreamingScanner {
type Item = Result<FileEntry>;
fn next(&mut self) -> Option<Self::Item> {
self.receiver.recv().ok()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
/// Smoke test: a top-level file, a directory and a nested file are all reported.
#[test]
fn test_scanner_basic() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::create_dir(base.join("dir1")).unwrap();
    for relative in ["file1.txt", "dir1/file2.txt"] {
        fs::write(base.join(relative), "content").unwrap();
    }

    let found = Scanner::new(base).scan().unwrap();

    assert!(found.len() >= 3);
    let wanted = Path::new("file1.txt");
    assert!(found.iter().any(|entry| entry.relative_path.as_path() == wanted));
}
/// Inside a git repository, names listed in `.gitignore` are skipped when
/// `respect_gitignore` is enabled.
#[test]
fn test_scanner_gitignore() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    // Make the temp dir a real repository (requires `git` on PATH).
    std::process::Command::new("git").args(["init"]).current_dir(base).output().unwrap();
    for (name, body) in [
        (".gitignore", "ignored.txt\n"),
        ("ignored.txt", "should be ignored"),
        ("included.txt", "should be included"),
    ] {
        fs::write(base.join(name), body).unwrap();
    }

    let options = ScanOptions { respect_gitignore: true, include_git_dir: false };
    let found = Scanner::new(base).with_options(options).scan().unwrap();

    let listed = |name: &str| found.iter().any(|entry| entry.relative_path.to_str() == Some(name));
    assert!(!listed("ignored.txt"));
    assert!(listed("included.txt"));
}
/// A root-level `.gitignore` is honoured even when the root is not a git repository.
#[test]
fn test_scanner_gitignore_without_git_repo() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join(".gitignore"), "*.tmp\n*.log\nnode_modules/\n.DS_Store\n").unwrap();
    fs::write(base.join("test.tmp"), "should be ignored").unwrap();
    fs::write(base.join("debug.log"), "should be ignored").unwrap();
    let modules = base.join("node_modules");
    fs::create_dir(&modules).unwrap();
    fs::write(modules.join("package.txt"), "should be ignored").unwrap();
    fs::write(base.join("normal.txt"), "should be included").unwrap();
    fs::write(base.join("important.rs"), "should be included").unwrap();

    let options = ScanOptions { respect_gitignore: true, include_git_dir: true };
    let found = Scanner::new(base).with_options(options).scan().unwrap();

    let listed = |name: &str| found.iter().any(|entry| entry.relative_path.to_str() == Some(name));
    assert!(!listed("test.tmp"), "test.tmp should be ignored");
    assert!(!listed("debug.log"), "debug.log should be ignored");
    assert!(!listed("node_modules"), "node_modules/ should be ignored");
    assert!(listed("normal.txt"), "normal.txt should be included");
    assert!(listed("important.rs"), "important.rs should be included");
}
/// Symlinks are reported as links with their target; regular files are not.
#[test]
#[cfg(unix)]
fn test_scanner_symlinks() {
    fn named<'a>(entries: &'a [FileEntry], name: &str) -> Option<&'a FileEntry> {
        entries.iter().find(|entry| entry.relative_path.as_path() == Path::new(name))
    }

    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let target_path = base.join("target.txt");
    fs::write(&target_path, "target content").unwrap();
    std::os::unix::fs::symlink(&target_path, base.join("link.txt")).unwrap();

    let found = Scanner::new(base).scan().unwrap();

    let link = named(&found, "link.txt").expect("Symlink should be in scan results");
    assert!(link.is_symlink, "Entry should be marked as symlink");
    assert!(link.symlink_target.is_some(), "Symlink should have a target");
    let recorded_target = link.symlink_target.as_ref().unwrap();
    assert_eq!(recorded_target.as_path(), target_path.as_path());

    let plain = named(&found, "target.txt").expect("Target file should be in scan results");
    assert!(!plain.is_symlink, "Regular file should not be marked as symlink");
    assert!(plain.symlink_target.is_none(), "Regular file should have no target");
}
/// A directory containing a symlink to itself: without following links the
/// link is just an entry; with following enabled the scan must still terminate.
#[test]
#[cfg(unix)]
fn test_scanner_symlink_loop_detection() {
    use std::os::unix::fs::symlink;

    let workspace = TempDir::new().unwrap();
    let dir_a = workspace.path().join("a");
    fs::create_dir(&dir_a).unwrap();
    symlink(&dir_a, dir_a.join("link")).unwrap();
    fs::write(dir_a.join("file.txt"), "test").unwrap();

    // Links not followed: exactly the file and the link itself.
    let unfollowed = Scanner::new(&dir_a).scan().unwrap();
    assert_eq!(unfollowed.len(), 2);
    let link = unfollowed.iter().find(|entry| entry.is_symlink).unwrap();
    assert_eq!(link.relative_path.as_path(), Path::new("link"));

    // Links followed: either outcome is accepted, but a successful scan must
    // still contain the real file.
    if let Ok(followed) = Scanner::new(&dir_a).follow_links(true).scan() {
        assert!(followed.iter().any(|entry| entry.path.ends_with("file.txt")));
    }
}
/// Two directories that link to each other: the scan must terminate whether or
/// not links are followed.
#[test]
#[cfg(unix)]
fn test_scanner_symlink_chain_loop() {
    use std::os::unix::fs::symlink;

    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let dir_a = base.join("a");
    let dir_b = base.join("b");
    fs::create_dir(&dir_a).unwrap();
    fs::create_dir(&dir_b).unwrap();
    // a/link1 -> b and b/link2 -> a form the cycle.
    symlink(&dir_b, dir_a.join("link1")).unwrap();
    symlink(&dir_a, dir_b.join("link2")).unwrap();
    fs::write(dir_a.join("file_a.txt"), "a").unwrap();
    fs::write(dir_b.join("file_b.txt"), "b").unwrap();

    let unfollowed = Scanner::new(base).scan().unwrap();
    assert!(unfollowed.len() >= 4);
    let link_count = unfollowed.iter().filter(|entry| entry.is_symlink).count();
    assert_eq!(link_count, 2);

    // With following enabled an error is acceptable; a success must contain both files.
    if let Ok(followed) = Scanner::new(base).follow_links(true).scan() {
        for name in ["file_a.txt", "file_b.txt"] {
            assert!(followed.iter().any(|entry| entry.path.ends_with(name)));
        }
    }
}
/// A 10 MiB file with only its first 4 KiB written is detected as sparse on
/// filesystems that support holes.
///
/// The file is created with `File::set_len` rather than by shelling out to
/// `dd`, so the test needs no external binary and works for temp paths that
/// are not valid UTF-8.
#[test]
#[cfg(unix)]
fn test_scanner_sparse_files() {
    use std::io::Write;

    const LOGICAL_SIZE: u64 = 10 * 1024 * 1024;

    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let sparse_path = root.join("sparse.dat");

    // Extending an empty file leaves the new range unwritten; filesystems with
    // sparse-file support store it as a hole.
    let mut file = fs::File::create(&sparse_path).unwrap();
    file.set_len(LOGICAL_SIZE).unwrap();
    // Write real data at the start so the file is not entirely a hole.
    file.write_all(&[0x42_u8; 4096]).unwrap();
    file.flush().unwrap();
    drop(file);

    let scanner = Scanner::new(root);
    let entries = scanner.scan().unwrap();
    let sparse_entry = entries
        .iter()
        .find(|e| e.relative_path.as_path() == Path::new("sparse.dat"))
        .expect("Sparse file should be in scan results");

    assert_eq!(sparse_entry.size, LOGICAL_SIZE, "File size should be 10MB");
    if sparse_entry.allocated_size < sparse_entry.size {
        assert!(
            sparse_entry.is_sparse,
            "File should be detected as sparse (size: {}, allocated: {})",
            sparse_entry.size, sparse_entry.allocated_size
        );
        assert!(
            sparse_entry.allocated_size < sparse_entry.size / 2,
            "Allocated size ({}) should be much smaller than file size ({})",
            sparse_entry.allocated_size,
            sparse_entry.size
        );
    } else {
        // The filesystem allocated the whole file, so nothing should be flagged.
        assert!(!sparse_entry.is_sparse, "Non-sparse file should not be detected as sparse");
    }
}
/// A fully written 10 KiB file is never flagged as sparse.
#[test]
fn test_scanner_regular_file_not_sparse() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let payload = vec![0x42_u8; 10 * 1024];
    fs::write(base.join("regular.txt"), &payload).unwrap();

    let found = Scanner::new(base).scan().unwrap();
    let wanted = Path::new("regular.txt");
    let regular = found
        .iter()
        .find(|entry| entry.relative_path.as_path() == wanted)
        .expect("Regular file should be in scan results");

    assert!(!regular.is_sparse, "Regular file should not be detected as sparse");
    assert_eq!(regular.size, 10 * 1024, "File size should be 10KB");
}
/// Every name of a hard-linked file reports the same inode and a link count of three.
#[test]
#[cfg(unix)]
fn test_scanner_hardlinks() {
    fn named<'a>(entries: &'a [FileEntry], name: &str) -> Option<&'a FileEntry> {
        entries.iter().find(|entry| entry.relative_path.as_path() == Path::new(name))
    }

    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let source = base.join("original.txt");
    fs::write(&source, "content").unwrap();
    for alias in ["link1.txt", "link2.txt"] {
        fs::hard_link(&source, base.join(alias)).unwrap();
    }

    let found = Scanner::new(base).scan().unwrap();
    let original = named(&found, "original.txt").expect("Original file should be in scan results");
    let first = named(&found, "link1.txt").expect("Hardlink 1 should be in scan results");
    let second = named(&found, "link2.txt").expect("Hardlink 2 should be in scan results");

    assert_eq!(original.nlink, 3, "Original should have 3 links");
    assert_eq!(first.nlink, 3, "Link1 should have 3 links");
    assert_eq!(second.nlink, 3, "Link2 should have 3 links");

    assert!(original.inode.is_some(), "Original should have inode");
    assert!(first.inode.is_some(), "Link1 should have inode");
    assert!(second.inode.is_some(), "Link2 should have inode");

    assert_eq!(original.inode, first.inode, "Original and link1 should have same inode");
    assert_eq!(original.inode, second.inode, "Original and link2 should have same inode");
}
/// A file with a single name reports a link count of one.
#[test]
fn test_scanner_regular_file_no_hardlinks() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join("single.txt"), "content").unwrap();

    let found = Scanner::new(base).scan().unwrap();
    let wanted = Path::new("single.txt");
    let single = found
        .iter()
        .find(|entry| entry.relative_path.as_path() == wanted)
        .expect("File should be in scan results");

    assert_eq!(single.nlink, 1, "Single file should have nlink = 1");
}
/// Scanning an empty root yields nothing (the root itself is not an entry).
#[test]
fn test_scanner_empty_directory() {
    let workspace = TempDir::new().unwrap();
    let found = Scanner::new(workspace.path()).scan().unwrap();
    assert_eq!(found.len(), 0, "Empty directory should return no entries");
}
/// A chain of empty directories produces directory entries only.
#[test]
fn test_scanner_nested_empty_directories() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::create_dir_all(base.join("a/b/c/d/e")).unwrap();

    let found = Scanner::new(base).scan().unwrap();

    let only_dirs = found.iter().all(|entry| entry.is_dir);
    assert!(only_dirs, "All entries should be directories");
}
/// A 254-character file name is reported unchanged.
#[test]
fn test_scanner_very_long_filename() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let long_name = format!("{}.txt", "a".repeat(250));
    fs::write(base.join(&long_name), "content").unwrap();

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), 1);
    assert_eq!(*found[0].relative_path, PathBuf::from(&long_name));
}
/// File names in several scripts (and an emoji) are reported unchanged.
#[test]
fn test_scanner_unicode_filenames() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let names = ["测试.txt", "テスト.txt", "тест.txt", "🦀.txt", "café.txt"];
    names.iter().for_each(|name| fs::write(base.join(name), "content").unwrap());

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), names.len());
    for name in names {
        let present = found.iter().any(|entry| entry.relative_path.as_path() == Path::new(name));
        assert!(present, "Should find file: {}", name);
    }
}
/// File names containing spaces and punctuation are reported unchanged.
#[test]
fn test_scanner_special_characters() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let names = [
        "file with spaces.txt",
        "file-with-dashes.txt",
        "file_with_underscores.txt",
        "file.multiple.dots.txt",
        "file(with)parens.txt",
        "file[with]brackets.txt",
    ];
    names.iter().for_each(|name| fs::write(base.join(name), "content").unwrap());

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), names.len());
    for name in names {
        let present = found.iter().any(|entry| entry.relative_path.as_path() == Path::new(name));
        assert!(present, "Should find file: {}", name);
    }
}
/// A file fifty directories deep is found together with all its ancestors.
#[test]
fn test_scanner_deep_nesting() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let mut deepest = base.to_path_buf();
    deepest.extend((0..50).map(|level| format!("level{}", level)));
    fs::create_dir_all(&deepest).unwrap();
    fs::write(deepest.join("deep.txt"), "content").unwrap();

    let found = Scanner::new(base).scan().unwrap();

    assert!(found.len() >= 51, "Should find deeply nested file and directories");
    let has_deep_file = found.iter().any(|entry| entry.relative_path.ends_with("deep.txt"));
    assert!(has_deep_file, "Should find deeply nested file");
}
/// Nothing from inside an unreadable directory may appear in scan results.
///
/// The scan is allowed to fail outright. The check is skipped when the current
/// user can read the directory despite mode `000` (for example when running as
/// root), because the premise of the test does not hold there.
#[test]
#[cfg(unix)]
fn test_scanner_permission_denied_directory() {
    use std::os::unix::fs::PermissionsExt;

    let temp = TempDir::new().unwrap();
    let root = temp.path();
    let protected_dir = root.join("protected");
    fs::create_dir(&protected_dir).unwrap();
    fs::write(protected_dir.join("secret.txt"), "secret").unwrap();

    let mut perms = fs::metadata(&protected_dir).unwrap().permissions();
    perms.set_mode(0o000);
    fs::set_permissions(&protected_dir, perms.clone()).unwrap();

    // Privileged users bypass mode bits; probe whether the restriction is
    // actually in effect for this process.
    let restriction_enforced = fs::read_dir(&protected_dir).is_err();

    let result = Scanner::new(root).scan();

    // Restore access before asserting so the temp dir can always be cleaned up.
    perms.set_mode(0o755);
    fs::set_permissions(&protected_dir, perms).unwrap();

    if !restriction_enforced {
        return;
    }

    match result {
        Ok(entries) => {
            // The directory entry itself may legitimately be listed (it is
            // visible from its parent); only its contents must be absent.
            let leaked = entries
                .iter()
                .any(|e| *e.path != protected_dir && e.path.starts_with(&protected_dir));
            assert!(!leaked, "Should not include files from unreadable directory");
        }
        // Surfacing the permission error is an acceptable outcome.
        Err(_) => {}
    }
}
/// An empty file is reported with size zero.
#[test]
fn test_scanner_zero_byte_file() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join("empty.txt"), "").unwrap();

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.size, 0);
    assert_eq!(*only.relative_path, PathBuf::from("empty.txt"));
}
/// A flat directory with a thousand files is scanned completely.
#[test]
fn test_scanner_large_directory() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    (0..1000).for_each(|i| {
        fs::write(base.join(format!("file{:04}.txt", i)), format!("content{}", i)).unwrap();
    });

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), 1000, "Should find all 1000 files");
}
/// Files and directories are both reported and classified correctly.
#[test]
fn test_scanner_mixed_file_types() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join("file1.txt"), "content1").unwrap();
    fs::create_dir(base.join("dir1")).unwrap();
    fs::write(base.join("dir1/file2.txt"), "content2").unwrap();
    fs::create_dir(base.join("dir2")).unwrap();
    fs::write(base.join("file3.txt"), "content3").unwrap();

    let found = Scanner::new(base).scan().unwrap();

    let (dirs, files): (Vec<_>, Vec<_>) = found.iter().partition(|entry| entry.is_dir);
    assert_eq!(files.len(), 3, "Should find 3 files");
    assert_eq!(dirs.len(), 2, "Should find 2 directories");
}
/// The parallel walker (four threads) finds every file and directory.
#[test]
fn test_parallel_scanner_basic() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    for i in 0..5 {
        let dir = base.join(format!("dir{}", i));
        fs::create_dir(&dir).unwrap();
        (0..10).for_each(|j| {
            fs::write(dir.join(format!("file{}.txt", j)), format!("content{}{}", i, j)).unwrap();
        });
    }

    let found = Scanner::with_threads(base, 4).scan().unwrap();

    assert_eq!(found.len(), 55, "Should find all 55 entries");
    let dir_count = found.iter().filter(|entry| entry.is_dir).count();
    let file_count = found.len() - dir_count;
    assert_eq!(file_count, 50, "Should find 50 files");
    assert_eq!(dir_count, 5, "Should find 5 directories");
}
/// Sequential and parallel scans of a thousand-file tree report the same count.
#[test]
fn test_parallel_scanner_large() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    for i in 0..10 {
        let dir = base.join(format!("subdir{:02}", i));
        fs::create_dir(&dir).unwrap();
        (0..100).for_each(|j| {
            fs::write(dir.join(format!("file{:03}.txt", j)), format!("content{}", j)).unwrap();
        });
    }

    // A thread count of zero selects the sequential walker.
    let sequential_count = Scanner::with_threads(base, 0).scan().unwrap().len();
    let parallel_count = Scanner::with_threads(base, 4).scan().unwrap().len();

    assert_eq!(sequential_count, parallel_count, "Sequential and parallel should find same count");
    assert_eq!(sequential_count, 1010);
}
/// The parallel path reports the same basic metadata as the sequential one would.
#[test]
fn test_parallel_scanner_preserves_metadata() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    let body = "test content for metadata";
    fs::write(base.join("test.txt"), body).unwrap();

    let found = Scanner::with_threads(base, 4).scan().unwrap();

    assert_eq!(found.len(), 1);
    let only = found.first().unwrap();
    assert_eq!(*only.relative_path, PathBuf::from("test.txt"));
    assert_eq!(only.size, body.len() as u64);
    assert!(!only.is_dir);
    assert!(!only.is_symlink);
}
/// One thread means the sequential walker, which must still find the file.
#[test]
fn test_sequential_fallback_single_thread() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join("file.txt"), "content").unwrap();

    let found = Scanner::with_threads(base, 1).scan().unwrap();

    assert_eq!(found.len(), 1);
}
/// Zero threads also means the sequential walker, which must still find the file.
#[test]
fn test_sequential_fallback_zero_threads() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    fs::write(base.join("file.txt"), "content").unwrap();

    let found = Scanner::with_threads(base, 0).scan().unwrap();

    assert_eq!(found.len(), 1);
}
/// Automatic selection on a small flat directory still reports every file.
#[test]
fn test_auto_select_small_directory() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    (0..10).for_each(|i| fs::write(base.join(format!("file{}.txt", i)), "content").unwrap());

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), 10);
}
/// Automatic selection on a root with fifty subdirectories reports every
/// directory and file.
#[test]
fn test_auto_select_many_subdirs() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    for i in 0..50 {
        let dir = base.join(format!("dir{:02}", i));
        fs::create_dir(&dir).unwrap();
        fs::write(dir.join("file.txt"), "content").unwrap();
    }

    let found = Scanner::new(base).scan().unwrap();

    assert_eq!(found.len(), 100);
}
/// Exactly thirty subdirectories is not enough for parallel mode; thirty-one is.
#[test]
fn test_threshold_boundary() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    (0..30).for_each(|i| fs::create_dir(base.join(format!("dir{:02}", i))).unwrap());
    let at_threshold = super::should_use_parallel(base);
    assert!(!at_threshold);

    fs::create_dir(base.join("dir30")).unwrap();
    let past_threshold = super::should_use_parallel(base);
    assert!(past_threshold);
}
/// Sequential and parallel scans agree on the set of paths and on every size.
#[test]
fn test_parallel_sequential_identical_results() {
    /// `(relative path, size)` for every entry, ordered by path.
    fn summarize(entries: &[FileEntry]) -> Vec<(Arc<PathBuf>, u64)> {
        let mut rows: Vec<_> =
            entries.iter().map(|entry| (entry.relative_path.clone(), entry.size)).collect();
        rows.sort_by(|left, right| left.0.cmp(&right.0));
        rows
    }

    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    for i in 0..50 {
        let dir = base.join(format!("dir{:02}", i));
        fs::create_dir(&dir).unwrap();
        fs::write(dir.join("file.txt"), format!("content{}", i)).unwrap();
    }

    let seq_entries = Scanner::with_threads(base, 1).scan().unwrap();
    let par_entries = Scanner::with_threads(base, 4).scan().unwrap();
    assert_eq!(seq_entries.len(), par_entries.len());

    let seq_rows = summarize(&seq_entries);
    let par_rows = summarize(&par_entries);

    // Paths first, so a mismatch in names is reported before one in sizes.
    let seq_paths: Vec<_> = seq_rows.iter().map(|(path, _)| path.clone()).collect();
    let par_paths: Vec<_> = par_rows.iter().map(|(path, _)| path.clone()).collect();
    assert_eq!(seq_paths, par_paths);
    assert_eq!(seq_rows, par_rows);
}
/// Root-level files mixed with forty subdirectories: parallel mode is selected
/// and every entry is still reported.
#[test]
fn test_mixed_files_and_subdirs() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    (0..20).for_each(|i| fs::write(base.join(format!("rootfile{}.txt", i)), "content").unwrap());
    for i in 0..40 {
        let dir = base.join(format!("subdir{:02}", i));
        fs::create_dir(&dir).unwrap();
        fs::write(dir.join("nested.txt"), "nested").unwrap();
    }

    assert!(super::should_use_parallel(base));

    let found = Scanner::new(base).scan().unwrap();
    assert_eq!(found.len(), 100);
}
/// A flat directory has no subdirectories, so parallel mode is not selected
/// no matter how many files it holds.
#[test]
fn test_flat_dir_no_parallel() {
    let workspace = TempDir::new().unwrap();
    let base = workspace.path();
    (0..1000).for_each(|i| fs::write(base.join(format!("file{:04}.txt", i)), "content").unwrap());

    assert!(!super::should_use_parallel(base));

    let found = Scanner::new(base).scan().unwrap();
    assert_eq!(found.len(), 1000);
}
}