Skip to main content

gravityfile_scan/
scanner.rs

1//! JWalk-based parallel directory scanner.
2
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Instant;
8
9#[cfg(unix)]
10use std::os::unix::fs::MetadataExt;
11
12use compact_str::CompactString;
13use jwalk::{DirEntry, Parallelism, WalkDirGeneric};
14use tokio::sync::broadcast;
15
16use gravityfile_core::{
17    FileNode, FileTree, InodeInfo, NodeId, NodeKind, ScanConfig, ScanError, ScanWarning,
18    Timestamps, TreeStats, WarningKind,
19};
20
21use crate::inode::InodeTracker;
22use crate::progress::ScanProgress;
23
24/// High-performance scanner using jwalk for parallel traversal.
25pub struct JwalkScanner {
26    progress_tx: broadcast::Sender<ScanProgress>,
27}
28
29impl JwalkScanner {
30    /// Create a new scanner.
31    pub fn new() -> Self {
32        let (progress_tx, _) = broadcast::channel(100);
33        Self { progress_tx }
34    }
35
36    /// Subscribe to scan progress updates.
37    pub fn subscribe(&self) -> broadcast::Receiver<ScanProgress> {
38        self.progress_tx.subscribe()
39    }
40
41    /// Perform a scan of the given path.
42    pub fn scan(&self, config: &ScanConfig) -> Result<FileTree, ScanError> {
43        let start = Instant::now();
44        let root_path = config
45            .root
46            .canonicalize()
47            .map_err(|e| ScanError::io(&config.root, e))?;
48
49        // Verify root is a directory
50        if !root_path.is_dir() {
51            return Err(ScanError::NotADirectory { path: root_path });
52        }
53
54        // Get root device for cross-filesystem detection
55        let root_metadata =
56            std::fs::metadata(&root_path).map_err(|e| ScanError::io(&root_path, e))?;
57        let root_device = get_dev(&root_metadata);
58
59        // Set up tracking
60        let mut inode_tracker = InodeTracker::new();
61        let node_id_counter = AtomicU64::new(0);
62        let mut stats = TreeStats::new();
63        let mut warnings = Vec::new();
64
65        // Collect all entries first
66        let entries = self.collect_entries(
67            config,
68            &root_path,
69            root_device,
70            &mut inode_tracker,
71            &mut stats,
72            &mut warnings,
73        )?;
74
75        // Build tree from collected entries
76        let root_node = self.build_tree(&root_path, entries, &node_id_counter, &mut stats);
77
78        let scan_duration = start.elapsed();
79
80        Ok(FileTree::new(
81            root_node,
82            root_path,
83            config.clone(),
84            stats,
85            scan_duration,
86            warnings,
87        ))
88    }
89
90    /// Collect all entries using jwalk.
91    fn collect_entries(
92        &self,
93        config: &ScanConfig,
94        root_path: &Path,
95        root_device: u64,
96        inode_tracker: &mut InodeTracker,
97        stats: &mut TreeStats,
98        warnings: &mut Vec<ScanWarning>,
99    ) -> Result<HashMap<PathBuf, Vec<EntryInfo>>, ScanError> {
100        // Platform-specific thread default: 4 on macOS, rayon default everywhere else.
101        let parallelism = match config.threads {
102            0 => {
103                #[cfg(target_os = "macos")]
104                {
105                    Parallelism::RayonNewPool(4)
106                }
107                #[cfg(not(target_os = "macos"))]
108                {
109                    Parallelism::RayonDefaultPool {
110                        busy_timeout: std::time::Duration::from_millis(100),
111                    }
112                }
113            }
114            n => Parallelism::RayonNewPool(n),
115        };
116
117        // Capture config fields needed in the closure.
118        let cross_filesystems = config.cross_filesystems;
119        let include_hidden = config.include_hidden;
120
121        // Re-use the GlobSet already compiled by ScanConfig when available.
122        // Fall back to compiling on-the-fly if patterns exist but weren't compiled
123        // (e.g. config built via the derive_builder path without calling compile_patterns).
124        let ignore_globset: Option<Arc<globset::GlobSet>> = config
125            .compiled_ignore_set()
126            .cloned()
127            .map(Arc::new)
128            .or_else(|| {
129                if config.ignore_patterns.is_empty() {
130                    return None;
131                }
132                let mut builder = globset::GlobSetBuilder::new();
133                for pattern in &config.ignore_patterns {
134                    if let Ok(glob) = globset::Glob::new(pattern) {
135                        builder.add(glob);
136                    }
137                }
138                builder.build().ok().map(Arc::new)
139            });
140
141        let walker = WalkDirGeneric::<((), ())>::new(root_path)
142            .parallelism(parallelism)
143            .skip_hidden(!include_hidden)
144            .follow_links(config.follow_symlinks)
145            .min_depth(0)
146            .max_depth(config.max_depth.map(|d| d as usize).unwrap_or(usize::MAX))
147            .process_read_dir(move |_depth, _dir_path, _state, children| {
148                // Prune and filter early — before jwalk recurses.
149                children.retain_mut(|entry_result| {
150                    let entry = match entry_result {
151                        Ok(e) => e,
152                        Err(_) => return true, // keep errors so they surface as warnings
153                    };
154
155                    let name = entry.file_name.to_string_lossy();
156
157                    // Apply ignore-pattern filter.
158                    if let Some(ref gs) = ignore_globset
159                        && gs.is_match(name.as_ref())
160                    {
161                        return false;
162                    }
163
164                    // Prune cross-filesystem subtrees for directories.
165                    if !cross_filesystems
166                        && entry.file_type.is_dir()
167                        && let Ok(meta) = entry.metadata()
168                        && get_dev(&meta) != root_device
169                    {
170                        // Setting read_children_path to None stops jwalk
171                        // from descending into this directory.
172                        entry.read_children_path = None;
173                        return false; // drop the dir entry itself too
174                    }
175
176                    true
177                });
178            });
179
180        // Map from parent path to children
181        let mut entries_by_parent: HashMap<PathBuf, Vec<EntryInfo>> = HashMap::new();
182        let mut progress_counter: u64 = 0;
183
184        for entry_result in walker {
185            let entry: DirEntry<((), ())> = match entry_result {
186                Ok(e) => e,
187                Err(err) => {
188                    let path = err.path().map(|p| p.to_path_buf()).unwrap_or_default();
189                    warnings.push(ScanWarning::new(
190                        path,
191                        WarningKind::ReadError,
192                        err.to_string(),
193                    ));
194                    continue;
195                }
196            };
197
198            let path = entry.path();
199            // Use CompactString directly to avoid an extra heap allocation.
200            let file_name = CompactString::new(entry.file_name().to_string_lossy());
201
202            // Get metadata
203            let metadata = match entry.metadata() {
204                Ok(m) => m,
205                Err(err) => {
206                    warnings.push(ScanWarning::new(
207                        &path,
208                        WarningKind::MetadataError,
209                        err.to_string(),
210                    ));
211                    continue;
212                }
213            };
214
215            // Handle different file types
216            let file_type = entry.file_type();
217            let depth = entry.depth() as u32;
218
219            if file_type.is_dir() {
220                stats.record_dir(depth);
221
222                // For directories, track them but size will be aggregated later
223                if let Some(parent) = path.parent() {
224                    let entry_info = EntryInfo {
225                        name: file_name,
226                        path: path.clone(),
227                        size: 0,
228                        blocks: 0,
229                        is_dir: true,
230                        is_symlink: false,
231                        symlink_target: None,
232                        symlink_broken: false,
233                        executable: false,
234                        timestamps: Timestamps::new(
235                            metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
236                            metadata.accessed().ok(),
237                            metadata.created().ok(),
238                        ),
239                        inode: Some(InodeInfo::new(get_ino(&metadata), get_dev(&metadata))),
240                    };
241
242                    entries_by_parent
243                        .entry(parent.to_path_buf())
244                        .or_default()
245                        .push(entry_info);
246                }
247            } else if file_type.is_file() {
248                // Filter non-directory entries on different filesystems at
249                // depth 1 (directly under scan root). Deeper entries are
250                // already excluded by the directory-prune in process_read_dir.
251                if !cross_filesystems && get_dev(&metadata) != root_device {
252                    continue;
253                }
254
255                let nlink = get_nlink(&metadata);
256                let inode_info = InodeInfo::new(get_ino(&metadata), get_dev(&metadata));
257
258                let size = if config.apparent_size {
259                    metadata.len()
260                } else {
261                    // Only count size for first hardlink encounter.
262                    if nlink > 1 && !inode_tracker.track(inode_info, nlink) {
263                        0 // Already counted this inode
264                    } else {
265                        // Use disk blocks for physical size when apparent_size == false.
266                        disk_size(&metadata)
267                    }
268                };
269
270                let blocks = get_blocks(&metadata);
271
272                stats.record_file(
273                    &path,
274                    size,
275                    metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
276                    depth,
277                );
278
279                if let Some(parent) = path.parent() {
280                    let executable = is_executable(&metadata);
281                    let entry_info = EntryInfo {
282                        name: file_name,
283                        path: path.clone(),
284                        size,
285                        blocks,
286                        is_dir: false,
287                        is_symlink: false,
288                        symlink_target: None,
289                        symlink_broken: false,
290                        executable,
291                        timestamps: Timestamps::new(
292                            metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
293                            metadata.accessed().ok(),
294                            metadata.created().ok(),
295                        ),
296                        inode: Some(inode_info),
297                    };
298
299                    entries_by_parent
300                        .entry(parent.to_path_buf())
301                        .or_default()
302                        .push(entry_info);
303                }
304
305                // Update progress periodically
306                progress_counter += 1;
307                if progress_counter.is_multiple_of(1000) {
308                    let _ = self.progress_tx.send(ScanProgress {
309                        files_scanned: stats.total_files,
310                        dirs_scanned: stats.total_dirs,
311                        bytes_scanned: stats.total_size,
312                        current_path: path.clone(),
313                        errors_count: warnings.len() as u64,
314                        elapsed: std::time::Duration::ZERO,
315                    });
316                }
317            } else if file_type.is_symlink() {
318                // Filter symlinks on different filesystems at depth 1.
319                if !cross_filesystems && get_dev(&metadata) != root_device {
320                    continue;
321                }
322
323                stats.record_symlink();
324
325                if let Some(parent) = path.parent() {
326                    // Read symlink target once; re-use for both the warning and
327                    // the EntryInfo so we never call read_link twice.
328                    let (symlink_target, symlink_broken) = match std::fs::read_link(&path) {
329                        Ok(target) => {
330                            // path.exists() follows the link; broken if it fails.
331                            let broken = !path.exists();
332                            let target_str = CompactString::new(target.to_string_lossy());
333                            (target_str, broken)
334                        }
335                        Err(_) => (CompactString::default(), true),
336                    };
337
338                    if symlink_broken {
339                        warnings.push(ScanWarning::broken_symlink(&path, symlink_target.as_str()));
340                    }
341
342                    let entry_info = EntryInfo {
343                        name: file_name,
344                        path: path.clone(),
345                        size: 0,
346                        blocks: 0,
347                        is_dir: false,
348                        is_symlink: true,
349                        symlink_target: Some(symlink_target),
350                        symlink_broken,
351                        executable: false,
352                        timestamps: Timestamps::new(
353                            metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
354                            metadata.accessed().ok(),
355                            metadata.created().ok(),
356                        ),
357                        inode: None,
358                    };
359
360                    entries_by_parent
361                        .entry(parent.to_path_buf())
362                        .or_default()
363                        .push(entry_info);
364                }
365            }
366        }
367
368        Ok(entries_by_parent)
369    }
370
371    /// Build tree structure from collected entries.
372    fn build_tree(
373        &self,
374        root_path: &Path,
375        mut entries_by_parent: HashMap<PathBuf, Vec<EntryInfo>>,
376        node_id_counter: &AtomicU64,
377        _stats: &mut TreeStats,
378    ) -> FileNode {
379        self.build_node(root_path, &mut entries_by_parent, node_id_counter)
380    }
381
382    /// Recursively build a node and its children.
383    fn build_node(
384        &self,
385        path: &Path,
386        entries_by_parent: &mut HashMap<PathBuf, Vec<EntryInfo>>,
387        node_id_counter: &AtomicU64,
388    ) -> FileNode {
389        let id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
390        let name = path
391            .file_name()
392            .map(|n| n.to_string_lossy().to_string())
393            .unwrap_or_else(|| path.to_string_lossy().to_string());
394
395        let metadata = std::fs::metadata(path).ok();
396        let timestamps = metadata
397            .as_ref()
398            .map(|m| {
399                Timestamps::new(
400                    m.modified().unwrap_or(std::time::UNIX_EPOCH),
401                    m.accessed().ok(),
402                    m.created().ok(),
403                )
404            })
405            .unwrap_or_else(|| Timestamps::with_modified(std::time::UNIX_EPOCH));
406
407        let mut node = FileNode::new_directory(id, name, timestamps);
408
409        // Get children for this path
410        let children_entries = entries_by_parent.remove(path).unwrap_or_default();
411
412        let mut total_size: u64 = 0;
413        let mut file_count: u64 = 0;
414        let mut dir_count: u64 = 0;
415
416        for entry in children_entries {
417            if entry.is_dir {
418                // Recursively build directory
419                let child_node = self.build_node(&entry.path, entries_by_parent, node_id_counter);
420                total_size += child_node.size;
421                file_count += child_node.file_count();
422                dir_count += child_node.dir_count() + 1;
423                node.children.push(child_node);
424            } else if entry.is_symlink {
425                // Re-use the already-resolved target and broken status from EntryInfo —
426                // no second syscall needed here.
427                let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
428                let target = entry.symlink_target.unwrap_or_default();
429                let broken = entry.symlink_broken;
430
431                let child_node = FileNode {
432                    id: child_id,
433                    name: entry.name,
434                    kind: NodeKind::Symlink { target, broken },
435                    size: 0,
436                    blocks: 0,
437                    timestamps: entry.timestamps,
438                    inode: None,
439                    content_hash: None,
440                    git_status: None,
441                    children: Vec::new(),
442                };
443                node.children.push(child_node);
444            } else {
445                // Create file node
446                let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
447                let mut child_node = FileNode::new_file(
448                    child_id,
449                    entry.name,
450                    entry.size,
451                    entry.blocks,
452                    entry.timestamps,
453                    entry.executable,
454                );
455                child_node.inode = entry.inode;
456
457                total_size += entry.size;
458                file_count += 1;
459                node.children.push(child_node);
460            }
461        }
462
463        // Update node with aggregated values
464        node.size = total_size;
465        node.kind = NodeKind::Directory {
466            file_count,
467            dir_count,
468        };
469
470        // Sort children by size (descending)
471        node.children.sort_by(|a, b| b.size.cmp(&a.size));
472
473        node
474    }
475}
476
477impl Default for JwalkScanner {
478    fn default() -> Self {
479        Self::new()
480    }
481}
482
483/// Create a quick, non-recursive directory listing for immediate display.
484/// This function reads only the immediate children of a directory without
485/// recursing into subdirectories. Directory sizes will be 0 (unknown).
486///
487/// Warnings encountered during the listing are included in the returned
488/// `FileTree` rather than being silently dropped.
489///
490/// The `config` parameter controls whether hidden files are shown. Pass
491/// `None` to use a default config (hidden files included).
492pub fn quick_list(path: &Path, config: Option<&ScanConfig>) -> Result<FileTree, ScanError> {
493    use std::sync::atomic::{AtomicU64, Ordering};
494    use std::time::Instant;
495
496    let start = Instant::now();
497    let root_path = path.canonicalize().map_err(|e| ScanError::io(path, e))?;
498
499    if !root_path.is_dir() {
500        return Err(ScanError::NotADirectory {
501            path: root_path.clone(),
502        });
503    }
504
505    // Build an owned config for the case where none was supplied.
506    let owned_config;
507    let cfg: &ScanConfig = match config {
508        Some(c) => c,
509        None => {
510            owned_config = ScanConfig::new(&root_path);
511            &owned_config
512        }
513    };
514
515    let node_id_counter = AtomicU64::new(0);
516    let mut stats = TreeStats::new();
517    let mut warnings: Vec<ScanWarning> = Vec::new();
518
519    // Get root directory metadata
520    let root_metadata = std::fs::metadata(&root_path).map_err(|e| ScanError::io(&root_path, e))?;
521    let root_timestamps = Timestamps::new(
522        root_metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
523        root_metadata.accessed().ok(),
524        root_metadata.created().ok(),
525    );
526
527    let root_name = root_path
528        .file_name()
529        .map(|n| n.to_string_lossy().to_string())
530        .unwrap_or_else(|| root_path.to_string_lossy().to_string());
531
532    let root_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
533    let mut root_node = FileNode::new_directory(root_id, root_name, root_timestamps);
534
535    // Read immediate children
536    let read_dir = match std::fs::read_dir(&root_path) {
537        Ok(rd) => rd,
538        Err(e) => return Err(ScanError::io(&root_path, e)),
539    };
540
541    let mut total_size: u64 = 0;
542    let mut file_count: u64 = 0;
543    let mut dir_count: u64 = 0;
544
545    for entry_result in read_dir {
546        let entry = match entry_result {
547            Ok(e) => e,
548            Err(e) => {
549                warnings.push(ScanWarning::new(
550                    root_path.clone(),
551                    WarningKind::ReadError,
552                    e.to_string(),
553                ));
554                continue;
555            }
556        };
557
558        let entry_path = entry.path();
559        let entry_name = entry.file_name().to_string_lossy().to_string();
560
561        // Respect include_hidden from config.
562        if !cfg.include_hidden && entry_name.starts_with('.') {
563            continue;
564        }
565
566        // Respect ignore patterns from config.
567        if cfg.should_ignore(&entry_name) {
568            continue;
569        }
570
571        let metadata = match entry.metadata() {
572            Ok(m) => m,
573            Err(e) => {
574                warnings.push(ScanWarning::new(
575                    entry_path,
576                    WarningKind::MetadataError,
577                    e.to_string(),
578                ));
579                continue;
580            }
581        };
582
583        let timestamps = Timestamps::new(
584            metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
585            metadata.accessed().ok(),
586            metadata.created().ok(),
587        );
588
589        let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
590
591        if metadata.is_dir() {
592            // Directory - size is unknown (0) until full scan
593            let child_node =
594                FileNode::new_directory(child_id, CompactString::new(&entry_name), timestamps);
595            root_node.children.push(child_node);
596            dir_count += 1;
597            stats.record_dir(1);
598        } else if metadata.is_file() {
599            let size = if cfg.apparent_size {
600                metadata.len()
601            } else {
602                disk_size(&metadata)
603            };
604            let blocks = get_blocks(&metadata);
605            let executable = is_executable(&metadata);
606
607            let mut child_node = FileNode::new_file(
608                child_id,
609                CompactString::new(&entry_name),
610                size,
611                blocks,
612                timestamps,
613                executable,
614            );
615
616            // Set inode info for potential hardlink detection
617            let inode = InodeInfo::new(get_ino(&metadata), get_dev(&metadata));
618            child_node.inode = Some(inode);
619
620            total_size += size;
621            file_count += 1;
622            root_node.children.push(child_node);
623            stats.record_file(&entry_path, size, timestamps.modified, 1);
624        } else if metadata.file_type().is_symlink() {
625            // read_link + exists() in a single pass to avoid the double-syscall.
626            let (target, broken) = match std::fs::read_link(&entry_path) {
627                Ok(t) => {
628                    let broken = !entry_path.exists();
629                    (CompactString::new(t.to_string_lossy()), broken)
630                }
631                Err(_) => (CompactString::default(), true),
632            };
633
634            if broken {
635                warnings.push(ScanWarning::broken_symlink(&entry_path, target.as_str()));
636            }
637
638            let child_node = FileNode {
639                id: child_id,
640                name: CompactString::new(&entry_name),
641                kind: NodeKind::Symlink { target, broken },
642                size: 0,
643                blocks: 0,
644                timestamps,
645                inode: None,
646                content_hash: None,
647                git_status: None,
648                children: Vec::new(),
649            };
650            root_node.children.push(child_node);
651            stats.record_symlink();
652        }
653    }
654
655    // Update root node with aggregated values
656    root_node.size = total_size;
657    root_node.kind = NodeKind::Directory {
658        file_count,
659        dir_count,
660    };
661
662    // Sort children by name for initial display (scan will re-sort by size later)
663    root_node.children.sort_by(|a, b| a.name.cmp(&b.name));
664
665    stats.record_dir(0);
666
667    let scan_config = cfg.clone();
668    let scan_duration = start.elapsed();
669
670    Ok(FileTree::new(
671        root_node,
672        root_path,
673        scan_config,
674        stats,
675        scan_duration,
676        warnings,
677    ))
678}
679
680/// Temporary struct for collecting entry information.
681struct EntryInfo {
682    name: CompactString,
683    path: PathBuf,
684    size: u64,
685    blocks: u64,
686    is_dir: bool,
687    is_symlink: bool,
688    /// Pre-resolved symlink target (avoids re-reading in build_node).
689    symlink_target: Option<CompactString>,
690    /// Pre-computed broken status (avoids re-calling path.exists() in build_node).
691    symlink_broken: bool,
692    executable: bool,
693    timestamps: Timestamps,
694    inode: Option<InodeInfo>,
695}
696
/// Report whether any execute permission bit (owner, group or other) is set.
#[cfg(unix)]
fn is_executable(metadata: &std::fs::Metadata) -> bool {
    use std::os::unix::fs::PermissionsExt;

    // Union of the three execute bits: u+x, g+x, o+x.
    const ANY_EXECUTE_BIT: u32 = 0o111;

    let mode = metadata.permissions().mode();
    (mode & ANY_EXECUTE_BIT) != 0
}

/// Without Unix permission bits nothing is reported as executable.
#[cfg(not(unix))]
fn is_executable(_metadata: &std::fs::Metadata) -> bool {
    false
}
708
709// Cross-platform metadata helpers
710
/// Device ID of the filesystem object described by `metadata`.
#[cfg(unix)]
fn get_dev(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::dev(metadata)
}

/// Fallback for Windows and every other non-Unix target: always 0.
///
/// Windows doesn't expose a simple numeric device ID via MetadataExt.
#[cfg(not(unix))]
fn get_dev(_metadata: &std::fs::Metadata) -> u64 {
    0
}
726
/// Inode number of the filesystem object described by `metadata`.
#[cfg(unix)]
fn get_ino(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::ino(metadata)
}

/// Fallback for Windows and every other non-Unix target: always 0.
///
/// On Windows, file_index() requires the unstable `windows_by_handle`
/// feature. Hardlink dedup is not supported there; 0 is returned so every
/// file is treated as unique.
#[cfg(not(unix))]
fn get_ino(_metadata: &std::fs::Metadata) -> u64 {
    0
}
745
/// Number of hard links to the filesystem object described by `metadata`.
#[cfg(unix)]
fn get_nlink(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::nlink(metadata)
}

/// Fallback for Windows and every other non-Unix target: always 1.
///
/// On Windows, number_of_links() requires the unstable `windows_by_handle`
/// feature; returning 1 skips hardlink dedup. Other platforms are assumed
/// to have a single link per file.
#[cfg(not(unix))]
fn get_nlink(_metadata: &std::fs::Metadata) -> u64 {
    1
}
763
/// Get the number of 512-byte blocks from metadata.
///
/// On Unix this is the block count reported by the filesystem.
#[cfg(unix)]
fn get_blocks(metadata: &std::fs::Metadata) -> u64 {
    metadata.blocks()
}

/// Estimate the number of 512-byte blocks from the file length, rounding up.
///
/// `div_ceil` is used instead of `(len + 511) / 512` so the computation
/// cannot overflow for lengths close to `u64::MAX`.
#[cfg(not(unix))]
fn get_blocks(metadata: &std::fs::Metadata) -> u64 {
    metadata.len().div_ceil(512)
}
775
776/// Compute the physical (disk) size of a file.
777///
778/// On Unix this is `blocks * 512` (the kernel-reported allocation unit).
779/// On other platforms we fall back to the apparent size since there is no
780/// portable way to query the on-disk allocation.
781#[cfg(unix)]
782fn disk_size(metadata: &std::fs::Metadata) -> u64 {
783    get_blocks(metadata) * 512
784}
785
786#[cfg(not(unix))]
787fn disk_size(metadata: &std::fs::Metadata) -> u64 {
788    metadata.len()
789}
790
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build the fixture used by the scan tests:
    ///
    /// ```text
    /// root/
    ///   file1.txt
    ///   dir1/file2.txt
    ///   dir1/subdir/file3.txt
    ///   dir2/file4.txt
    /// ```
    fn create_test_tree() -> TempDir {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Directories first (parents before children).
        for dir in ["dir1", "dir2", "dir1/subdir"] {
            fs::create_dir(root.join(dir)).unwrap();
        }

        // Then the files, each with a little content.
        let files = [
            ("file1.txt", "hello"),
            ("dir1/file2.txt", "world world world"),
            ("dir1/subdir/file3.txt", "test"),
            ("dir2/file4.txt", "another file here"),
        ];
        for (relative, contents) in files {
            fs::write(root.join(relative), contents).unwrap();
        }

        temp
    }

    /// Create a directory holding one hidden and one visible file.
    fn create_hidden_fixture() -> TempDir {
        let temp = TempDir::new().unwrap();
        fs::write(temp.path().join(".hidden"), "secret").unwrap();
        fs::write(temp.path().join("visible"), "public").unwrap();
        temp
    }

    /// Run `quick_list` on `temp` with the given `include_hidden` setting and
    /// return the names of the root's children.
    fn quick_list_names(temp: &TempDir, include_hidden: bool) -> Vec<String> {
        let root = temp.path();
        let config = ScanConfig::builder()
            .root(root)
            .include_hidden(include_hidden)
            .build()
            .unwrap();

        let tree = quick_list(root, Some(&config)).unwrap();
        tree.root
            .children
            .iter()
            .map(|child| child.name.as_str().to_string())
            .collect()
    }

    #[test]
    fn test_basic_scan() {
        let temp = create_test_tree();
        let config = ScanConfig::new(temp.path());

        let tree = JwalkScanner::new().scan(&config).unwrap();

        assert_eq!(tree.stats.total_files, 4);
        // dir1, dir2 and subdir must be recorded; the bound is loose so the
        // test does not depend on whether the root itself is counted.
        assert!(tree.stats.total_dirs >= 3);
        assert!(tree.root.size > 0);
    }

    #[test]
    fn test_children_sorted_by_size() {
        let temp = create_test_tree();
        let config = ScanConfig::new(temp.path());

        let tree = JwalkScanner::new().scan(&config).unwrap();

        // Every adjacent pair must be in non-increasing size order.
        for pair in tree.root.children.windows(2) {
            assert!(pair[0].size >= pair[1].size);
        }
    }

    #[test]
    fn test_ignore_patterns() {
        let temp = create_test_tree();
        let config = ScanConfig::builder()
            .root(temp.path())
            .ignore_patterns(vec!["dir2".to_string()])
            .build()
            .unwrap();

        let tree = JwalkScanner::new().scan(&config).unwrap();

        // The ignored directory must not show up among the root's children.
        let dir2_present = tree
            .root
            .children
            .iter()
            .any(|child| child.name.as_str() == "dir2");
        assert!(!dir2_present);
    }

    #[test]
    fn test_quick_list_respects_hidden() {
        let temp = create_hidden_fixture();

        // With hidden entries excluded only the visible file is listed.
        let names = quick_list_names(&temp, false);

        assert!(names.iter().any(|n| n == "visible"));
        assert!(!names.iter().any(|n| n == ".hidden"));
    }

    #[test]
    fn test_quick_list_includes_hidden_when_configured() {
        let temp = create_hidden_fixture();

        let names = quick_list_names(&temp, true);

        assert!(names.iter().any(|n| n == "visible"));
        assert!(names.iter().any(|n| n == ".hidden"));
    }
}
897}