1use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Instant;
8
9#[cfg(unix)]
10use std::os::unix::fs::MetadataExt;
11
12use compact_str::CompactString;
13use jwalk::{DirEntry, Parallelism, WalkDirGeneric};
14use tokio::sync::broadcast;
15
16use gravityfile_core::{
17 FileNode, FileTree, InodeInfo, NodeId, NodeKind, ScanConfig, ScanError, ScanWarning,
18 Timestamps, TreeStats, WarningKind,
19};
20
21use crate::inode::InodeTracker;
22use crate::progress::ScanProgress;
23
24pub struct JwalkScanner {
26 progress_tx: broadcast::Sender<ScanProgress>,
27}
28
29impl JwalkScanner {
30 pub fn new() -> Self {
32 let (progress_tx, _) = broadcast::channel(100);
33 Self { progress_tx }
34 }
35
36 pub fn subscribe(&self) -> broadcast::Receiver<ScanProgress> {
38 self.progress_tx.subscribe()
39 }
40
41 pub fn scan(&self, config: &ScanConfig) -> Result<FileTree, ScanError> {
43 let start = Instant::now();
44 let root_path = config
45 .root
46 .canonicalize()
47 .map_err(|e| ScanError::io(&config.root, e))?;
48
49 if !root_path.is_dir() {
51 return Err(ScanError::NotADirectory { path: root_path });
52 }
53
54 let root_metadata =
56 std::fs::metadata(&root_path).map_err(|e| ScanError::io(&root_path, e))?;
57 let root_device = get_dev(&root_metadata);
58
59 let mut inode_tracker = InodeTracker::new();
61 let node_id_counter = AtomicU64::new(0);
62 let mut stats = TreeStats::new();
63 let mut warnings = Vec::new();
64
65 let entries = self.collect_entries(
67 config,
68 &root_path,
69 root_device,
70 &mut inode_tracker,
71 &mut stats,
72 &mut warnings,
73 )?;
74
75 let root_node = self.build_tree(&root_path, entries, &node_id_counter, &mut stats);
77
78 let scan_duration = start.elapsed();
79
80 Ok(FileTree::new(
81 root_node,
82 root_path,
83 config.clone(),
84 stats,
85 scan_duration,
86 warnings,
87 ))
88 }
89
90 fn collect_entries(
92 &self,
93 config: &ScanConfig,
94 root_path: &Path,
95 root_device: u64,
96 inode_tracker: &mut InodeTracker,
97 stats: &mut TreeStats,
98 warnings: &mut Vec<ScanWarning>,
99 ) -> Result<HashMap<PathBuf, Vec<EntryInfo>>, ScanError> {
100 let parallelism = match config.threads {
102 0 => {
103 #[cfg(target_os = "macos")]
104 {
105 Parallelism::RayonNewPool(4)
106 }
107 #[cfg(not(target_os = "macos"))]
108 {
109 Parallelism::RayonDefaultPool {
110 busy_timeout: std::time::Duration::from_millis(100),
111 }
112 }
113 }
114 n => Parallelism::RayonNewPool(n),
115 };
116
117 let cross_filesystems = config.cross_filesystems;
119 let include_hidden = config.include_hidden;
120
121 let ignore_globset: Option<Arc<globset::GlobSet>> = config
125 .compiled_ignore_set()
126 .cloned()
127 .map(Arc::new)
128 .or_else(|| {
129 if config.ignore_patterns.is_empty() {
130 return None;
131 }
132 let mut builder = globset::GlobSetBuilder::new();
133 for pattern in &config.ignore_patterns {
134 if let Ok(glob) = globset::Glob::new(pattern) {
135 builder.add(glob);
136 }
137 }
138 builder.build().ok().map(Arc::new)
139 });
140
141 let walker = WalkDirGeneric::<((), ())>::new(root_path)
142 .parallelism(parallelism)
143 .skip_hidden(!include_hidden)
144 .follow_links(config.follow_symlinks)
145 .min_depth(0)
146 .max_depth(config.max_depth.map(|d| d as usize).unwrap_or(usize::MAX))
147 .process_read_dir(move |_depth, _dir_path, _state, children| {
148 children.retain_mut(|entry_result| {
150 let entry = match entry_result {
151 Ok(e) => e,
152 Err(_) => return true, };
154
155 let name = entry.file_name.to_string_lossy();
156
157 if let Some(ref gs) = ignore_globset
159 && gs.is_match(name.as_ref())
160 {
161 return false;
162 }
163
164 if !cross_filesystems
166 && entry.file_type.is_dir()
167 && let Ok(meta) = entry.metadata()
168 && get_dev(&meta) != root_device
169 {
170 entry.read_children_path = None;
173 return false; }
175
176 true
177 });
178 });
179
180 let mut entries_by_parent: HashMap<PathBuf, Vec<EntryInfo>> = HashMap::new();
182 let mut progress_counter: u64 = 0;
183
184 for entry_result in walker {
185 let entry: DirEntry<((), ())> = match entry_result {
186 Ok(e) => e,
187 Err(err) => {
188 let path = err.path().map(|p| p.to_path_buf()).unwrap_or_default();
189 warnings.push(ScanWarning::new(
190 path,
191 WarningKind::ReadError,
192 err.to_string(),
193 ));
194 continue;
195 }
196 };
197
198 let path = entry.path();
199 let file_name = CompactString::new(entry.file_name().to_string_lossy());
201
202 let metadata = match entry.metadata() {
204 Ok(m) => m,
205 Err(err) => {
206 warnings.push(ScanWarning::new(
207 &path,
208 WarningKind::MetadataError,
209 err.to_string(),
210 ));
211 continue;
212 }
213 };
214
215 let file_type = entry.file_type();
217 let depth = entry.depth() as u32;
218
219 if file_type.is_dir() {
220 stats.record_dir(depth);
221
222 if let Some(parent) = path.parent() {
224 let entry_info = EntryInfo {
225 name: file_name,
226 path: path.clone(),
227 size: 0,
228 blocks: 0,
229 is_dir: true,
230 is_symlink: false,
231 symlink_target: None,
232 symlink_broken: false,
233 executable: false,
234 timestamps: Timestamps::new(
235 metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
236 metadata.accessed().ok(),
237 metadata.created().ok(),
238 ),
239 inode: Some(InodeInfo::new(get_ino(&metadata), get_dev(&metadata))),
240 };
241
242 entries_by_parent
243 .entry(parent.to_path_buf())
244 .or_default()
245 .push(entry_info);
246 }
247 } else if file_type.is_file() {
248 if !cross_filesystems && get_dev(&metadata) != root_device {
252 continue;
253 }
254
255 let nlink = get_nlink(&metadata);
256 let inode_info = InodeInfo::new(get_ino(&metadata), get_dev(&metadata));
257
258 let size = if config.apparent_size {
259 metadata.len()
260 } else {
261 if nlink > 1 && !inode_tracker.track(inode_info, nlink) {
263 0 } else {
265 disk_size(&metadata)
267 }
268 };
269
270 let blocks = get_blocks(&metadata);
271
272 stats.record_file(
273 &path,
274 size,
275 metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
276 depth,
277 );
278
279 if let Some(parent) = path.parent() {
280 let executable = is_executable(&metadata);
281 let entry_info = EntryInfo {
282 name: file_name,
283 path: path.clone(),
284 size,
285 blocks,
286 is_dir: false,
287 is_symlink: false,
288 symlink_target: None,
289 symlink_broken: false,
290 executable,
291 timestamps: Timestamps::new(
292 metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
293 metadata.accessed().ok(),
294 metadata.created().ok(),
295 ),
296 inode: Some(inode_info),
297 };
298
299 entries_by_parent
300 .entry(parent.to_path_buf())
301 .or_default()
302 .push(entry_info);
303 }
304
305 progress_counter += 1;
307 if progress_counter.is_multiple_of(1000) {
308 let _ = self.progress_tx.send(ScanProgress {
309 files_scanned: stats.total_files,
310 dirs_scanned: stats.total_dirs,
311 bytes_scanned: stats.total_size,
312 current_path: path.clone(),
313 errors_count: warnings.len() as u64,
314 elapsed: std::time::Duration::ZERO,
315 });
316 }
317 } else if file_type.is_symlink() {
318 if !cross_filesystems && get_dev(&metadata) != root_device {
320 continue;
321 }
322
323 stats.record_symlink();
324
325 if let Some(parent) = path.parent() {
326 let (symlink_target, symlink_broken) = match std::fs::read_link(&path) {
329 Ok(target) => {
330 let broken = !path.exists();
332 let target_str = CompactString::new(target.to_string_lossy());
333 (target_str, broken)
334 }
335 Err(_) => (CompactString::default(), true),
336 };
337
338 if symlink_broken {
339 warnings.push(ScanWarning::broken_symlink(&path, symlink_target.as_str()));
340 }
341
342 let entry_info = EntryInfo {
343 name: file_name,
344 path: path.clone(),
345 size: 0,
346 blocks: 0,
347 is_dir: false,
348 is_symlink: true,
349 symlink_target: Some(symlink_target),
350 symlink_broken,
351 executable: false,
352 timestamps: Timestamps::new(
353 metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
354 metadata.accessed().ok(),
355 metadata.created().ok(),
356 ),
357 inode: None,
358 };
359
360 entries_by_parent
361 .entry(parent.to_path_buf())
362 .or_default()
363 .push(entry_info);
364 }
365 }
366 }
367
368 Ok(entries_by_parent)
369 }
370
371 fn build_tree(
373 &self,
374 root_path: &Path,
375 mut entries_by_parent: HashMap<PathBuf, Vec<EntryInfo>>,
376 node_id_counter: &AtomicU64,
377 _stats: &mut TreeStats,
378 ) -> FileNode {
379 self.build_node(root_path, &mut entries_by_parent, node_id_counter)
380 }
381
382 fn build_node(
384 &self,
385 path: &Path,
386 entries_by_parent: &mut HashMap<PathBuf, Vec<EntryInfo>>,
387 node_id_counter: &AtomicU64,
388 ) -> FileNode {
389 let id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
390 let name = path
391 .file_name()
392 .map(|n| n.to_string_lossy().to_string())
393 .unwrap_or_else(|| path.to_string_lossy().to_string());
394
395 let metadata = std::fs::metadata(path).ok();
396 let timestamps = metadata
397 .as_ref()
398 .map(|m| {
399 Timestamps::new(
400 m.modified().unwrap_or(std::time::UNIX_EPOCH),
401 m.accessed().ok(),
402 m.created().ok(),
403 )
404 })
405 .unwrap_or_else(|| Timestamps::with_modified(std::time::UNIX_EPOCH));
406
407 let mut node = FileNode::new_directory(id, name, timestamps);
408
409 let children_entries = entries_by_parent.remove(path).unwrap_or_default();
411
412 let mut total_size: u64 = 0;
413 let mut file_count: u64 = 0;
414 let mut dir_count: u64 = 0;
415
416 for entry in children_entries {
417 if entry.is_dir {
418 let child_node = self.build_node(&entry.path, entries_by_parent, node_id_counter);
420 total_size += child_node.size;
421 file_count += child_node.file_count();
422 dir_count += child_node.dir_count() + 1;
423 node.children.push(child_node);
424 } else if entry.is_symlink {
425 let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
428 let target = entry.symlink_target.unwrap_or_default();
429 let broken = entry.symlink_broken;
430
431 let child_node = FileNode {
432 id: child_id,
433 name: entry.name,
434 kind: NodeKind::Symlink { target, broken },
435 size: 0,
436 blocks: 0,
437 timestamps: entry.timestamps,
438 inode: None,
439 content_hash: None,
440 git_status: None,
441 children: Vec::new(),
442 };
443 node.children.push(child_node);
444 } else {
445 let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
447 let mut child_node = FileNode::new_file(
448 child_id,
449 entry.name,
450 entry.size,
451 entry.blocks,
452 entry.timestamps,
453 entry.executable,
454 );
455 child_node.inode = entry.inode;
456
457 total_size += entry.size;
458 file_count += 1;
459 node.children.push(child_node);
460 }
461 }
462
463 node.size = total_size;
465 node.kind = NodeKind::Directory {
466 file_count,
467 dir_count,
468 };
469
470 node.children.sort_by(|a, b| b.size.cmp(&a.size));
472
473 node
474 }
475}
476
477impl Default for JwalkScanner {
478 fn default() -> Self {
479 Self::new()
480 }
481}
482
483pub fn quick_list(path: &Path, config: Option<&ScanConfig>) -> Result<FileTree, ScanError> {
493 use std::sync::atomic::{AtomicU64, Ordering};
494 use std::time::Instant;
495
496 let start = Instant::now();
497 let root_path = path.canonicalize().map_err(|e| ScanError::io(path, e))?;
498
499 if !root_path.is_dir() {
500 return Err(ScanError::NotADirectory {
501 path: root_path.clone(),
502 });
503 }
504
505 let owned_config;
507 let cfg: &ScanConfig = match config {
508 Some(c) => c,
509 None => {
510 owned_config = ScanConfig::new(&root_path);
511 &owned_config
512 }
513 };
514
515 let node_id_counter = AtomicU64::new(0);
516 let mut stats = TreeStats::new();
517 let mut warnings: Vec<ScanWarning> = Vec::new();
518
519 let root_metadata = std::fs::metadata(&root_path).map_err(|e| ScanError::io(&root_path, e))?;
521 let root_timestamps = Timestamps::new(
522 root_metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
523 root_metadata.accessed().ok(),
524 root_metadata.created().ok(),
525 );
526
527 let root_name = root_path
528 .file_name()
529 .map(|n| n.to_string_lossy().to_string())
530 .unwrap_or_else(|| root_path.to_string_lossy().to_string());
531
532 let root_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
533 let mut root_node = FileNode::new_directory(root_id, root_name, root_timestamps);
534
535 let read_dir = match std::fs::read_dir(&root_path) {
537 Ok(rd) => rd,
538 Err(e) => return Err(ScanError::io(&root_path, e)),
539 };
540
541 let mut total_size: u64 = 0;
542 let mut file_count: u64 = 0;
543 let mut dir_count: u64 = 0;
544
545 for entry_result in read_dir {
546 let entry = match entry_result {
547 Ok(e) => e,
548 Err(e) => {
549 warnings.push(ScanWarning::new(
550 root_path.clone(),
551 WarningKind::ReadError,
552 e.to_string(),
553 ));
554 continue;
555 }
556 };
557
558 let entry_path = entry.path();
559 let entry_name = entry.file_name().to_string_lossy().to_string();
560
561 if !cfg.include_hidden && entry_name.starts_with('.') {
563 continue;
564 }
565
566 if cfg.should_ignore(&entry_name) {
568 continue;
569 }
570
571 let metadata = match entry.metadata() {
572 Ok(m) => m,
573 Err(e) => {
574 warnings.push(ScanWarning::new(
575 entry_path,
576 WarningKind::MetadataError,
577 e.to_string(),
578 ));
579 continue;
580 }
581 };
582
583 let timestamps = Timestamps::new(
584 metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
585 metadata.accessed().ok(),
586 metadata.created().ok(),
587 );
588
589 let child_id = NodeId::new(node_id_counter.fetch_add(1, Ordering::Relaxed));
590
591 if metadata.is_dir() {
592 let child_node =
594 FileNode::new_directory(child_id, CompactString::new(&entry_name), timestamps);
595 root_node.children.push(child_node);
596 dir_count += 1;
597 stats.record_dir(1);
598 } else if metadata.is_file() {
599 let size = if cfg.apparent_size {
600 metadata.len()
601 } else {
602 disk_size(&metadata)
603 };
604 let blocks = get_blocks(&metadata);
605 let executable = is_executable(&metadata);
606
607 let mut child_node = FileNode::new_file(
608 child_id,
609 CompactString::new(&entry_name),
610 size,
611 blocks,
612 timestamps,
613 executable,
614 );
615
616 let inode = InodeInfo::new(get_ino(&metadata), get_dev(&metadata));
618 child_node.inode = Some(inode);
619
620 total_size += size;
621 file_count += 1;
622 root_node.children.push(child_node);
623 stats.record_file(&entry_path, size, timestamps.modified, 1);
624 } else if metadata.file_type().is_symlink() {
625 let (target, broken) = match std::fs::read_link(&entry_path) {
627 Ok(t) => {
628 let broken = !entry_path.exists();
629 (CompactString::new(t.to_string_lossy()), broken)
630 }
631 Err(_) => (CompactString::default(), true),
632 };
633
634 if broken {
635 warnings.push(ScanWarning::broken_symlink(&entry_path, target.as_str()));
636 }
637
638 let child_node = FileNode {
639 id: child_id,
640 name: CompactString::new(&entry_name),
641 kind: NodeKind::Symlink { target, broken },
642 size: 0,
643 blocks: 0,
644 timestamps,
645 inode: None,
646 content_hash: None,
647 git_status: None,
648 children: Vec::new(),
649 };
650 root_node.children.push(child_node);
651 stats.record_symlink();
652 }
653 }
654
655 root_node.size = total_size;
657 root_node.kind = NodeKind::Directory {
658 file_count,
659 dir_count,
660 };
661
662 root_node.children.sort_by(|a, b| a.name.cmp(&b.name));
664
665 stats.record_dir(0);
666
667 let scan_config = cfg.clone();
668 let scan_duration = start.elapsed();
669
670 Ok(FileTree::new(
671 root_node,
672 root_path,
673 scan_config,
674 stats,
675 scan_duration,
676 warnings,
677 ))
678}
679
680struct EntryInfo {
682 name: CompactString,
683 path: PathBuf,
684 size: u64,
685 blocks: u64,
686 is_dir: bool,
687 is_symlink: bool,
688 symlink_target: Option<CompactString>,
690 symlink_broken: bool,
692 executable: bool,
693 timestamps: Timestamps,
694 inode: Option<InodeInfo>,
695}
696
/// Reports whether any execute permission bit (owner, group or other) is set
/// in the file's mode.
#[cfg(unix)]
fn is_executable(metadata: &std::fs::Metadata) -> bool {
    use std::os::unix::fs::PermissionsExt;

    const ANY_EXECUTE_BIT: u32 = 0o111;

    let mode = metadata.permissions().mode();
    (mode & ANY_EXECUTE_BIT) != 0
}

/// Without Unix permission bits to inspect, every file is reported as
/// non-executable.
#[cfg(not(unix))]
fn is_executable(_metadata: &std::fs::Metadata) -> bool {
    false
}
708
/// Returns the ID of the device that holds the file, as reported by the Unix
/// metadata extension.
#[cfg(unix)]
fn get_dev(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::dev(metadata)
}

/// Device IDs are not read on non-Unix targets (Windows included): every file
/// reports device 0, so all entries compare as being on the same device.
#[cfg(not(unix))]
fn get_dev(_metadata: &std::fs::Metadata) -> u64 {
    0
}
726
/// Returns the file's inode number, as reported by the Unix metadata
/// extension.
#[cfg(unix)]
fn get_ino(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::ino(metadata)
}

/// Inode numbers are not read on non-Unix targets (Windows included): every
/// file reports inode 0.
#[cfg(not(unix))]
fn get_ino(_metadata: &std::fs::Metadata) -> u64 {
    0
}
745
/// Returns the number of hard links that point at the file, as reported by
/// the Unix metadata extension.
#[cfg(unix)]
fn get_nlink(metadata: &std::fs::Metadata) -> u64 {
    MetadataExt::nlink(metadata)
}

/// Hard-link counts are not read on non-Unix targets (Windows included):
/// every file is treated as having exactly one link.
#[cfg(not(unix))]
fn get_nlink(_metadata: &std::fs::Metadata) -> u64 {
    1
}
763
/// Returns the number of blocks allocated to the file, as reported by the
/// Unix metadata extension (512-byte units, matching `disk_size`).
#[cfg(unix)]
fn get_blocks(metadata: &std::fs::Metadata) -> u64 {
    metadata.blocks()
}

/// Estimates the block count on non-Unix targets by rounding the file length
/// up to whole 512-byte blocks.
#[cfg(not(unix))]
fn get_blocks(metadata: &std::fs::Metadata) -> u64 {
    // `div_ceil` rounds up without the `len + 511` intermediate, which could
    // overflow for lengths close to `u64::MAX`.
    metadata.len().div_ceil(512)
}
775
776#[cfg(unix)]
782fn disk_size(metadata: &std::fs::Metadata) -> u64 {
783 get_blocks(metadata) * 512
784}
785
786#[cfg(not(unix))]
787fn disk_size(metadata: &std::fs::Metadata) -> u64 {
788 metadata.len()
789}
790
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds the shared fixture:
    ///
    /// ```text
    /// file1.txt
    /// dir1/file2.txt
    /// dir1/subdir/file3.txt
    /// dir2/file4.txt
    /// ```
    fn create_test_tree() -> TempDir {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Parents are listed before their children.
        for dir in ["dir1", "dir2", "dir1/subdir"] {
            fs::create_dir(root.join(dir)).unwrap();
        }

        let files = [
            ("file1.txt", "hello"),
            ("dir1/file2.txt", "world world world"),
            ("dir1/subdir/file3.txt", "test"),
            ("dir2/file4.txt", "another file here"),
        ];
        for (relative, contents) in files {
            fs::write(root.join(relative), contents).unwrap();
        }

        temp
    }

    /// Runs a full scan of `temp` with a default configuration.
    fn scan_with_defaults(temp: &TempDir) -> FileTree {
        let config = ScanConfig::new(temp.path());
        JwalkScanner::new().scan(&config).unwrap()
    }

    /// Creates a directory holding `.hidden` and `visible`, runs `quick_list`
    /// on it with the given hidden-file setting, and returns the child names.
    fn quick_list_names(include_hidden: bool) -> Vec<String> {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join(".hidden"), "secret").unwrap();
        fs::write(root.join("visible"), "public").unwrap();

        let config = ScanConfig::builder()
            .root(root)
            .include_hidden(include_hidden)
            .build()
            .unwrap();

        let tree = quick_list(root, Some(&config)).unwrap();
        tree.root
            .children
            .iter()
            .map(|child| child.name.as_str().to_owned())
            .collect()
    }

    #[test]
    fn test_basic_scan() {
        let temp = create_test_tree();
        let tree = scan_with_defaults(&temp);

        assert_eq!(tree.stats.total_files, 4);
        assert!(tree.stats.total_dirs >= 3);
        assert!(tree.root.size > 0);
    }

    #[test]
    fn test_children_sorted_by_size() {
        let temp = create_test_tree();
        let tree = scan_with_defaults(&temp);

        // Every neighbouring pair must be in non-increasing size order.
        for pair in tree.root.children.windows(2) {
            assert!(pair[0].size >= pair[1].size);
        }
    }

    #[test]
    fn test_ignore_patterns() {
        let temp = create_test_tree();
        let config = ScanConfig::builder()
            .root(temp.path())
            .ignore_patterns(vec!["dir2".to_string()])
            .build()
            .unwrap();

        let tree = JwalkScanner::new().scan(&config).unwrap();

        assert!(tree.root.children.iter().all(|c| c.name.as_str() != "dir2"));
    }

    #[test]
    fn test_quick_list_respects_hidden() {
        let names = quick_list_names(false);

        assert!(names.iter().any(|n| n == "visible"));
        assert!(!names.iter().any(|n| n == ".hidden"));
    }

    #[test]
    fn test_quick_list_includes_hidden_when_configured() {
        let names = quick_list_names(true);

        assert!(names.iter().any(|n| n == "visible"));
        assert!(names.iter().any(|n| n == ".hidden"));
    }
}