Skip to main content

provenant/scanner/
collect.rs

1use glob::Pattern;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use crate::utils::file::is_path_excluded;
6
/// Result of walking a directory tree: the paths that were found plus
/// bookkeeping about what was skipped or could not be read.
#[derive(Debug)]
pub struct CollectedPaths {
    /// Regular files that were found, each paired with its metadata.
    pub files: Vec<(PathBuf, fs::Metadata)>,
    /// Directories that were found, each paired with its metadata.
    pub directories: Vec<(PathBuf, fs::Metadata)>,
    /// Number of paths skipped because they matched an exclude pattern.
    pub excluded_count: usize,
    /// Sum of the sizes, in bytes, of every entry in `files`.
    pub total_file_bytes: u64,
    /// Paths that could not be read, each paired with the error message.
    pub collection_errors: Vec<(PathBuf, String)>,
}
14
15impl CollectedPaths {
16    pub fn file_count(&self) -> usize {
17        self.files.len()
18    }
19
20    pub fn directory_count(&self) -> usize {
21        self.directories.len()
22    }
23}
24
25pub fn collect_paths<P: AsRef<Path>>(
26    root: P,
27    max_depth: usize,
28    exclude_patterns: &[Pattern],
29) -> CollectedPaths {
30    let depth_limit = depth_limit_from_cli(max_depth);
31
32    if is_path_excluded(root.as_ref(), exclude_patterns) {
33        return CollectedPaths {
34            files: Vec::new(),
35            directories: Vec::new(),
36            excluded_count: 1,
37            total_file_bytes: 0,
38            collection_errors: Vec::new(),
39        };
40    }
41
42    collect_all_paths(root.as_ref(), depth_limit, exclude_patterns)
43}
44
45fn collect_all_paths(
46    root: &Path,
47    depth_limit: Option<usize>,
48    exclude_patterns: &[Pattern],
49) -> CollectedPaths {
50    let mut files = Vec::new();
51    let mut directories = Vec::new();
52    let mut excluded_count = 0;
53    let mut total_file_bytes = 0_u64;
54    let mut collection_errors = Vec::new();
55
56    let mut pending_dirs: Vec<(PathBuf, Option<usize>)> = vec![(root.to_path_buf(), depth_limit)];
57
58    while let Some((dir_path, current_depth)) = pending_dirs.pop() {
59        let entries: Vec<_> = match fs::read_dir(&dir_path) {
60            Ok(entries) => entries.filter_map(Result::ok).collect(),
61            Err(e) => {
62                collection_errors.push((dir_path.clone(), e.to_string()));
63                continue;
64            }
65        };
66
67        for entry in entries {
68            let path = entry.path();
69
70            if is_path_excluded(&path, exclude_patterns) {
71                excluded_count += 1;
72                continue;
73            }
74
75            match entry.metadata() {
76                Ok(metadata) if metadata.is_file() => {
77                    total_file_bytes += metadata.len();
78                    files.push((path, metadata));
79                }
80                Ok(metadata) if metadata.is_dir() => {
81                    directories.push((path.clone(), metadata));
82                    let should_recurse = current_depth.is_none_or(|d| d > 0);
83                    if should_recurse {
84                        let next_depth = current_depth.map(|d| d - 1);
85                        pending_dirs.push((path, next_depth));
86                    }
87                }
88                _ => continue,
89            }
90        }
91    }
92
93    CollectedPaths {
94        files,
95        directories,
96        excluded_count,
97        total_file_bytes,
98        collection_errors,
99    }
100}
101
/// Converts the raw `max_depth` value into an optional depth limit.
///
/// `0` maps to `None` (no limit); any other value `n` maps to `Some(n)`.
fn depth_limit_from_cli(max_depth: usize) -> Option<usize> {
    match max_depth {
        0 => None,
        limit => Some(limit),
    }
}