Skip to main content

provenant/scanner/
collect.rs

1use glob::Pattern;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use crate::utils::file::is_path_excluded;
6
/// Outcome of walking a scan root: what was found, what was skipped, and what failed.
///
/// `Default` yields an empty result (no paths, zero counters, no errors).
#[derive(Debug, Clone, Default)]
pub struct CollectedPaths {
    /// Regular files that were found, each paired with the metadata read for it.
    pub files: Vec<(PathBuf, fs::Metadata)>,
    /// Directories that were found, each paired with the metadata read for it.
    pub directories: Vec<(PathBuf, fs::Metadata)>,
    /// Number of paths rejected by the exclude patterns. An excluded directory counts
    /// once; its contents are never visited.
    pub excluded_count: usize,
    /// Sum of the sizes (in bytes, as reported by the metadata) of all entries in `files`.
    pub total_file_bytes: u64,
    /// Paths that could not be inspected or listed, paired with the error message.
    pub collection_errors: Vec<(PathBuf, String)>,
}
14
15impl CollectedPaths {
16    pub fn file_count(&self) -> usize {
17        self.files.len()
18    }
19
20    pub fn directory_count(&self) -> usize {
21        self.directories.len()
22    }
23}
24
25pub fn collect_paths<P: AsRef<Path>>(
26    root: P,
27    max_depth: usize,
28    exclude_patterns: &[Pattern],
29) -> CollectedPaths {
30    let depth_limit = depth_limit_from_cli(max_depth);
31    let root = root.as_ref();
32
33    if is_path_excluded(root, exclude_patterns) {
34        return CollectedPaths {
35            files: Vec::new(),
36            directories: Vec::new(),
37            excluded_count: 1,
38            total_file_bytes: 0,
39            collection_errors: Vec::new(),
40        };
41    }
42
43    let metadata = match fs::metadata(root) {
44        Ok(metadata) => metadata,
45        Err(error) => {
46            return CollectedPaths {
47                files: Vec::new(),
48                directories: Vec::new(),
49                excluded_count: 0,
50                total_file_bytes: 0,
51                collection_errors: vec![(root.to_path_buf(), error.to_string())],
52            };
53        }
54    };
55
56    if metadata.is_file() {
57        return CollectedPaths {
58            total_file_bytes: metadata.len(),
59            files: vec![(root.to_path_buf(), metadata)],
60            directories: Vec::new(),
61            excluded_count: 0,
62            collection_errors: Vec::new(),
63        };
64    }
65
66    collect_all_paths(root, &metadata, depth_limit, exclude_patterns)
67}
68
69fn collect_all_paths(
70    root: &Path,
71    root_metadata: &fs::Metadata,
72    depth_limit: Option<usize>,
73    exclude_patterns: &[Pattern],
74) -> CollectedPaths {
75    let mut files = Vec::new();
76    let mut directories = vec![(root.to_path_buf(), root_metadata.clone())];
77    let mut excluded_count = 0;
78    let mut total_file_bytes = 0_u64;
79    let mut collection_errors = Vec::new();
80
81    let mut pending_dirs: Vec<(PathBuf, Option<usize>)> = vec![(root.to_path_buf(), depth_limit)];
82
83    while let Some((dir_path, current_depth)) = pending_dirs.pop() {
84        let entries: Vec<_> = match fs::read_dir(&dir_path) {
85            Ok(entries) => entries.filter_map(Result::ok).collect(),
86            Err(e) => {
87                collection_errors.push((dir_path.clone(), e.to_string()));
88                continue;
89            }
90        };
91
92        for entry in entries {
93            let path = entry.path();
94
95            if is_path_excluded(&path, exclude_patterns) {
96                excluded_count += 1;
97                continue;
98            }
99
100            match entry.metadata() {
101                Ok(metadata) if metadata.is_file() => {
102                    total_file_bytes += metadata.len();
103                    files.push((path, metadata));
104                }
105                Ok(metadata) if metadata.is_dir() => {
106                    directories.push((path.clone(), metadata));
107                    let should_recurse = current_depth.is_none_or(|d| d > 0);
108                    if should_recurse {
109                        let next_depth = current_depth.map(|d| d - 1);
110                        pending_dirs.push((path, next_depth));
111                    }
112                }
113                _ => continue,
114            }
115        }
116    }
117
118    CollectedPaths {
119        files,
120        directories,
121        excluded_count,
122        total_file_bytes,
123        collection_errors,
124    }
125}
126
/// Translates the raw `max_depth` value into an optional limit.
///
/// `0` is the "no limit" sentinel and becomes `None`; any other value is passed through
/// unchanged as `Some(max_depth)`.
fn depth_limit_from_cli(max_depth: usize) -> Option<usize> {
    match max_depth {
        0 => None,
        limit => Some(limit),
    }
}