Skip to main content

provenant/scanner/
collect.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use glob::Pattern;
5use std::fs;
6use std::path::{Path, PathBuf};
7
8use crate::utils::file::is_path_excluded;
9
/// Outcome of walking a scan root: what was found, what was skipped, and what
/// could not be read.
#[derive(Debug)]
pub struct CollectedPaths {
    /// Regular files that were found, each paired with the metadata read for it.
    pub files: Vec<(PathBuf, fs::Metadata)>,
    /// Directories that were found (the root included when it is a directory),
    /// each paired with its metadata.
    pub directories: Vec<(PathBuf, fs::Metadata)>,
    /// Number of paths rejected by the exclude patterns.
    pub excluded_count: usize,
    /// Sum of the byte lengths of every entry in `files`.
    pub total_file_bytes: u64,
    /// Paths that could not be read, paired with the error rendered as text.
    pub collection_errors: Vec<(PathBuf, String)>,
}
17
18impl CollectedPaths {
19    pub fn file_count(&self) -> usize {
20        self.files.len()
21    }
22
23    pub fn directory_count(&self) -> usize {
24        self.directories.len()
25    }
26}
27
28pub fn collect_paths<P: AsRef<Path>>(
29    root: P,
30    max_depth: usize,
31    exclude_patterns: &[Pattern],
32) -> CollectedPaths {
33    let depth_limit = depth_limit_from_cli(max_depth);
34    let root = root.as_ref();
35
36    if is_path_excluded(root, exclude_patterns) {
37        return CollectedPaths {
38            files: Vec::new(),
39            directories: Vec::new(),
40            excluded_count: 1,
41            total_file_bytes: 0,
42            collection_errors: Vec::new(),
43        };
44    }
45
46    let metadata = match fs::metadata(root) {
47        Ok(metadata) => metadata,
48        Err(error) => {
49            return CollectedPaths {
50                files: Vec::new(),
51                directories: Vec::new(),
52                excluded_count: 0,
53                total_file_bytes: 0,
54                collection_errors: vec![(root.to_path_buf(), error.to_string())],
55            };
56        }
57    };
58
59    if metadata.is_file() {
60        return CollectedPaths {
61            total_file_bytes: metadata.len(),
62            files: vec![(root.to_path_buf(), metadata)],
63            directories: Vec::new(),
64            excluded_count: 0,
65            collection_errors: Vec::new(),
66        };
67    }
68
69    collect_all_paths(root, &metadata, depth_limit, exclude_patterns)
70}
71
72fn collect_all_paths(
73    root: &Path,
74    root_metadata: &fs::Metadata,
75    depth_limit: Option<usize>,
76    exclude_patterns: &[Pattern],
77) -> CollectedPaths {
78    let mut files = Vec::new();
79    let mut directories = vec![(root.to_path_buf(), root_metadata.clone())];
80    let mut excluded_count = 0;
81    let mut total_file_bytes = 0_u64;
82    let mut collection_errors = Vec::new();
83
84    let mut pending_dirs: Vec<(PathBuf, Option<usize>)> = vec![(root.to_path_buf(), depth_limit)];
85
86    while let Some((dir_path, current_depth)) = pending_dirs.pop() {
87        let entries: Vec<_> = match fs::read_dir(&dir_path) {
88            Ok(entries) => entries.filter_map(Result::ok).collect(),
89            Err(e) => {
90                collection_errors.push((dir_path.clone(), e.to_string()));
91                continue;
92            }
93        };
94
95        for entry in entries {
96            let path = entry.path();
97
98            if is_path_excluded(&path, exclude_patterns) {
99                excluded_count += 1;
100                continue;
101            }
102
103            match entry.metadata() {
104                Ok(metadata) if metadata.is_file() => {
105                    total_file_bytes += metadata.len();
106                    files.push((path, metadata));
107                }
108                Ok(metadata) if metadata.is_dir() => {
109                    directories.push((path.clone(), metadata));
110                    let should_recurse = current_depth.is_none_or(|d| d > 0);
111                    if should_recurse {
112                        let next_depth = current_depth.map(|d| d - 1);
113                        pending_dirs.push((path, next_depth));
114                    }
115                }
116                _ => continue,
117            }
118        }
119    }
120
121    CollectedPaths {
122        files,
123        directories,
124        excluded_count,
125        total_file_bytes,
126        collection_errors,
127    }
128}
129
/// Translates the depth value taken from the command line into an optional
/// limit: `0` becomes `None` (no limit), any other value `n` becomes `Some(n)`.
fn depth_limit_from_cli(max_depth: usize) -> Option<usize> {
    match max_depth {
        0 => None,
        limit => Some(limit),
    }
}