1use crate::crossdev;
2use crate::traverse::{EntryData, Tree, TreeIndex};
3use byte_unit::{ByteUnit, n_gb_bytes, n_gib_bytes, n_mb_bytes, n_mib_bytes};
4use std::collections::BTreeSet;
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::time::Duration;
9use std::{fmt, path::Path};
10
11pub fn get_entry_or_panic(tree: &Tree, node_idx: TreeIndex) -> &EntryData {
12 tree.node_weight(node_idx)
13 .expect("node should always be retrievable with valid index")
14}
15
16pub(crate) fn get_size_or_panic(tree: &Tree, node_idx: TreeIndex) -> u128 {
17 get_entry_or_panic(tree, node_idx).size
18}
19
/// Selects how a byte count is rendered by [`ByteFormatDisplay`].
#[derive(Clone, Copy)]
pub enum ByteFormat {
    /// Let the formatter pick a fitting unit, requesting non-binary units
    /// (presumably powers of 1000, e.g. `KB`, `MB`).
    Metric,
    /// Let the formatter pick a fitting unit, requesting binary units
    /// (presumably powers of 1024, e.g. `KiB`, `MiB`).
    Binary,
    /// Print the plain byte count followed by ` b`.
    Bytes,
    /// Always express the value in `GB`.
    GB,
    /// Always express the value in `GiB`.
    GiB,
    /// Always express the value in `MB`.
    MB,
    /// Always express the value in `MiB`.
    MiB,
}

impl ByteFormat {
    /// Width of this format without its unit suffix.
    ///
    /// [`total_width`](Self::total_width) adds the unit and the separating space on top
    /// of this value, so this is presumably the space reserved for the number itself.
    pub fn width(self) -> usize {
        match self {
            ByteFormat::Bytes | ByteFormat::MiB | ByteFormat::MB => 12,
            ByteFormat::Binary => 11,
            ByteFormat::Metric | ByteFormat::GB | ByteFormat::GiB => 10,
        }
    }

    /// Full width of this format: [`width`](Self::width), one separating space, and the
    /// number of characters taken by the unit suffix.
    pub fn total_width(self) -> usize {
        const THE_SPACE_BETWEEN_UNIT_AND_NUMBER: usize = 1;

        let unit_len = match self {
            ByteFormat::Binary | ByteFormat::MiB | ByteFormat::GiB => 3,
            ByteFormat::Metric | ByteFormat::MB | ByteFormat::GB => 2,
            ByteFormat::Bytes => 1,
        };
        self.width() + unit_len + THE_SPACE_BETWEEN_UNIT_AND_NUMBER
    }

    /// Pair this format with `bytes`, yielding a value that carries both for later rendering.
    pub fn display(self, bytes: u128) -> ByteFormatDisplay {
        let format = self;
        ByteFormatDisplay { format, bytes }
    }
}

/// A byte count together with the [`ByteFormat`] it should be rendered with.
///
/// Created by [`ByteFormat::display`].
pub struct ByteFormatDisplay {
    /// The format chosen for rendering.
    format: ByteFormat,
    /// The byte count to render.
    bytes: u128,
}
74
75impl fmt::Display for ByteFormatDisplay {
76 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
77 use ByteFormat::*;
78 use byte_unit::Byte;
79
80 let format = match self.format {
81 Bytes => return write!(f, "{} b", self.bytes),
82 Binary => (true, None),
83 Metric => (false, None),
84 GB => (false, Some((n_gb_bytes!(1), ByteUnit::GB))),
85 GiB => (false, Some((n_gib_bytes!(1), ByteUnit::GiB))),
86 MB => (false, Some((n_mb_bytes!(1), ByteUnit::MB))),
87 MiB => (false, Some((n_mib_bytes!(1), ByteUnit::MiB))),
88 };
89
90 let b = match format {
91 (_, Some((divisor, unit))) => Byte::from_unit(self.bytes as f64 / divisor as f64, unit)
92 .expect("byte count > 0")
93 .get_adjusted_unit(unit),
94 (binary, None) => Byte::from_bytes(self.bytes).get_appropriate_unit(binary),
95 }
96 .format(2);
97 let mut splits = b.split(' ');
98 match (splits.next(), splits.next()) {
99 (Some(bytes), Some(unit)) => write!(
100 f,
101 "{} {:>unit_width$}",
102 bytes,
103 unit,
104 unit_width = match self.format {
105 Binary => 3,
106 Metric => 2,
107 _ => 2,
108 }
109 ),
110 _ => f.write_str(&b),
111 }
112 }
113}
114
/// Selects whether directory entries are sorted while the filesystem is traversed.
#[derive(Clone)]
pub enum TraversalSorting {
    /// Do not request sorting from the directory walker.
    None,
    /// Request sorting from the directory walker; going by the name, entries are then
    /// ordered alphabetically by file name.
    AlphabeticalByFileName,
}
121
/// Limits how often an action runs, based on a flag that a background thread raises
/// at a fixed interval.
#[derive(Debug)]
pub struct Throttle {
    /// Raised by the background thread, lowered again by [`Throttle::can_update`].
    trigger: Arc<AtomicBool>,
}

impl Throttle {
    /// Create a throttle whose flag is raised every `duration`.
    ///
    /// A background thread is spawned for this. If `initial_sleep` is given, the thread
    /// waits that long before raising the flag for the first time. The thread only holds
    /// a weak reference to the flag and stops once it finds the flag has been dropped.
    pub fn new(duration: Duration, initial_sleep: Option<Duration>) -> Self {
        let trigger = Arc::new(AtomicBool::new(false));
        let weak_trigger = Arc::downgrade(&trigger);

        std::thread::spawn(move || {
            if let Some(delay) = initial_sleep {
                std::thread::sleep(delay);
            }
            loop {
                match weak_trigger.upgrade() {
                    Some(flag) => {
                        flag.store(true, Ordering::Relaxed);
                        std::thread::sleep(duration);
                    }
                    // The owning `Throttle` is gone, nothing left to signal.
                    None => break,
                }
            }
        });

        Self { trigger }
    }

    /// Run `f` if the flag is currently raised, otherwise do nothing.
    pub fn throttled<F>(&self, f: F)
    where
        F: FnOnce(),
    {
        if !self.can_update() {
            return;
        }
        f()
    }

    /// Return `true` if the flag was raised, and lower it in the same step.
    ///
    /// Subsequent calls return `false` until the background thread raises the flag again.
    pub fn can_update(&self) -> bool {
        let was_raised = self.trigger.swap(false, Ordering::Relaxed);
        was_raised
    }
}
162
/// Options controlling how a directory tree is walked.
#[derive(Clone)]
pub struct WalkOptions {
    /// Number of threads used for the walk: `0` uses the default rayon pool, `1` walks
    /// serially, and any other value builds a dedicated pool with that many threads.
    pub threads: usize,
    /// Not read in this part of the file; going by the name, whether hard links are
    /// counted each time they are encountered — TODO confirm against the callers.
    pub count_hard_links: bool,
    /// Not read in this part of the file; going by the name, whether apparent size is
    /// reported instead of size on disk — TODO confirm against the callers.
    pub apparent_size: bool,
    /// Whether the walker is asked to sort directory entries.
    pub sorting: TraversalSorting,
    /// If `false`, directories that are not on the root's device are not descended into.
    pub cross_filesystems: bool,
    /// Directories that are not descended into; compared against the resolved path of
    /// each directory encountered.
    pub ignore_dirs: BTreeSet<PathBuf>,
}
175
/// The `jwalk` walker type used here; each entry's client state is an optional result
/// of looking up the entry's metadata.
type WalkDir = jwalk::WalkDirGeneric<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>;
177
178impl WalkOptions {
179 pub fn iter_from_path(&self, root: &Path, root_device_id: u64, skip_root: bool) -> WalkDir {
180 let ignore_dirs = self.ignore_dirs.clone();
181 let cwd = std::env::current_dir().unwrap_or_else(|_| root.to_owned());
182 WalkDir::new(root)
183 .follow_links(false)
184 .min_depth(if skip_root { 1 } else { 0 })
185 .sort(match self.sorting {
186 TraversalSorting::None => false,
187 TraversalSorting::AlphabeticalByFileName => true,
188 })
189 .skip_hidden(false)
190 .process_read_dir({
191 let cross_filesystems = self.cross_filesystems;
192 move |_, _, _, dir_entry_results| {
193 dir_entry_results.iter_mut().for_each(|dir_entry_result| {
194 if let Ok(dir_entry) = dir_entry_result {
195 let metadata = dir_entry.metadata();
196
197 if dir_entry.file_type.is_dir() {
198 let ok_for_fs = cross_filesystems
199 || metadata
200 .as_ref()
201 .map(|m| crossdev::is_same_device(root_device_id, m))
202 .unwrap_or(true);
203 if !ok_for_fs
204 || ignore_directory(&dir_entry.path(), &ignore_dirs, &cwd)
205 {
206 dir_entry.read_children_path = None;
207 }
208 }
209
210 dir_entry.client_state = Some(metadata);
211 }
212 })
213 }
214 })
215 .parallelism(match self.threads {
216 0 => jwalk::Parallelism::RayonDefaultPool {
217 busy_timeout: std::time::Duration::from_secs(1),
218 },
219 1 => jwalk::Parallelism::Serial,
220 _ => jwalk::Parallelism::RayonExistingPool {
221 pool: jwalk::rayon::ThreadPoolBuilder::new()
222 .stack_size(128 * 1024)
223 .num_threads(self.threads)
224 .thread_name(|idx| format!("dua-fs-walk-{idx}"))
225 .build()
226 .expect("fields we set cannot fail")
227 .into(),
228 busy_timeout: None,
229 },
230 })
231 }
232}
233
/// Summary of a finished walk.
#[derive(Default)]
pub struct WalkResult {
    /// Number of errors counted for the walk.
    pub num_errors: u64,
}

impl WalkResult {
    /// Turn the result into a process exit code: `0` without errors, `1` otherwise.
    pub fn to_exit_code(&self) -> i32 {
        match self.num_errors {
            0 => 0,
            _ => 1,
        }
    }
}
246
247pub fn canonicalize_ignore_dirs(ignore_dirs: &[PathBuf]) -> BTreeSet<PathBuf> {
248 let dirs = ignore_dirs
249 .iter()
250 .map(gix_path::realpath)
251 .filter_map(Result::ok)
252 .collect();
253 log::info!("Ignoring canonicalized {dirs:?}");
254 dirs
255}
256
257fn ignore_directory(path: &Path, ignore_dirs: &BTreeSet<PathBuf>, cwd: &Path) -> bool {
258 if ignore_dirs.is_empty() {
259 return false;
260 }
261 let path = gix_path::realpath_opts(path, cwd, 32);
262 path.map(|path| {
263 let ignored = ignore_dirs.contains(&path);
264 if ignored {
265 log::debug!("Ignored {path:?}");
266 }
267 ignored
268 })
269 .unwrap_or(false)
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `ignore_directory` against absolute, platform-specific paths as well as
    /// paths relative to the working directory, including ones that contain `..`.
    #[test]
    fn test_ignore_directories() {
        let cwd = std::env::current_dir().unwrap();

        // Each case is (path to test, directories to ignore, expected result).
        #[cfg(unix)]
        let absolute_cases = vec![
            ("/usr", vec!["/usr"], true),
            ("/usr/local", vec!["/usr"], false),
            ("/smth", vec!["/usr"], false),
            ("/usr/local/..", vec!["/usr/local/.."], true),
            ("/usr", vec!["/usr/local/.."], true),
            ("/usr/local/share/../..", vec!["/usr"], true),
        ];

        #[cfg(windows)]
        let absolute_cases = vec![
            ("C:\\Windows", vec!["C:\\Windows"], true),
            ("C:\\Windows\\System", vec!["C:\\Windows"], false),
            ("C:\\Smth", vec!["C:\\Windows"], false),
            (
                "C:\\Windows\\System\\..",
                vec!["C:\\Windows\\System\\.."],
                true,
            ),
            ("C:\\Windows", vec!["C:\\Windows\\System\\.."], true),
            (
                "C:\\Windows\\System\\Speech\\..\\..",
                vec!["C:\\Windows"],
                true,
            ),
        ];

        let relative_cases = vec![
            ("src", vec!["src"], true),
            ("src/interactive", vec!["src"], false),
            ("src/interactive/..", vec!["src"], true),
        ];

        for (path, ignore_dirs, expected_result) in
            absolute_cases.into_iter().chain(relative_cases)
        {
            let raw_dirs: Vec<PathBuf> = ignore_dirs.into_iter().map(PathBuf::from).collect();
            let ignore_dirs = canonicalize_ignore_dirs(&raw_dirs);
            assert_eq!(
                ignore_directory(Path::new(path), &ignore_dirs, &cwd),
                expected_result,
                "result='{expected_result}' for path='{path}' and ignore_dir='{ignore_dirs:?}' "
            );
        }
    }
}