1use crate::crossdev;
2use crate::traverse::{EntryData, Tree, TreeIndex};
3use byte_unit::{ByteUnit, n_gb_bytes, n_gib_bytes, n_mb_bytes, n_mib_bytes};
4use std::collections::BTreeSet;
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::time::Duration;
9use std::{fmt, path::Path};
10
11pub(crate) fn get_entry_or_panic(tree: &Tree, node_idx: TreeIndex) -> &EntryData {
13 tree.node_weight(node_idx)
14 .expect("node should always be retrievable with valid index")
15}
16
17pub(crate) fn get_size_or_panic(tree: &Tree, node_idx: TreeIndex) -> u128 {
18 get_entry_or_panic(tree, node_idx).size
19}
20
/// Selects how a byte count is rendered for display.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the format can be logged and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ByteFormat {
    /// Pick a suitable unit automatically, using metric (power-of-1000) units.
    Metric,
    /// Pick a suitable unit automatically, using binary (power-of-1024) units.
    Binary,
    /// Print the raw number of bytes.
    Bytes,
    /// Always express the value in gigabytes.
    GB,
    /// Always express the value in gibibytes.
    GiB,
    /// Always express the value in megabytes.
    MB,
    /// Always express the value in mebibytes.
    MiB,
}
39
40impl ByteFormat {
41 pub fn width(self) -> usize {
43 use ByteFormat::*;
44 match self {
45 Metric => 10,
46 Binary => 11,
47 Bytes => 12,
48 MiB | MB => 12,
49 _ => 10,
50 }
51 }
52 pub fn total_width(self) -> usize {
54 use ByteFormat::*;
55 const THE_SPACE_BETWEEN_UNIT_AND_NUMBER: usize = 1;
56
57 self.width()
58 + match self {
59 Binary | MiB | GiB => 3,
60 Metric | MB | GB => 2,
61 Bytes => 1,
62 }
63 + THE_SPACE_BETWEEN_UNIT_AND_NUMBER
64 }
65 pub fn display(self, bytes: u128) -> impl fmt::Display {
67 ByteFormatDisplay {
68 format: self,
69 bytes,
70 }
71 }
72}
73
74struct ByteFormatDisplay {
76 format: ByteFormat,
77 bytes: u128,
78}
79
80impl fmt::Display for ByteFormatDisplay {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
82 use ByteFormat::*;
83 use byte_unit::Byte;
84
85 let format = match self.format {
86 Bytes => return write!(f, "{} b", self.bytes),
87 Binary => (true, None),
88 Metric => (false, None),
89 GB => (false, Some((n_gb_bytes!(1), ByteUnit::GB))),
90 GiB => (false, Some((n_gib_bytes!(1), ByteUnit::GiB))),
91 MB => (false, Some((n_mb_bytes!(1), ByteUnit::MB))),
92 MiB => (false, Some((n_mib_bytes!(1), ByteUnit::MiB))),
93 };
94
95 let b = match format {
96 (_, Some((divisor, unit))) => Byte::from_unit(self.bytes as f64 / divisor as f64, unit)
97 .expect("byte count > 0")
98 .get_adjusted_unit(unit),
99 (binary, None) => Byte::from_bytes(self.bytes).get_appropriate_unit(binary),
100 }
101 .format(2);
102 let mut splits = b.split(' ');
103 match (splits.next(), splits.next()) {
104 (Some(bytes), Some(unit)) => write!(
105 f,
106 "{} {:>unit_width$}",
107 bytes,
108 unit,
109 unit_width = match self.format {
110 Binary => 3,
111 Metric => 2,
112 _ => 2,
113 }
114 ),
115 _ => f.write_str(&b),
116 }
117 }
118}
119
/// Determines whether directory entries are sorted while walking the file system.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the setting can be logged and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TraversalSorting {
    /// Leave entries in the order the walker produces them.
    None,
    /// Sort the entries of each directory by file name.
    AlphabeticalByFileName,
}
128
/// Rate limiter backed by a flag that a background thread raises periodically.
#[derive(Debug)]
pub(crate) struct Throttle {
    trigger: Arc<AtomicBool>,
}

impl Throttle {
    /// Creates a throttle whose flag is raised every `duration`.
    ///
    /// A background thread is spawned that optionally waits for `initial_sleep`,
    /// then raises the flag and sleeps for `duration`, over and over. The thread
    /// only holds a weak reference between rounds, so it ends once the throttle
    /// has been dropped.
    pub(crate) fn new(duration: Duration, initial_sleep: Option<Duration>) -> Self {
        let trigger = Arc::new(AtomicBool::new(false));
        let weak_trigger = Arc::downgrade(&trigger);

        std::thread::spawn(move || {
            if let Some(delay) = initial_sleep {
                std::thread::sleep(delay);
            }
            while let Some(flag) = weak_trigger.upgrade() {
                flag.store(true, Ordering::Relaxed);
                std::thread::sleep(duration);
            }
        });

        Self { trigger }
    }

    /// Runs `f` only if the flag was raised since the last successful check.
    pub(crate) fn throttled<F>(&self, f: F)
    where
        F: FnOnce(),
    {
        if !self.can_update() {
            return;
        }
        f();
    }

    /// Reports whether the flag is currently raised, lowering it in the same step.
    pub(crate) fn can_update(&self) -> bool {
        self.trigger.swap(false, Ordering::Relaxed)
    }
}
173
174#[derive(Clone)]
176pub struct WalkOptions {
177 pub threads: usize,
180 pub count_hard_links: bool,
182 pub apparent_size: bool,
184 pub sorting: TraversalSorting,
186 pub cross_filesystems: bool,
188 pub ignore_dirs: BTreeSet<PathBuf>,
190}
191
192type WalkDir = jwalk::WalkDirGeneric<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>;
193
194impl WalkOptions {
195 pub(crate) fn iter_from_path(
200 &self,
201 root: &Path,
202 root_device_id: u64,
203 skip_root: bool,
204 ) -> WalkDir {
205 let ignore_dirs = self.ignore_dirs.clone();
206 let cwd = std::env::current_dir().unwrap_or_else(|_| root.to_owned());
207 WalkDir::new(root)
208 .follow_links(false)
209 .min_depth(if skip_root { 1 } else { 0 })
210 .sort(match self.sorting {
211 TraversalSorting::None => false,
212 TraversalSorting::AlphabeticalByFileName => true,
213 })
214 .skip_hidden(false)
215 .process_read_dir({
216 let cross_filesystems = self.cross_filesystems;
217 move |_, _, _, dir_entry_results| {
218 dir_entry_results.iter_mut().for_each(|dir_entry_result| {
219 if let Ok(dir_entry) = dir_entry_result {
220 let metadata = dir_entry.metadata();
221
222 if dir_entry.file_type.is_dir() {
223 let ok_for_fs = cross_filesystems
224 || metadata
225 .as_ref()
226 .map(|m| crossdev::is_same_device(root_device_id, m))
227 .unwrap_or(true);
228 if !ok_for_fs
229 || ignore_directory(&dir_entry.path(), &ignore_dirs, &cwd)
230 {
231 dir_entry.read_children_path = None;
232 }
233 }
234
235 dir_entry.client_state = Some(metadata);
236 }
237 })
238 }
239 })
240 .parallelism(match self.threads {
241 0 => jwalk::Parallelism::RayonDefaultPool {
242 busy_timeout: std::time::Duration::from_secs(1),
243 },
244 1 => jwalk::Parallelism::Serial,
245 _ => jwalk::Parallelism::RayonExistingPool {
246 pool: jwalk::rayon::ThreadPoolBuilder::new()
247 .stack_size(128 * 1024)
248 .num_threads(self.threads)
249 .thread_name(|idx| format!("dua-fs-walk-{idx}"))
250 .build()
251 .expect("fields we set cannot fail")
252 .into(),
253 busy_timeout: None,
254 },
255 })
256 }
257}
258
/// Summary of a completed walk.
///
/// `Debug` is derived so the result can be logged and used in assertions.
#[derive(Debug, Default)]
pub struct WalkResult {
    /// Number of errors counted during the walk.
    pub num_errors: u64,
}

impl WalkResult {
    /// Maps the result to a process exit code: `0` when there were no errors, `1` otherwise.
    pub fn to_exit_code(&self) -> i32 {
        i32::from(self.num_errors > 0)
    }
}
274
275pub fn canonicalize_ignore_dirs(ignore_dirs: &[PathBuf]) -> BTreeSet<PathBuf> {
279 let dirs = ignore_dirs
280 .iter()
281 .map(gix_path::realpath)
282 .filter_map(Result::ok)
283 .collect();
284 log::info!("Ignoring canonicalized {dirs:?}");
285 dirs
286}
287
288fn ignore_directory(path: &Path, ignore_dirs: &BTreeSet<PathBuf>, cwd: &Path) -> bool {
289 if ignore_dirs.is_empty() {
290 return false;
291 }
292 let path = gix_path::realpath_opts(path, cwd, 32);
293 path.map(|path| {
294 let ignored = ignore_dirs.contains(&path);
295 if ignored {
296 log::debug!("Ignored {path:?}");
297 }
298 ignored
299 })
300 .unwrap_or(false)
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `ignore_directory` with absolute, relative and `..`-containing paths.
    #[test]
    fn test_ignore_directories() {
        let cwd = std::env::current_dir().unwrap();

        // Each case is (path under test, directories to ignore, expected outcome).
        #[cfg(unix)]
        let absolute_cases = vec![
            ("/usr", vec!["/usr"], true),
            ("/usr/local", vec!["/usr"], false),
            ("/smth", vec!["/usr"], false),
            ("/usr/local/..", vec!["/usr/local/.."], true),
            ("/usr", vec!["/usr/local/.."], true),
            ("/usr/local/share/../..", vec!["/usr"], true),
        ];

        #[cfg(windows)]
        let absolute_cases = vec![
            ("C:\\Windows", vec!["C:\\Windows"], true),
            ("C:\\Windows\\System", vec!["C:\\Windows"], false),
            ("C:\\Smth", vec!["C:\\Windows"], false),
            (
                "C:\\Windows\\System\\..",
                vec!["C:\\Windows\\System\\.."],
                true,
            ),
            ("C:\\Windows", vec!["C:\\Windows\\System\\.."], true),
            (
                "C:\\Windows\\System\\Speech\\..\\..",
                vec!["C:\\Windows"],
                true,
            ),
        ];

        // Relative paths are resolved against the current directory on every platform.
        let relative_cases = [
            ("src", vec!["src"], true),
            ("src/interactive", vec!["src"], false),
            ("src/interactive/..", vec!["src"], true),
        ];

        for (path, ignore_dirs, expected_result) in absolute_cases.into_iter().chain(relative_cases)
        {
            let ignore_dirs: Vec<PathBuf> = ignore_dirs.into_iter().map(PathBuf::from).collect();
            let ignore_dirs = canonicalize_ignore_dirs(&ignore_dirs);
            let actual_result = ignore_directory(Path::new(path), &ignore_dirs, &cwd);
            assert_eq!(
                actual_result, expected_result,
                "result='{expected_result}' for path='{path}' and ignore_dir='{ignore_dirs:?}' "
            );
        }
    }
}
355}