Skip to main content

dua/
common.rs

1use crate::crossdev;
2use crate::traverse::{EntryData, Tree, TreeIndex};
3use byte_unit::{ByteUnit, n_gb_bytes, n_gib_bytes, n_mb_bytes, n_mib_bytes};
4use std::collections::BTreeSet;
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::time::Duration;
9use std::{fmt, path::Path};
10
11/// Return the entry at `node_idx` or panic if the index is invalid for `tree`.
12pub(crate) fn get_entry_or_panic(tree: &Tree, node_idx: TreeIndex) -> &EntryData {
13    tree.node_weight(node_idx)
14        .expect("node should always be retrievable with valid index")
15}
16
17pub(crate) fn get_size_or_panic(tree: &Tree, node_idx: TreeIndex) -> u128 {
18    get_entry_or_panic(tree, node_idx).size
19}
20
/// Selects how a byte count is rendered for display.
#[derive(Copy, Clone)]
pub enum ByteFormat {
    /// Automatically chosen metric unit (powers of 1000).
    Metric,
    /// Automatically chosen binary unit (powers of 1024).
    Binary,
    /// The plain number of bytes, without any scaling.
    Bytes,
    /// Always gigabytes, regardless of magnitude.
    GB,
    /// Always gibibytes, regardless of magnitude.
    GiB,
    /// Always megabytes, regardless of magnitude.
    MB,
    /// Always mebibytes, regardless of magnitude.
    MiB,
}
39
40impl ByteFormat {
41    /// Return the content width (without unit suffix) needed to display values in this format.
42    pub fn width(self) -> usize {
43        use ByteFormat::*;
44        match self {
45            Metric => 10,
46            Binary => 11,
47            Bytes => 12,
48            MiB | MB => 12,
49            _ => 10,
50        }
51    }
52    /// Return the full width (value plus unit and separator) used by this format.
53    pub fn total_width(self) -> usize {
54        use ByteFormat::*;
55        const THE_SPACE_BETWEEN_UNIT_AND_NUMBER: usize = 1;
56
57        self.width()
58            + match self {
59                Binary | MiB | GiB => 3,
60                Metric | MB | GB => 2,
61                Bytes => 1,
62            }
63            + THE_SPACE_BETWEEN_UNIT_AND_NUMBER
64    }
65    /// Create a display adapter for `bytes` using this format.
66    pub fn display(self, bytes: u128) -> impl fmt::Display {
67        ByteFormatDisplay {
68            format: self,
69            bytes,
70        }
71    }
72}
73
74/// A lightweight display adapter created by [`ByteFormat::display`].
75struct ByteFormatDisplay {
76    format: ByteFormat,
77    bytes: u128,
78}
79
80impl fmt::Display for ByteFormatDisplay {
81    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
82        use ByteFormat::*;
83        use byte_unit::Byte;
84
85        let format = match self.format {
86            Bytes => return write!(f, "{} b", self.bytes),
87            Binary => (true, None),
88            Metric => (false, None),
89            GB => (false, Some((n_gb_bytes!(1), ByteUnit::GB))),
90            GiB => (false, Some((n_gib_bytes!(1), ByteUnit::GiB))),
91            MB => (false, Some((n_mb_bytes!(1), ByteUnit::MB))),
92            MiB => (false, Some((n_mib_bytes!(1), ByteUnit::MiB))),
93        };
94
95        let b = match format {
96            (_, Some((divisor, unit))) => Byte::from_unit(self.bytes as f64 / divisor as f64, unit)
97                .expect("byte count > 0")
98                .get_adjusted_unit(unit),
99            (binary, None) => Byte::from_bytes(self.bytes).get_appropriate_unit(binary),
100        }
101        .format(2);
102        let mut splits = b.split(' ');
103        match (splits.next(), splits.next()) {
104            (Some(bytes), Some(unit)) => write!(
105                f,
106                "{} {:>unit_width$}",
107                bytes,
108                unit,
109                unit_width = match self.format {
110                    Binary => 3,
111                    Metric => 2,
112                    _ => 2,
113                }
114            ),
115            _ => f.write_str(&b),
116        }
117    }
118}
119
/// Selects whether, and how, directory entries are ordered while walking the filesystem.
#[derive(Clone)]
pub enum TraversalSorting {
    /// Leave entries in the order in which the walker yields them.
    None,
    /// Order entries alphabetically by their file name.
    AlphabeticalByFileName,
}
128
/// Rate limiter built on a flag that a background thread raises once per interval.
///
/// The flag is consumed through [`Throttle::can_update`] or [`Throttle::throttled`].
#[derive(Debug)]
pub(crate) struct Throttle {
    /// Raised by the background thread, lowered again by `can_update`.
    trigger: Arc<AtomicBool>,
}

impl Throttle {
    /// Start a throttle whose flag is raised once every `duration`.
    ///
    /// With `initial_sleep` set, the background thread waits that long before it
    /// raises the flag for the first time; otherwise it raises it right away.
    pub(crate) fn new(duration: Duration, initial_sleep: Option<Duration>) -> Self {
        let trigger = Arc::new(AtomicBool::new(false));

        // The thread only holds a weak handle between ticks, which lets it notice
        // that the throttle was dropped and stop on its next wake-up.
        let observer = Arc::downgrade(&trigger);
        std::thread::spawn(move || {
            if let Some(delay) = initial_sleep {
                std::thread::sleep(delay);
            }
            while let Some(flag) = observer.upgrade() {
                flag.store(true, Ordering::Relaxed);
                std::thread::sleep(duration);
            }
        });

        Self { trigger }
    }

    /// Run `f` if the flag is currently raised, consuming the flag; otherwise do nothing.
    pub(crate) fn throttled<F>(&self, f: F)
    where
        F: FnOnce(),
    {
        if !self.can_update() {
            return;
        }
        f();
    }

    /// Return `true` if the flag was raised since it was last consumed.
    ///
    /// The flag is lowered as part of the check, so an immediately following call
    /// returns `false` until the background thread raises it again.
    pub(crate) fn can_update(&self) -> bool {
        self.trigger.swap(false, Ordering::Relaxed)
    }
}
173
174/// Configures a filesystem walk, including output and formatting options.
175#[derive(Clone)]
176pub struct WalkOptions {
177    /// The amount of threads to use. Refer to [`WalkDir::num_threads()`](https://docs.rs/jwalk/0.4.0/jwalk/struct.WalkDir.html#method.num_threads)
178    /// for more information.
179    pub threads: usize,
180    /// If `true`, count every hard-link occurrence independently.
181    pub count_hard_links: bool,
182    /// If `true`, use apparent size (`metadata.len()`), not allocated blocks on disk.
183    pub apparent_size: bool,
184    /// Sorting mode applied by the filesystem walker.
185    pub sorting: TraversalSorting,
186    /// If `false`, traversal is constrained to the root filesystem/device.
187    pub cross_filesystems: bool,
188    /// Canonicalized directories to skip from traversal.
189    pub ignore_dirs: BTreeSet<PathBuf>,
190}
191
192type WalkDir = jwalk::WalkDirGeneric<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>;
193
194impl WalkOptions {
195    /// Create an iterator over `root` honoring this walk configuration.
196    ///
197    /// `root_device_id` is used to filter entries when `cross_filesystems == false`.
198    /// If `skip_root` is `true`, the root directory itself is omitted from yielded entries.
199    pub(crate) fn iter_from_path(
200        &self,
201        root: &Path,
202        root_device_id: u64,
203        skip_root: bool,
204    ) -> WalkDir {
205        let ignore_dirs = self.ignore_dirs.clone();
206        let cwd = std::env::current_dir().unwrap_or_else(|_| root.to_owned());
207        WalkDir::new(root)
208            .follow_links(false)
209            .min_depth(if skip_root { 1 } else { 0 })
210            .sort(match self.sorting {
211                TraversalSorting::None => false,
212                TraversalSorting::AlphabeticalByFileName => true,
213            })
214            .skip_hidden(false)
215            .process_read_dir({
216                let cross_filesystems = self.cross_filesystems;
217                move |_, _, _, dir_entry_results| {
218                    dir_entry_results.iter_mut().for_each(|dir_entry_result| {
219                        if let Ok(dir_entry) = dir_entry_result {
220                            let metadata = dir_entry.metadata();
221
222                            if dir_entry.file_type.is_dir() {
223                                let ok_for_fs = cross_filesystems
224                                    || metadata
225                                        .as_ref()
226                                        .map(|m| crossdev::is_same_device(root_device_id, m))
227                                        .unwrap_or(true);
228                                if !ok_for_fs
229                                    || ignore_directory(&dir_entry.path(), &ignore_dirs, &cwd)
230                                {
231                                    dir_entry.read_children_path = None;
232                                }
233                            }
234
235                            dir_entry.client_state = Some(metadata);
236                        }
237                    })
238                }
239            })
240            .parallelism(match self.threads {
241                0 => jwalk::Parallelism::RayonDefaultPool {
242                    busy_timeout: std::time::Duration::from_secs(1),
243                },
244                1 => jwalk::Parallelism::Serial,
245                _ => jwalk::Parallelism::RayonExistingPool {
246                    pool: jwalk::rayon::ThreadPoolBuilder::new()
247                        .stack_size(128 * 1024)
248                        .num_threads(self.threads)
249                        .thread_name(|idx| format!("dua-fs-walk-{idx}"))
250                        .build()
251                        .expect("fields we set cannot fail")
252                        .into(),
253                    busy_timeout: None,
254                },
255            })
256    }
257}
258
/// Summary of what happened during a filesystem walk.
#[derive(Default)]
pub struct WalkResult {
    /// Number of `io::Error`s seen, for example while fetching metadata or
    /// reading the contents of a directory.
    pub num_errors: u64,
}

impl WalkResult {
    /// Map this result to a process exit code: `0` for an error-free walk, `1` otherwise.
    pub fn to_exit_code(&self) -> i32 {
        if self.num_errors == 0 {
            0
        } else {
            1
        }
    }
}
274
275/// Canonicalize user-provided ignore directory paths.
276///
277/// Non-canonicalizable paths are ignored.
278pub fn canonicalize_ignore_dirs(ignore_dirs: &[PathBuf]) -> BTreeSet<PathBuf> {
279    let dirs = ignore_dirs
280        .iter()
281        .map(gix_path::realpath)
282        .filter_map(Result::ok)
283        .collect();
284    log::info!("Ignoring canonicalized {dirs:?}");
285    dirs
286}
287
288fn ignore_directory(path: &Path, ignore_dirs: &BTreeSet<PathBuf>, cwd: &Path) -> bool {
289    if ignore_dirs.is_empty() {
290        return false;
291    }
292    let path = gix_path::realpath_opts(path, cwd, 32);
293    path.map(|path| {
294        let ignored = ignore_dirs.contains(&path);
295        if ignored {
296            log::debug!("Ignored {path:?}");
297        }
298        ignored
299    })
300    .unwrap_or(false)
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// One scenario: the path to check, the ignore list to configure, and the expected verdict.
    type Case = (&'static str, Vec<&'static str>, bool);

    /// `ignore_directory` matches paths against the canonicalized ignore list, both
    /// for absolute platform paths and for paths relative to the working directory.
    #[test]
    fn test_ignore_directories() {
        let cwd = std::env::current_dir().unwrap();

        #[cfg(unix)]
        let absolute: Vec<Case> = vec![
            ("/usr", vec!["/usr"], true),
            ("/usr/local", vec!["/usr"], false),
            ("/smth", vec!["/usr"], false),
            ("/usr/local/..", vec!["/usr/local/.."], true),
            ("/usr", vec!["/usr/local/.."], true),
            ("/usr/local/share/../..", vec!["/usr"], true),
        ];

        #[cfg(windows)]
        let absolute: Vec<Case> = vec![
            ("C:\\Windows", vec!["C:\\Windows"], true),
            ("C:\\Windows\\System", vec!["C:\\Windows"], false),
            ("C:\\Smth", vec!["C:\\Windows"], false),
            (
                "C:\\Windows\\System\\..",
                vec!["C:\\Windows\\System\\.."],
                true,
            ),
            ("C:\\Windows", vec!["C:\\Windows\\System\\.."], true),
            (
                "C:\\Windows\\System\\Speech\\..\\..",
                vec!["C:\\Windows"],
                true,
            ),
        ];

        // Resolved against the working directory of the test run.
        let relative: Vec<Case> = vec![
            ("src", vec!["src"], true),
            ("src/interactive", vec!["src"], false),
            ("src/interactive/..", vec!["src"], true),
        ];

        for (path, ignore_dirs, expected_result) in absolute.into_iter().chain(relative) {
            let ignore_dirs: Vec<PathBuf> = ignore_dirs.into_iter().map(PathBuf::from).collect();
            let ignore_dirs = canonicalize_ignore_dirs(&ignore_dirs);
            assert_eq!(
                ignore_directory(path.as_ref(), &ignore_dirs, &cwd),
                expected_result,
                "result='{expected_result}' for path='{path}' and ignore_dir='{ignore_dirs:?}' "
            );
        }
    }
}