dua/
common.rs

1use crate::crossdev;
2use crate::traverse::{EntryData, Tree, TreeIndex};
3use byte_unit::{ByteUnit, n_gb_bytes, n_gib_bytes, n_mb_bytes, n_mib_bytes};
4use std::collections::BTreeSet;
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::time::Duration;
9use std::{fmt, path::Path};
10
11pub fn get_entry_or_panic(tree: &Tree, node_idx: TreeIndex) -> &EntryData {
12    tree.node_weight(node_idx)
13        .expect("node should always be retrievable with valid index")
14}
15
16pub(crate) fn get_size_or_panic(tree: &Tree, node_idx: TreeIndex) -> u128 {
17    get_entry_or_panic(tree, node_idx).size
18}
19
20/// Specifies a way to format bytes
21#[derive(Clone, Copy)]
22pub enum ByteFormat {
23    /// metric format, based on 1000.
24    Metric,
25    /// binary format, based on 1024
26    Binary,
27    /// raw bytes, without additional formatting
28    Bytes,
29    /// only gigabytes without smart-unit
30    GB,
31    /// only gibibytes without smart-unit
32    GiB,
33    /// only megabytes without smart-unit
34    MB,
35    /// only mebibytes without smart-unit
36    MiB,
37}
38
39impl ByteFormat {
40    pub fn width(self) -> usize {
41        use ByteFormat::*;
42        match self {
43            Metric => 10,
44            Binary => 11,
45            Bytes => 12,
46            MiB | MB => 12,
47            _ => 10,
48        }
49    }
50    pub fn total_width(self) -> usize {
51        use ByteFormat::*;
52        const THE_SPACE_BETWEEN_UNIT_AND_NUMBER: usize = 1;
53
54        self.width()
55            + match self {
56                Binary | MiB | GiB => 3,
57                Metric | MB | GB => 2,
58                Bytes => 1,
59            }
60            + THE_SPACE_BETWEEN_UNIT_AND_NUMBER
61    }
62    pub fn display(self, bytes: u128) -> ByteFormatDisplay {
63        ByteFormatDisplay {
64            format: self,
65            bytes,
66        }
67    }
68}
69
70pub struct ByteFormatDisplay {
71    format: ByteFormat,
72    bytes: u128,
73}
74
75impl fmt::Display for ByteFormatDisplay {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
77        use ByteFormat::*;
78        use byte_unit::Byte;
79
80        let format = match self.format {
81            Bytes => return write!(f, "{} b", self.bytes),
82            Binary => (true, None),
83            Metric => (false, None),
84            GB => (false, Some((n_gb_bytes!(1), ByteUnit::GB))),
85            GiB => (false, Some((n_gib_bytes!(1), ByteUnit::GiB))),
86            MB => (false, Some((n_mb_bytes!(1), ByteUnit::MB))),
87            MiB => (false, Some((n_mib_bytes!(1), ByteUnit::MiB))),
88        };
89
90        let b = match format {
91            (_, Some((divisor, unit))) => Byte::from_unit(self.bytes as f64 / divisor as f64, unit)
92                .expect("byte count > 0")
93                .get_adjusted_unit(unit),
94            (binary, None) => Byte::from_bytes(self.bytes).get_appropriate_unit(binary),
95        }
96        .format(2);
97        let mut splits = b.split(' ');
98        match (splits.next(), splits.next()) {
99            (Some(bytes), Some(unit)) => write!(
100                f,
101                "{} {:>unit_width$}",
102                bytes,
103                unit,
104                unit_width = match self.format {
105                    Binary => 3,
106                    Metric => 2,
107                    _ => 2,
108                }
109            ),
110            _ => f.write_str(&b),
111        }
112    }
113}
114
/// The ordering applied to directory entries while the filesystem is traversed.
#[derive(Clone)]
pub enum TraversalSorting {
    /// Leave entries in whatever order the walker yields them (sorting disabled).
    None,
    /// Ask the walker to sort entries (sorting enabled).
    AlphabeticalByFileName,
}
121
/// Limits how often an action may run, using a flag that a background thread raises
/// once per interval.
#[derive(Debug)]
pub struct Throttle {
    trigger: Arc<AtomicBool>,
}

impl Throttle {
    /// Creates a throttle whose flag is raised every `duration`.
    ///
    /// A background thread is spawned which first sleeps for `initial_sleep` (if given) and
    /// then repeatedly sets the flag and sleeps for `duration`. The thread only holds a weak
    /// reference between iterations, so it stops once it wakes up and finds that the
    /// `Throttle` has been dropped.
    pub fn new(duration: Duration, initial_sleep: Option<Duration>) -> Self {
        let trigger: Arc<AtomicBool> = Arc::default();
        let weak_trigger = Arc::downgrade(&trigger);

        std::thread::spawn(move || {
            if let Some(delay) = initial_sleep {
                std::thread::sleep(delay);
            }
            while let Some(flag) = weak_trigger.upgrade() {
                flag.store(true, Ordering::Relaxed);
                std::thread::sleep(duration);
            }
        });

        Self { trigger }
    }

    /// Runs `f` if, and only if, [`can_update()`](Self::can_update) returns `true`.
    pub fn throttled<F>(&self, f: F)
    where
        F: FnOnce(),
    {
        if !self.can_update() {
            return;
        }
        f()
    }

    /// Return `true` if we are not currently throttled.
    ///
    /// The flag is cleared as part of the check, so after a `true` result further calls
    /// return `false` until the background thread raises the flag again.
    pub fn can_update(&self) -> bool {
        let was_raised = self.trigger.swap(false, Ordering::Relaxed);
        was_raised
    }
}
162
163/// Configures a filesystem walk, including output and formatting options.
164#[derive(Clone)]
165pub struct WalkOptions {
166    /// The amount of threads to use. Refer to [`WalkDir::num_threads()`](https://docs.rs/jwalk/0.4.0/jwalk/struct.WalkDir.html#method.num_threads)
167    /// for more information.
168    pub threads: usize,
169    pub count_hard_links: bool,
170    pub apparent_size: bool,
171    pub sorting: TraversalSorting,
172    pub cross_filesystems: bool,
173    pub ignore_dirs: BTreeSet<PathBuf>,
174}
175
176type WalkDir = jwalk::WalkDirGeneric<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>;
177
178impl WalkOptions {
179    pub fn iter_from_path(&self, root: &Path, root_device_id: u64, skip_root: bool) -> WalkDir {
180        let ignore_dirs = self.ignore_dirs.clone();
181        let cwd = std::env::current_dir().unwrap_or_else(|_| root.to_owned());
182        WalkDir::new(root)
183            .follow_links(false)
184            .min_depth(if skip_root { 1 } else { 0 })
185            .sort(match self.sorting {
186                TraversalSorting::None => false,
187                TraversalSorting::AlphabeticalByFileName => true,
188            })
189            .skip_hidden(false)
190            .process_read_dir({
191                let cross_filesystems = self.cross_filesystems;
192                move |_, _, _, dir_entry_results| {
193                    dir_entry_results.iter_mut().for_each(|dir_entry_result| {
194                        if let Ok(dir_entry) = dir_entry_result {
195                            let metadata = dir_entry.metadata();
196
197                            if dir_entry.file_type.is_dir() {
198                                let ok_for_fs = cross_filesystems
199                                    || metadata
200                                        .as_ref()
201                                        .map(|m| crossdev::is_same_device(root_device_id, m))
202                                        .unwrap_or(true);
203                                if !ok_for_fs
204                                    || ignore_directory(&dir_entry.path(), &ignore_dirs, &cwd)
205                                {
206                                    dir_entry.read_children_path = None;
207                                }
208                            }
209
210                            dir_entry.client_state = Some(metadata);
211                        }
212                    })
213                }
214            })
215            .parallelism(match self.threads {
216                0 => jwalk::Parallelism::RayonDefaultPool {
217                    busy_timeout: std::time::Duration::from_secs(1),
218                },
219                1 => jwalk::Parallelism::Serial,
220                _ => jwalk::Parallelism::RayonExistingPool {
221                    pool: jwalk::rayon::ThreadPoolBuilder::new()
222                        .stack_size(128 * 1024)
223                        .num_threads(self.threads)
224                        .thread_name(|idx| format!("dua-fs-walk-{idx}"))
225                        .build()
226                        .expect("fields we set cannot fail")
227                        .into(),
228                    busy_timeout: None,
229                },
230            })
231    }
232}
233
/// Summary of what happened during a filesystem walk.
#[derive(Default)]
pub struct WalkResult {
    /// The amount of io::errors we encountered. Can happen when fetching meta-data, or when reading the directory contents.
    pub num_errors: u64,
}

impl WalkResult {
    /// Maps the outcome to a process exit code: `0` without errors, `1` otherwise.
    pub fn to_exit_code(&self) -> i32 {
        match self.num_errors {
            0 => 0,
            _ => 1,
        }
    }
}
246
247pub fn canonicalize_ignore_dirs(ignore_dirs: &[PathBuf]) -> BTreeSet<PathBuf> {
248    let dirs = ignore_dirs
249        .iter()
250        .map(gix_path::realpath)
251        .filter_map(Result::ok)
252        .collect();
253    log::info!("Ignoring canonicalized {dirs:?}");
254    dirs
255}
256
257fn ignore_directory(path: &Path, ignore_dirs: &BTreeSet<PathBuf>, cwd: &Path) -> bool {
258    if ignore_dirs.is_empty() {
259        return false;
260    }
261    let path = gix_path::realpath_opts(path, cwd, 32);
262    path.map(|path| {
263        let ignored = ignore_dirs.contains(&path);
264        if ignored {
265            log::debug!("Ignored {path:?}");
266        }
267        ignored
268    })
269    .unwrap_or(false)
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `ignore_directory` against `(path, ignore list, expected result)` cases:
    /// exact matches, children of ignored directories, unrelated paths, and paths that
    /// only match once `..` components are resolved.
    ///
    /// The relative `src` cases are resolved against the current working directory.
    #[test]
    fn test_ignore_directories() {
        let cwd = std::env::current_dir().unwrap();

        // Absolute cases use platform-specific paths.
        #[cfg(unix)]
        let mut parameters = vec![
            ("/usr", vec!["/usr"], true),
            ("/usr/local", vec!["/usr"], false),
            ("/smth", vec!["/usr"], false),
            ("/usr/local/..", vec!["/usr/local/.."], true),
            ("/usr", vec!["/usr/local/.."], true),
            ("/usr/local/share/../..", vec!["/usr"], true),
        ];

        #[cfg(windows)]
        let mut parameters = vec![
            ("C:\\Windows", vec!["C:\\Windows"], true),
            ("C:\\Windows\\System", vec!["C:\\Windows"], false),
            ("C:\\Smth", vec!["C:\\Windows"], false),
            (
                "C:\\Windows\\System\\..",
                vec!["C:\\Windows\\System\\.."],
                true,
            ),
            ("C:\\Windows", vec!["C:\\Windows\\System\\.."], true),
            (
                "C:\\Windows\\System\\Speech\\..\\..",
                vec!["C:\\Windows"],
                true,
            ),
        ];

        // Relative cases are shared by all platforms.
        parameters.extend([
            ("src", vec!["src"], true),
            ("src/interactive", vec!["src"], false),
            ("src/interactive/..", vec!["src"], true),
        ]);

        for (path, ignore_dirs, expected_result) in parameters {
            let requested: Vec<PathBuf> = ignore_dirs.into_iter().map(PathBuf::from).collect();
            let ignore_dirs = canonicalize_ignore_dirs(&requested);
            let actual = ignore_directory(Path::new(path), &ignore_dirs, &cwd);
            assert_eq!(
                actual, expected_result,
                "result='{expected_result}' for path='{path}' and ignore_dir='{ignore_dirs:?}' "
            );
        }
    }
}