spacehog/
lib.rs

1//! Find large files on your system.
2mod bytes;
3
4use std::collections::BTreeMap;
5use std::fmt::Display;
6use std::fs::ReadDir;
7use std::io;
8use std::path::{Path, PathBuf};
9use std::sync::mpsc;
10use std::time::Instant;
11
12/// Stream the top `n` largest files under the provided path.
13///
14/// # Errors
15///
16/// Returns an I/O error if unable to scan the provided path.
17///
18/// # Examples
19///
20/// ```
21/// use spacehog::find_top_n_largest_files;
22///
23/// let rx = find_top_n_largest_files("testdata", 5, true).unwrap();
24///
25/// let results = rx.recv().unwrap();
26///
27/// assert_eq!(results.len(), 4);
28/// ```
29pub fn find_top_n_largest_files(
30    path: &str,
31    limit: usize,
32    ignore_hidden: bool,
33) -> io::Result<mpsc::Receiver<Vec<(FileSize, PathBuf)>>> {
34    let path = path.to_string();
35    let (tx, rx) = mpsc::channel();
36    let file_iter = find_files_in_path(&path, ignore_hidden)?;
37
38    std::thread::spawn(move || {
39        let mut timer = Instant::now();
40        let mut results = BTreeMap::new();
41        for entry in file_iter {
42            results.insert(entry.clone(), entry);
43            if timer.elapsed().as_millis() >= 16 {
44                send_snapshot(&tx, &results, limit);
45                timer = Instant::now();
46            }
47        }
48        send_snapshot(&tx, &results, limit);
49    });
50
51    Ok(rx)
52}
53
54fn send_snapshot(
55    tx: &mpsc::Sender<Vec<(FileSize, PathBuf)>>,
56    results: &BTreeMap<(FileSize, PathBuf), (FileSize, PathBuf)>,
57    limit: usize,
58) {
59    let snapshot = results.values().rev().take(limit).cloned().collect();
60    if let Err(e) = tx.send(snapshot) {
61        eprintln!("failed to send entry: {e:?}");
62    };
63}
64
/// The size of a file in bytes.
///
/// This is a thin wrapper around a `u64` byte count. Values compare and order
/// by that count, and are cheap to copy and usable as hash keys.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct FileSize(u64);
68
69impl From<u64> for FileSize {
70    fn from(value: u64) -> Self {
71        Self(value)
72    }
73}
74
75impl Display for FileSize {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        write!(f, "{}", bytes::humanize(self.0))
78    }
79}
80
81fn find_files_in_path(path: &str, ignore_hidden: bool) -> io::Result<FileIter> {
82    let dir = std::fs::read_dir(path)?;
83    Ok(FileIter {
84        ignore_hidden,
85        stack: vec![dir],
86    })
87}
88
89#[derive(Default)]
90struct FileIter {
91    ignore_hidden: bool,
92    stack: Vec<ReadDir>,
93}
94
95impl Iterator for FileIter {
96    type Item = (FileSize, PathBuf);
97
98    fn next(&mut self) -> Option<Self::Item> {
99        loop {
100            let dir = self.stack.last_mut()?;
101            if let Some(entry) = dir.next() {
102                let entry = entry.ok()?;
103                let path = entry.path();
104                if self.ignore_hidden && is_hidden_path(&path) {
105                    continue;
106                }
107                if path.is_dir() {
108                    self.stack.push(std::fs::read_dir(path).ok()?);
109                } else {
110                    let size = entry.metadata().ok()?.len();
111                    return Some((FileSize(size), path));
112                }
113            } else {
114                self.stack.pop();
115            }
116        }
117    }
118}
119
/// Report whether the final component of `path` is a dot-file name.
///
/// Returns `false` when the path has no file name (for example `..` or `/`).
/// Names that are not valid UTF-8 are inspected through a lossy conversion,
/// which leaves a leading `.` intact, so they are classified correctly too.
fn is_hidden_path<P: AsRef<Path>>(path: P) -> bool {
    path.as_ref()
        .file_name()
        .map_or(false, |name| name.to_string_lossy().starts_with('.'))
}
127
#[cfg(test)]
mod tests {
    use crate::FileSize;

    /// Table-driven check of the `Display` output for a few byte counts.
    #[test]
    fn file_sizes_can_be_formatted_as_a_string() {
        let cases: [(FileSize, &'static str); 2] = [
            (FileSize(1000), "1 KB"),
            (FileSize(34_250), "34 KB"),
        ];
        for (file, want) in cases {
            let got = file.to_string();
            assert_eq!(want, got);
        }
    }
}