putzen_cli/caches/
scan.rs1use crate::caches::model::{Cache, TopFile};
4use jwalk::WalkDir;
5use std::cmp::Reverse;
6use std::collections::{BinaryHeap, HashSet};
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
/// Upper bound on how many of the largest files are retained while scanning a
/// directory tree; once the tracking heap grows past this, its smallest entry
/// is evicted.
const TOP_K: usize = 64;
11
12pub fn stat_dir(root: &Path) -> Cache {
15 stat_dir_with_progress(root, &mut || {})
16}
17
18pub fn stat_dir_with_progress(root: &Path, on_dir: &mut dyn FnMut()) -> Cache {
22 let mut size_bytes = 0u64;
23 let mut newest = None::<SystemTime>;
24 let mut file_count = 0u64;
25 let mut dir_count = 0u64;
26 let mut unreadable = 0u64;
27 let mut heap: BinaryHeap<Reverse<(u64, String, Option<SystemTime>)>> = BinaryHeap::new();
28
29 for entry in WalkDir::new(root)
34 .follow_links(false)
35 .skip_hidden(false)
36 .into_iter()
37 .flatten()
38 {
39 let meta = match entry.metadata() {
40 Ok(m) => m,
41 Err(_) => {
42 unreadable += 1;
43 continue;
44 }
45 };
46 if meta.is_dir() {
47 dir_count += 1;
48 on_dir();
49 continue;
50 }
51 if !meta.is_file() {
52 continue;
53 }
54 file_count += 1;
55 size_bytes += meta.len();
56 let file_mtime = meta.modified().ok();
57 if let Some(m) = file_mtime {
58 newest = Some(newest.map_or(m, |prev| prev.max(m)));
59 }
60 let name = entry.file_name().to_string_lossy().to_string();
61 heap.push(Reverse((meta.len(), name, file_mtime)));
62 if heap.len() > TOP_K {
63 heap.pop();
64 }
65 }
66
67 let dir_count = dir_count.saturating_sub(1);
69
70 let label = root
75 .file_name()
76 .map(|s| s.to_string_lossy().to_string())
77 .unwrap_or_default();
78
79 let mut top_files: Vec<TopFile> = heap
80 .into_iter()
81 .map(|Reverse((size, name, mtime))| TopFile {
82 name,
83 size_bytes: size,
84 mtime,
85 })
86 .collect();
87 top_files.sort_by_key(|f| Reverse(f.size_bytes));
88
89 Cache {
90 label,
91 path: root.to_path_buf(),
92 size_bytes,
93 newest_mtime: newest,
94 file_count,
95 dir_count,
96 top_files,
97 unreadable,
98 }
99}
100
101pub fn enumerate_seed(seed: &Path) -> Vec<Cache> {
104 enumerate_seed_with_progress(seed, &mut || {})
105}
106
107pub fn enumerate_seed_with_progress(seed: &Path, on_dir: &mut dyn FnMut()) -> Vec<Cache> {
112 let Ok(read) = std::fs::read_dir(seed) else {
113 return Vec::new();
114 };
115 read.flatten()
116 .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false))
117 .map(|e| stat_dir_with_progress(&e.path(), on_dir))
118 .collect()
119}
120
121pub fn collect(seeds: &[PathBuf]) -> Vec<Cache> {
124 collect_with_progress(seeds, &mut || {})
125}
126
127pub fn collect_with_progress(seeds: &[PathBuf], on_dir: &mut dyn FnMut()) -> Vec<Cache> {
131 let mut seen = HashSet::new();
132 let mut out = Vec::new();
133 for s in seeds {
134 let Ok(canonical) = s.canonicalize() else {
135 continue;
136 };
137 let Ok(read) = std::fs::read_dir(&canonical) else {
138 continue;
139 };
140 for entry in read.flatten() {
141 if !entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
142 continue;
143 }
144 let c = stat_dir_with_progress(&entry.path(), on_dir);
145 let canon = c.path.canonicalize().unwrap_or_else(|_| c.path.clone());
146 if seen.insert(canon) {
147 out.push(c);
148 }
149 }
150 }
151 out
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use std::time::{Duration, SystemTime};

    /// Creates the file at `path` and fills it with `len` zero bytes.
    fn write_zeros(path: &std::path::Path, len: usize) {
        File::create(path)
            .unwrap()
            .write_all(&vec![0u8; len])
            .unwrap();
    }

    /// Sets the modification time of `path`.
    ///
    /// Panics on failure: a silently ignored error would leave the fixture
    /// with its creation-time mtime and could make mtime assertions pass or
    /// fail for the wrong reason.
    fn set_mtime(path: &std::path::Path, when: SystemTime) {
        filetime::set_file_mtime(path, filetime::FileTime::from_system_time(when))
            .expect("failed to set mtime on test fixture");
    }

    #[test]
    fn stat_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let c = stat_dir(tmp.path());
        assert_eq!(c.size_bytes, 0);
        assert_eq!(c.file_count, 0);
        assert_eq!(c.dir_count, 0);
        assert!(c.newest_mtime.is_none());
    }

    #[test]
    fn stat_sums_sizes_and_counts() {
        let tmp = tempfile::tempdir().unwrap();
        let nested = tmp.path().join("a/b");
        fs::create_dir_all(&nested).unwrap();
        write_zeros(&tmp.path().join("a/one"), 100);
        write_zeros(&tmp.path().join("a/b/two"), 200);

        let c = stat_dir(tmp.path());
        assert_eq!(c.size_bytes, 300);
        assert_eq!(c.file_count, 2);
        // `a` and `a/b`; the scanned root itself is not counted.
        assert_eq!(c.dir_count, 2);
        assert!(c.newest_mtime.is_some());
    }

    #[test]
    fn newest_mtime_picks_max_across_files() {
        let tmp = tempfile::tempdir().unwrap();
        let old = tmp.path().join("old");
        write_zeros(&old, 10);
        let new = tmp.path().join("new");
        write_zeros(&new, 10);
        let later = SystemTime::now() + Duration::from_secs(60);
        set_mtime(&old, SystemTime::now() - Duration::from_secs(86_400));
        set_mtime(&new, later);

        let c = stat_dir(tmp.path());
        let nm = c.newest_mtime.expect("expected a newest_mtime");
        // Allow one second of slack for filesystems with coarse timestamps.
        assert!(nm >= later - Duration::from_secs(1));
    }

    #[test]
    fn hidden_files_count_toward_newest_mtime() {
        let tmp = tempfile::tempdir().unwrap();
        let old = tmp.path().join("old");
        write_zeros(&old, 1);
        set_mtime(&old, SystemTime::UNIX_EPOCH + Duration::from_secs(60));

        let hidden = tmp.path().join(".lock");
        write_zeros(&hidden, 1);
        set_mtime(&hidden, SystemTime::now());

        let c = stat_dir(tmp.path());
        let nm = c.newest_mtime.expect("expected a newest_mtime");
        // Only the hidden file is recent; if it were skipped the newest
        // mtime would be the 1970 timestamp of `old`.
        assert!(nm > SystemTime::UNIX_EPOCH + Duration::from_secs(3600 * 24 * 365));
    }

    #[test]
    fn label_preserves_leading_dot() {
        let tmp = tempfile::tempdir().unwrap();
        let hidden = tmp.path().join(".npm");
        fs::create_dir(&hidden).unwrap();
        let c = stat_dir(&hidden);
        assert_eq!(c.label, ".npm");
    }

    #[test]
    fn enumerate_returns_immediate_children() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join("alpha")).unwrap();
        fs::create_dir(tmp.path().join("beta")).unwrap();
        write_zeros(&tmp.path().join("alpha/file"), 50);

        let mut caches = super::enumerate_seed(tmp.path());
        caches.sort_by(|a, b| a.label.cmp(&b.label));
        let labels: Vec<_> = caches.iter().map(|c| c.label.as_str()).collect();
        assert_eq!(labels, ["alpha", "beta"]);
    }

    #[test]
    fn enumerate_seed_skips_missing() {
        let path = std::path::PathBuf::from("/nonexistent/putzen/should/never/exist");
        assert!(super::enumerate_seed(&path).is_empty());
    }

    #[test]
    fn top_files_lists_largest_files_sorted_desc() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("small"), [0u8; 10]).unwrap();
        fs::write(tmp.path().join("big"), [0u8; 1_000_000]).unwrap();
        fs::write(tmp.path().join("medium"), [0u8; 5_000]).unwrap();
        let c = stat_dir(tmp.path());
        let names: Vec<_> = c.top_files.iter().map(|f| f.name.as_str()).collect();
        assert_eq!(names, ["big", "medium", "small"]);
    }

    #[test]
    fn top_files_capped_at_64() {
        let tmp = tempfile::tempdir().unwrap();
        // 100 files with strictly increasing sizes 1..=100 bytes.
        for i in 0..100 {
            fs::write(
                tmp.path().join(format!("f{:03}", i)),
                vec![0u8; (i + 1) as usize],
            )
            .unwrap();
        }
        let c = stat_dir(tmp.path());
        assert_eq!(c.top_files.len(), 64);
        // The largest file must be kept and the smallest must be evicted.
        assert!(c.top_files.iter().any(|f| f.name == "f099"));
        assert!(!c.top_files.iter().any(|f| f.name == "f000"));
    }

    #[test]
    fn collect_dedups_by_canonical_path() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join("alpha")).unwrap();
        // The same seed twice must not produce the same cache twice.
        let seeds = vec![tmp.path().to_path_buf(), tmp.path().to_path_buf()];
        let caches = super::collect(&seeds);
        assert_eq!(caches.len(), 1, "duplicate seed should yield one cache");
        assert_eq!(caches[0].label, "alpha");
    }
}