use crate::caches::model::{Cache, TopFile};
use jwalk::WalkDir;
use std::cmp::Reverse;
use std::collections::{BinaryHeap, HashSet};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// Upper bound on how many of the largest files a directory walk keeps for
/// the resulting `Cache`'s `top_files` list.
const TOP_K: usize = 64;
/// Summarises the directory tree under `root` as a [`Cache`] without
/// reporting progress.
///
/// This is [`stat_dir_with_progress`] with a callback that does nothing.
pub fn stat_dir(root: &Path) -> Cache {
    let mut ignore_progress = || {};
    stat_dir_with_progress(root, &mut ignore_progress)
}
/// Walks `root` recursively and summarises what it finds as a [`Cache`].
///
/// Symbolic links are not followed and hidden entries are included. Only
/// regular files contribute to `size_bytes`, `file_count`, `newest_mtime` and
/// `top_files`; directories are counted in `dir_count` (excluding `root`
/// itself) and every other kind of entry is skipped.
///
/// `on_dir` is invoked once for every directory visited, including `root`, so
/// callers can drive a progress indicator.
///
/// `unreadable` counts every entry the walk could not inspect: entries the
/// walker reported as errors, entries whose metadata could not be read, and
/// directories whose contents could not be listed.
///
/// `top_files` holds at most [`TOP_K`] files, largest first; files of equal
/// size are ordered by name.
pub fn stat_dir_with_progress(root: &Path, on_dir: &mut dyn FnMut()) -> Cache {
    let mut size_bytes = 0u64;
    let mut newest = None::<SystemTime>;
    let mut file_count = 0u64;
    let mut dir_count = 0u64;
    let mut unreadable = 0u64;
    // Min-heap on (size, name, mtime): its top is always the smallest file
    // retained so far, i.e. the one to evict once more than TOP_K are held.
    let mut heap: BinaryHeap<Reverse<(u64, String, Option<SystemTime>)>> = BinaryHeap::new();

    for entry in WalkDir::new(root)
        .follow_links(false)
        .skip_hidden(false)
        .into_iter()
    {
        // Walk errors must be counted rather than dropped, otherwise the
        // totals shrink without any indication that something was missed.
        let entry = match entry {
            Ok(entry) => entry,
            Err(_) => {
                unreadable += 1;
                continue;
            }
        };
        let meta = match entry.metadata() {
            Ok(meta) => meta,
            Err(_) => {
                unreadable += 1;
                continue;
            }
        };

        if meta.is_dir() {
            // The directory itself could be inspected, but if listing it
            // failed then its whole subtree is absent from the totals.
            if entry.read_children_error.is_some() {
                unreadable += 1;
            }
            dir_count += 1;
            on_dir();
            continue;
        }
        if !meta.is_file() {
            continue;
        }

        let size = meta.len();
        file_count += 1;
        size_bytes = size_bytes.saturating_add(size);

        let file_mtime = meta.modified().ok();
        // `None` orders below every `Some`, so this keeps the latest known mtime.
        newest = newest.max(file_mtime);

        // A file strictly smaller than the smallest retained one would be
        // pushed and evicted straight away; skip it before allocating a name.
        let evicted_immediately = heap.len() >= TOP_K
            && matches!(heap.peek(), Some(Reverse((smallest, _, _))) if size < *smallest);
        if evicted_immediately {
            continue;
        }
        let name = entry.file_name().to_string_lossy().into_owned();
        heap.push(Reverse((size, name, file_mtime)));
        if heap.len() > TOP_K {
            heap.pop();
        }
    }

    // The walk yields `root` itself as a directory; it is not one of its own
    // children, so leave it out of the count.
    let dir_count = dir_count.saturating_sub(1);
    let label = root
        .file_name()
        .map(|s| s.to_string_lossy().into_owned())
        .unwrap_or_default();

    let mut top_files: Vec<TopFile> = heap
        .into_iter()
        .map(|Reverse((size, name, mtime))| TopFile {
            name,
            size_bytes: size,
            mtime,
        })
        .collect();
    // Largest first; break ties by name so the order is not left to the
    // heap's internal layout.
    top_files.sort_by(|a, b| {
        b.size_bytes
            .cmp(&a.size_bytes)
            .then_with(|| a.name.cmp(&b.name))
    });

    Cache {
        label,
        path: root.to_path_buf(),
        size_bytes,
        newest_mtime: newest,
        file_count,
        dir_count,
        top_files,
        unreadable,
    }
}
/// Summarises every immediate child directory of `seed` without reporting
/// progress.
///
/// This is [`enumerate_seed_with_progress`] with a callback that does nothing.
pub fn enumerate_seed(seed: &Path) -> Vec<Cache> {
    let mut ignore_progress = || {};
    enumerate_seed_with_progress(seed, &mut ignore_progress)
}
/// Produces one [`Cache`] per immediate child directory of `seed`.
///
/// Returns an empty list when `seed` cannot be listed. Children that cannot
/// be read, or whose file type is not a directory, are skipped. `on_dir` is
/// handed to [`stat_dir_with_progress`] for each child that is walked.
pub fn enumerate_seed_with_progress(seed: &Path, on_dir: &mut dyn FnMut()) -> Vec<Cache> {
    let children = match std::fs::read_dir(seed) {
        Ok(children) => children,
        Err(_) => return Vec::new(),
    };

    let mut caches = Vec::new();
    for child in children.flatten() {
        let is_dir = matches!(child.file_type(), Ok(kind) if kind.is_dir());
        if is_dir {
            caches.push(stat_dir_with_progress(&child.path(), on_dir));
        }
    }
    caches
}
/// Gathers the caches found under all `seeds` without reporting progress.
///
/// This is [`collect_with_progress`] with a callback that does nothing.
pub fn collect(seeds: &[PathBuf]) -> Vec<Cache> {
    let mut ignore_progress = || {};
    collect_with_progress(seeds, &mut ignore_progress)
}
/// Produces one [`Cache`] per distinct immediate child directory across all
/// `seeds`.
///
/// Each seed is canonicalized before it is listed; seeds that cannot be
/// canonicalized or listed are skipped. Children are deduplicated by their
/// canonical path (falling back to the path as listed when canonicalization
/// fails), keeping the first occurrence in seed order.
///
/// `on_dir` is handed to [`stat_dir_with_progress`] for every child that is
/// actually walked.
pub fn collect_with_progress(seeds: &[PathBuf], on_dir: &mut dyn FnMut()) -> Vec<Cache> {
    let mut seen = HashSet::new();
    let mut out = Vec::new();
    for seed in seeds {
        let Ok(canonical_seed) = seed.canonicalize() else {
            continue;
        };
        let Ok(read) = std::fs::read_dir(&canonical_seed) else {
            continue;
        };
        for entry in read.flatten() {
            if !entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
                continue;
            }
            let path = entry.path();
            // Check for duplicates before walking: the recursive walk is the
            // expensive part, and walking a directory that is then discarded
            // would also report progress for work that yields nothing.
            let key = path.canonicalize().unwrap_or_else(|_| path.clone());
            if !seen.insert(key) {
                continue;
            }
            out.push(stat_dir_with_progress(&path, on_dir));
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use std::time::{Duration, SystemTime};

    /// Creates the file at `path` and fills it with `len` zero bytes.
    fn write_zeros(path: &Path, len: usize) {
        File::create(path)
            .unwrap()
            .write_all(&vec![0u8; len])
            .unwrap();
    }

    /// Sets the modification time of `path`.
    ///
    /// Panics on failure: a silently ignored error would leave the file with
    /// its creation-time mtime and let the mtime assertions pass vacuously.
    fn set_mtime(path: &Path, when: SystemTime) {
        filetime::set_file_mtime(path, filetime::FileTime::from_system_time(when))
            .expect("failed to set file mtime");
    }

    #[test]
    fn stat_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let c = stat_dir(tmp.path());
        assert_eq!(c.size_bytes, 0);
        assert_eq!(c.file_count, 0);
        assert_eq!(c.dir_count, 0);
        assert!(c.newest_mtime.is_none());
    }

    #[test]
    fn stat_sums_sizes_and_counts() {
        let tmp = tempfile::tempdir().unwrap();
        let nested = tmp.path().join("a/b");
        fs::create_dir_all(&nested).unwrap();
        write_zeros(&tmp.path().join("a/one"), 100);
        write_zeros(&tmp.path().join("a/b/two"), 200);
        let c = stat_dir(tmp.path());
        assert_eq!(c.size_bytes, 300);
        assert_eq!(c.file_count, 2);
        // `a` and `a/b`; the root itself is not counted.
        assert_eq!(c.dir_count, 2);
        assert!(c.newest_mtime.is_some());
    }

    #[test]
    fn newest_mtime_picks_max_across_files() {
        let tmp = tempfile::tempdir().unwrap();
        let old = tmp.path().join("old");
        write_zeros(&old, 10);
        let new = tmp.path().join("new");
        write_zeros(&new, 10);
        let later = SystemTime::now() + Duration::from_secs(60);
        set_mtime(&old, SystemTime::now() - Duration::from_secs(86_400));
        set_mtime(&new, later);
        let c = stat_dir(tmp.path());
        let nm = c.newest_mtime.expect("expected a newest_mtime");
        // Allow one second of slack for filesystems with coarse timestamps.
        assert!(nm >= later - Duration::from_secs(1));
    }

    #[test]
    fn hidden_files_count_toward_newest_mtime() {
        let tmp = tempfile::tempdir().unwrap();
        let old = tmp.path().join("old");
        write_zeros(&old, 1);
        set_mtime(&old, SystemTime::UNIX_EPOCH + Duration::from_secs(60));
        let hidden = tmp.path().join(".lock");
        write_zeros(&hidden, 1);
        let later = SystemTime::now();
        set_mtime(&hidden, later);
        let c = stat_dir(tmp.path());
        let nm = c.newest_mtime.expect("expected a newest_mtime");
        // `old` sits one minute after the epoch, so anything beyond the first
        // year can only have come from the hidden file.
        assert!(nm > SystemTime::UNIX_EPOCH + Duration::from_secs(3600 * 24 * 365));
    }

    #[test]
    fn label_preserves_leading_dot() {
        let tmp = tempfile::tempdir().unwrap();
        let hidden = tmp.path().join(".npm");
        fs::create_dir(&hidden).unwrap();
        let c = stat_dir(&hidden);
        assert_eq!(c.label, ".npm");
    }

    #[test]
    fn enumerate_returns_immediate_children() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join("alpha")).unwrap();
        fs::create_dir(tmp.path().join("beta")).unwrap();
        write_zeros(&tmp.path().join("alpha/file"), 50);
        let mut caches = enumerate_seed(tmp.path());
        caches.sort_by(|a, b| a.label.cmp(&b.label));
        let labels: Vec<_> = caches.iter().map(|c| c.label.as_str()).collect();
        assert_eq!(labels, ["alpha", "beta"]);
    }

    #[test]
    fn enumerate_seed_skips_missing() {
        let path = PathBuf::from("/nonexistent/putzen/should/never/exist");
        assert!(enumerate_seed(&path).is_empty());
    }

    #[test]
    fn top_files_lists_largest_files_sorted_desc() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("small"), [0u8; 10]).unwrap();
        fs::write(tmp.path().join("big"), [0u8; 1_000_000]).unwrap();
        fs::write(tmp.path().join("medium"), [0u8; 5_000]).unwrap();
        let c = stat_dir(tmp.path());
        let names: Vec<_> = c.top_files.iter().map(|f| f.name.as_str()).collect();
        assert_eq!(names, ["big", "medium", "small"]);
    }

    #[test]
    fn top_files_capped_at_64() {
        let tmp = tempfile::tempdir().unwrap();
        // File `fNNN` is NNN + 1 bytes long, so larger indices are larger files.
        for i in 0..100usize {
            fs::write(tmp.path().join(format!("f{:03}", i)), vec![0u8; i + 1]).unwrap();
        }
        let c = stat_dir(tmp.path());
        assert_eq!(c.top_files.len(), 64);
        assert!(c.top_files.iter().any(|f| f.name == "f099"));
        // The smallest file must have been evicted, not merely outnumbered.
        assert!(!c.top_files.iter().any(|f| f.name == "f000"));
    }

    #[test]
    fn collect_dedups_by_canonical_path() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir(tmp.path().join("alpha")).unwrap();
        let seeds = vec![tmp.path().to_path_buf(), tmp.path().to_path_buf()];
        let caches = collect(&seeds);
        assert_eq!(caches.len(), 1, "duplicate seed should yield one cache");
        assert_eq!(caches[0].label, "alpha");
    }
}