use anyhow::Result;
use duckdb::Connection;
use serde::Serialize;
/// Cleanup categories and the directory basenames that belong to each one.
///
/// Every entry is `(category, basenames)`. The category is the name callers
/// pass in as a target (see [`default_target_names`]); the basenames are the
/// directory names that [`scan`] looks for in the `name` column of `files`.
/// A category may map to more than one basename (e.g. `venv`).
pub const TARGETS: &[(&str, &[&str])] = &[
("node_modules", &["node_modules"]),
("target", &["target"]),
("pycache", &["__pycache__"]),
("next", &[".next"]),
("dist", &["dist"]),
("build", &["build"]),
("venv", &[".venv", "venv"]),
("gradle", &[".gradle"]),
("pytest", &[".pytest_cache"]),
];
/// Path fragments that mark a directory as off-limits for cleanup.
///
/// [`is_protected`] performs a plain substring match against these, and
/// [`scan`] drops every candidate directory whose path matches. Each fragment
/// starts and ends with `/`, so it matches whole path components only, never
/// part of a longer directory name.
pub const ALWAYS_SKIP_SUBSTRINGS: &[&str] = &[
"/.cargo/registry/",
"/.cargo/git/",
"/.rustup/",
"/.npm/",
"/.pnpm-store/",
"/.yarn/cache/",
"/site-packages/",
"/.cache/pip/",
"/.cache/uv/",
"/Library/Caches/",
"/Library/Group Containers/",
"/Library/Application Support/",
"/.vscode/extensions/",
"/.windsurf/extensions/",
"/.antigravity/",
];
/// Reports whether `path` contains any fragment listed in
/// [`ALWAYS_SKIP_SUBSTRINGS`], meaning it must never be offered for cleanup.
#[inline]
fn is_protected(path: &str) -> bool {
    for fragment in ALWAYS_SKIP_SUBSTRINGS {
        if path.contains(*fragment) {
            return true;
        }
    }
    false
}
pub fn default_target_names() -> Vec<&'static str> {
TARGETS.iter().map(|(name, _)| *name).collect()
}
/// One directory found by [`scan`] that is a candidate for cleanup.
#[derive(Debug, Clone, Serialize)]
pub struct CleanupHit {
/// Category name from [`TARGETS`]; `scan` falls back to `"unknown"` when the
/// directory's basename cannot be mapped back to a category.
pub category: String,
/// Path of the matched directory, as stored in the `files` table.
pub path: String,
/// Summed `size` of the non-directory rows located under `path`.
pub bytes: u64,
/// Number of non-directory rows located under `path`.
pub files: u64,
}
/// Per-category totals produced by [`summarise`].
#[derive(Debug, Clone, Serialize)]
pub struct CategorySummary {
/// Category name shared by the hits that were folded into this row.
pub category: String,
/// Number of hits (directories) in the category.
pub paths: u64,
/// Sum of `bytes` over those hits.
pub bytes: u64,
/// Sum of `files` over those hits.
pub files: u64,
}
/// Folds `hits` into one [`CategorySummary`] per category.
///
/// The result is ordered by total bytes, largest first; categories with equal
/// totals stay in ascending category-name order. An empty slice yields an
/// empty vector.
pub fn summarise(hits: &[CleanupHit]) -> Vec<CategorySummary> {
    use std::collections::BTreeMap;

    // category -> (paths, bytes, files); the BTreeMap keeps categories sorted
    // by name, which is what the stable sort below preserves for ties.
    let mut totals: BTreeMap<&str, (u64, u64, u64)> = BTreeMap::new();
    for hit in hits {
        let (paths, bytes, files) = totals.entry(hit.category.as_str()).or_insert((0, 0, 0));
        *paths += 1;
        *bytes += hit.bytes;
        *files += hit.files;
    }

    let mut summaries: Vec<CategorySummary> = totals
        .into_iter()
        .map(|(category, (paths, bytes, files))| CategorySummary {
            category: category.to_string(),
            paths,
            bytes,
            files,
        })
        .collect();
    summaries.sort_by(|a, b| b.bytes.cmp(&a.bytes));
    summaries
}
/// Expands category names into `(category, basename)` pairs using [`TARGETS`].
///
/// Names that are not in the table are skipped. Pairs come out in the order of
/// `targets`, and within a category in the order the basenames are listed.
fn basenames_for(targets: &[String]) -> Vec<(&'static str, &'static str)> {
    targets
        .iter()
        .filter_map(|wanted| TARGETS.iter().find(|(category, _)| category == wanted))
        .flat_map(|&(category, basenames)| basenames.iter().map(move |&base| (category, base)))
        .collect()
}
#[cfg(test)]
// The test module sits in the middle of the file (more items follow it),
// which is exactly what this lint complains about.
#[allow(clippy::items_after_test_module)]
mod tests {
    use super::*;

    /// Builds a hit whose path is derived from its category and size.
    fn hit(category: &str, bytes: u64, files: u64) -> CleanupHit {
        let path = format!("/tmp/{}-{}", category, bytes);
        CleanupHit {
            category: category.to_owned(),
            path,
            bytes,
            files,
        }
    }

    #[test]
    fn default_target_names_covers_all_categories() {
        let names = default_target_names();
        assert_eq!(names.len(), TARGETS.len());
        for expected in ["node_modules", "target"] {
            assert!(names.contains(&expected));
        }
    }

    #[test]
    fn summarise_aggregates_and_sorts_by_bytes_desc() {
        let summary = summarise(&[
            hit("node_modules", 1024, 5),
            hit("node_modules", 2048, 10),
            hit("target", 8192, 3),
        ]);
        assert_eq!(summary.len(), 2);

        let (largest, runner_up) = (&summary[0], &summary[1]);
        assert_eq!(largest.category, "target");
        assert_eq!(largest.bytes, 8192);
        assert_eq!(runner_up.category, "node_modules");
        assert_eq!(runner_up.bytes, 3072);
        assert_eq!(runner_up.paths, 2);
        assert_eq!(runner_up.files, 15);
    }

    #[test]
    fn summarise_empty_input_returns_empty() {
        assert!(summarise(&[]).is_empty());
    }

    #[test]
    fn basenames_for_skips_unknown_targets() {
        let requested = ["node_modules".to_string(), "totally-fake".to_string()];
        let pairs = basenames_for(&requested);
        assert!(pairs.iter().all(|&(category, _)| category == "node_modules"));
        assert_eq!(pairs.len(), 1);
    }

    #[test]
    fn basenames_for_expands_multi_basename_categories() {
        let requested = ["venv".to_string()];
        let basenames: Vec<&str> = basenames_for(&requested)
            .into_iter()
            .map(|(_, base)| base)
            .collect();
        for expected in [".venv", "venv"] {
            assert!(basenames.contains(&expected));
        }
    }

    #[test]
    fn is_protected_catches_cargo_registry() {
        for path in [
            "/Users/me/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/cc-1.2.62/src/target",
            "/Users/me/.cargo/git/checkouts/foo",
            "/Users/me/.rustup/toolchains/stable-aarch64-apple-darwin/lib",
        ] {
            assert!(is_protected(path));
        }
    }

    #[test]
    fn is_protected_catches_python_site_packages() {
        let path = "/Users/me/venv/lib/python3.13/site-packages/foo/build";
        assert!(is_protected(path));
    }

    #[test]
    fn is_protected_catches_os_caches() {
        for path in [
            "/Users/me/Library/Caches/Slack/build",
            "/Users/me/Library/Application Support/disky/build",
            "/Users/me/Library/Group Containers/HUAQ24HBR6.dev.orbstack/data",
        ] {
            assert!(is_protected(path));
        }
    }

    #[test]
    fn is_protected_passes_real_dev_dirs() {
        for path in [
            "/Users/me/src/myapp/target",
            "/Users/me/src/myapp/node_modules",
            "/Users/me/src/myapp/.venv",
            "/Users/me/src/myapp/build",
            "/Users/me/src/myapp/dist",
        ] {
            assert!(!is_protected(path));
        }
    }
}
/// Finds cleanup candidates in the `files` table and measures each of them.
///
/// `targets` are category names from [`TARGETS`]; names that are not in the
/// table are ignored. Directories whose path is protected (see
/// [`is_protected`]) are never reported. The result is ordered by size,
/// largest first (ties broken by path so the order is reproducible), and cut
/// down to at most `limit` entries.
///
/// Each matched directory is measured independently, so when one match is
/// nested inside another (e.g. a `node_modules` below another `node_modules`)
/// both are reported and their sizes overlap.
///
/// # Errors
///
/// Returns any DuckDB error raised while preparing or running the queries,
/// including errors hit while reading individual result rows. The scratch
/// table `_cleanup_targets` is dropped before returning, on success and on
/// failure alike.
pub fn scan(conn: &Connection, targets: &[String], limit: usize) -> Result<Vec<CleanupHit>> {
    let pairs = basenames_for(targets);
    if pairs.is_empty() {
        return Ok(Vec::new());
    }

    let target_dirs = scan_find_dirs(conn, &pairs)?;
    if target_dirs.is_empty() {
        return Ok(Vec::new());
    }

    // Run the cleanup before looking at the outcome so that a failed
    // measurement does not leave the scratch table behind. Dropping is
    // best-effort: the next scan drops it again before re-creating it.
    let measured = scan_measure_dirs(conn, &target_dirs, &pairs);
    let _ = conn.execute_batch("DROP TABLE IF EXISTS _cleanup_targets;");
    let mut out = measured?;

    // GROUP BY output order is not defined, so break size ties by path.
    out.sort_by(|a, b| b.bytes.cmp(&a.bytes).then_with(|| a.path.cmp(&b.path)));
    out.truncate(limit);
    Ok(out)
}

/// Returns `(path, name)` for every directory row whose name is one of the
/// requested basenames and whose path is not protected.
fn scan_find_dirs(
    conn: &Connection,
    pairs: &[(&'static str, &'static str)],
) -> Result<Vec<(String, String)>> {
    // One `?` per basename; the values themselves are bound, never spliced in.
    let placeholders = vec!["?"; pairs.len()].join(",");
    let sql = format!(
        "SELECT path, name FROM files WHERE is_dir AND name IN ({})",
        placeholders
    );
    let params: Vec<duckdb::types::Value> = pairs
        .iter()
        .map(|(_, base)| duckdb::types::Value::Text((*base).to_string()))
        .collect();
    let param_refs: Vec<&dyn duckdb::ToSql> =
        params.iter().map(|v| v as &dyn duckdb::ToSql).collect();

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(param_refs.as_slice(), |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
    })?;

    let mut dirs = Vec::new();
    for row in rows {
        // Surface row errors instead of silently shrinking the candidate list.
        let (path, name) = row?;
        if !is_protected(&path) {
            dirs.push((path, name));
        }
    }
    Ok(dirs)
}

/// Loads `target_dirs` into the scratch table `_cleanup_targets` and returns
/// one [`CleanupHit`] per directory with its total size and file count.
///
/// The caller is responsible for dropping the scratch table afterwards.
fn scan_measure_dirs(
    conn: &Connection,
    target_dirs: &[(String, String)],
    pairs: &[(&'static str, &'static str)],
) -> Result<Vec<CleanupHit>> {
    conn.execute_batch(
        "DROP TABLE IF EXISTS _cleanup_targets;
         CREATE TEMP TABLE _cleanup_targets (
             path TEXT NOT NULL,
             lo TEXT NOT NULL,
             hi TEXT NOT NULL,
             name TEXT NOT NULL
         );",
    )?;

    {
        let mut app = conn.appender("_cleanup_targets")?;
        for (path, name) in target_dirs {
            // '/' (0x2F) is immediately followed by '0' (0x30), so the
            // half-open range [path + "/", path + "0") holds exactly the
            // strings that start with `path/`. This relies on the database
            // comparing strings byte-wise.
            let lo = format!("{}/", path);
            let hi = format!("{}0", path);
            app.append_row(duckdb::params![path, lo, hi, name])?;
        }
        app.flush()?;
    }

    let category_of: std::collections::HashMap<&'static str, &'static str> =
        pairs.iter().map(|&(category, base)| (base, category)).collect();

    // LEFT JOIN keeps directories that contain no files (bytes = 0, files = 0).
    let mut grouped = conn.prepare(
        "SELECT t.path,
                t.name,
                COALESCE(SUM(f.size), 0) AS bytes,
                COUNT(f.path) AS files
         FROM _cleanup_targets t
         LEFT JOIN files f
           ON f.is_dir = false
          AND f.path >= t.lo
          AND f.path < t.hi
         GROUP BY t.path, t.name",
    )?;
    let rows = grouped.query_map([], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, String>(1)?,
            row.get::<_, i64>(2)?,
            row.get::<_, i64>(3)?,
        ))
    })?;

    let mut hits = Vec::with_capacity(target_dirs.len());
    for row in rows {
        let (path, name, bytes, files) = row?;
        let category = category_of
            .get(name.as_str())
            .copied()
            .unwrap_or("unknown")
            .to_string();
        hits.push(CleanupHit {
            category,
            path,
            // Clamp before casting: a negative aggregate (bad size data) would
            // otherwise wrap around to an enormous value and sort first.
            bytes: bytes.max(0) as u64,
            files: files.max(0) as u64,
        });
    }
    Ok(hits)
}
/// How [`apply`] disposes of each hit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ApplyMode {
/// Remove the directory tree with `std::fs::remove_dir_all`.
Delete,
/// Rename the directory into `.Trash` under the home directory, using a
/// timestamped name.
Trash,
}
/// Disposes of every hit according to `mode` and returns the paths that were
/// actually removed or moved.
///
/// * [`ApplyMode::Delete`]: a path that no longer exists is skipped silently;
///   any other I/O error aborts the run and is returned, discarding the list
///   of paths handled so far.
/// * [`ApplyMode::Trash`]: a failed move never aborts. The hit is skipped, and
///   a message is written to stderr if the path still exists.
pub fn apply(hits: &[CleanupHit], mode: ApplyMode) -> Result<Vec<String>> {
    let mut handled = Vec::new();
    for hit in hits {
        let target = hit.path.as_str();
        let done = match mode {
            ApplyMode::Delete => match std::fs::remove_dir_all(target) {
                Ok(()) => true,
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => false,
                Err(err) => return Err(err.into()),
            },
            ApplyMode::Trash => {
                let moved = move_to_trash(target).is_ok();
                // Only complain when there is still something left on disk.
                if !moved && std::path::Path::new(target).exists() {
                    eprintln!("cleanup: skip {} (move-to-trash failed)", hit.path);
                }
                moved
            }
        };
        if done {
            handled.push(hit.path.clone());
        }
    }
    Ok(handled)
}
/// Renames `path` into the `.Trash` directory under the user's home directory
/// and returns the new location.
///
/// The destination is named `<basename>-<secs>-<nanos>` from the current time
/// since the Unix epoch; a numeric suffix is appended while that name is
/// already taken. Fails when the home directory is unknown, when `.Trash`
/// cannot be created, or when the rename itself fails.
fn move_to_trash(path: &str) -> Result<std::path::PathBuf> {
    use std::path::Path;
    use std::time::{SystemTime, UNIX_EPOCH};

    let home = match dirs::home_dir() {
        Some(dir) => dir,
        None => return Err(anyhow::anyhow!("home dir unavailable")),
    };
    let trash_dir = home.join(".Trash");
    std::fs::create_dir_all(&trash_dir)?;

    let source = Path::new(path);
    let base = match source.file_name() {
        Some(os_name) => os_name.to_string_lossy().into_owned(),
        // Paths without a final component still need some name in the trash.
        None => String::from("disky-trashed"),
    };

    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    let stamp = format!("{}-{}", since_epoch.as_secs(), since_epoch.subsec_nanos());

    let mut destination = trash_dir.join(format!("{}-{}", base, stamp));
    let mut suffix = 0u32;
    loop {
        if !destination.exists() {
            break;
        }
        suffix += 1;
        destination = trash_dir.join(format!("{}-{}-{}", base, stamp, suffix));
    }

    std::fs::rename(source, &destination)?;
    Ok(destination)
}