use serde::Serialize;
use std::fmt;
/// Coarse grouping of [`Category`] values.
///
/// The assignment of each category to a tier is defined by [`Category::tier`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tier {
/// Tier returned for `Cache`, `Build`, `Log`, `Media`, `Vcs`, `Ide` and `Other`.
Core,
/// Tier returned for `Archive`, `Installer`, `VmImage`, `ModelCache` and `Backup`.
Extended,
}
/// Classification assigned to a directory or file name by [`classify_dir`] and
/// [`classify_file`].
///
/// The serde and clap derives both use `rename_all = "snake_case"`, so variants
/// are serialized and accepted as CLI values in snake_case (e.g. `vm_image`).
/// [`Category::label`] returns that same spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, clap::ValueEnum)]
#[serde(rename_all = "snake_case")]
#[clap(rename_all = "snake_case")]
pub enum Category {
/// Directories on the cache allow-list in `classify_dir` (e.g. `node_modules`,
/// `.cargo`, `tmp`) or whose lowercased name contains `"cache"`.
Cache,
/// Build-output directories such as `target`, `dist` or `build`.
Build,
/// Directories named `logs`, `log` or `.logs`, and files ending in `.log`.
Log,
/// Files whose name ends with one of the image, video or audio extensions.
Media,
/// Version-control metadata directories such as `.git` or `.svn`.
Vcs,
/// Editor / IDE directories such as `.idea` or `.vscode`.
Ide,
/// Fallback when no other rule matches.
Other,
/// Files with an archive or compression extension such as `.zip` or `.gz`.
Archive,
/// Files with an installer / package extension such as `.dmg` or `.deb`.
Installer,
/// Files with a disk-image extension such as `.vmdk` or `.iso`, or whose name
/// ends in `data.img.raw`.
VmImage,
/// The directories `.ollama`, `.lmstudio` and `.huggingface`.
ModelCache,
/// The directories `Time Machine Backups` / `Backups.backupdb` and files
/// ending in `.bak`, `.backup` or `.old`.
Backup,
}
impl Category {
    /// Returns the snake_case name of this category.
    ///
    /// The strings match the `rename_all = "snake_case"` spelling used by the
    /// serde and clap derives on the enum.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Cache => "cache",
            Self::Build => "build",
            Self::Log => "log",
            Self::Media => "media",
            Self::Vcs => "vcs",
            Self::Ide => "ide",
            Self::Other => "other",
            Self::Archive => "archive",
            Self::Installer => "installer",
            Self::VmImage => "vm_image",
            Self::ModelCache => "model_cache",
            Self::Backup => "backup",
        }
    }

    /// Returns the [`Tier`] this category belongs to.
    ///
    /// The match is deliberately exhaustive (no `_` arm) so that adding a new
    /// variant forces a decision about its tier at compile time.
    pub fn tier(&self) -> Tier {
        match *self {
            Self::Archive
            | Self::Installer
            | Self::VmImage
            | Self::ModelCache
            | Self::Backup => Tier::Extended,
            Self::Cache
            | Self::Build
            | Self::Log
            | Self::Media
            | Self::Vcs
            | Self::Ide
            | Self::Other => Tier::Core,
        }
    }
}
impl fmt::Display for Category {
    /// Writes the category's [`label`](Category::label) verbatim.
    ///
    /// The label is emitted as-is; width, fill and precision flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.label())
    }
}
/// Classifies a directory by its name alone (not its full path).
///
/// Matching is case-insensitive: the name is lowercased before comparison.
/// Rules are evaluated top to bottom and the first match wins:
///
/// 1. model stores -> [`Category::ModelCache`]
/// 2. Time Machine backup directories -> [`Category::Backup`]
/// 3. known cache / dependency / toolchain / temp directories -> [`Category::Cache`]
/// 4. any name containing `"cache"` -> [`Category::Cache`]
/// 5. build outputs -> [`Category::Build`]
/// 6. log directories -> [`Category::Log`]
/// 7. version-control metadata -> [`Category::Vcs`]
/// 8. editor / IDE directories -> [`Category::Ide`]
///
/// Anything else is [`Category::Other`].
pub fn classify_dir(name: &str) -> Category {
    let normalized = name.to_lowercase();

    match normalized.as_str() {
        ".ollama" | ".lmstudio" | ".huggingface" => Category::ModelCache,

        "time machine backups" | "backups.backupdb" => Category::Backup,

        "node_modules" | ".cache" | "__pycache__" | ".npm" | ".yarn" | ".pnpm-store"
        | "caches" | ".gradle" | ".nuget" | ".pub-cache" | "pods" | ".cocoapods" | ".cargo"
        | "bower_components" | ".tmp" | "tmp" | "temp" | ".temp" | ".trash" | ".rustup"
        | ".pyenv" | ".rbenv" | ".nvm" | ".volta" | ".asdf" | "mise" | ".pipx" | "pipx"
        | ".poetry" | ".composer" | ".m2" | ".ivy2" | ".sbt" | ".stack" | ".cabal" | ".deno"
        | ".bun" | ".docker" | "vm_bundles" => Category::Cache,

        // Substring rule: catches names such as "GPUCache" or "Code Cache".
        // It sits after the exact-name rules above and before the ones below.
        other if other.contains("cache") => Category::Cache,

        "target" | "dist" | "build" | "out" | ".next" | ".nuxt" | ".output" | ".turbo"
        | ".angular" | "_build" | "cmake-build-debug" | "cmake-build-release" => Category::Build,

        "logs" | "log" | ".logs" => Category::Log,

        ".git" | ".svn" | ".hg" | ".jj" | ".bzr" | "_darcs" | ".fossil" => Category::Vcs,

        ".idea" | ".vscode" | ".vscode-insiders" | ".vscode-server" | ".vs" | ".eclipse"
        | ".settings" | ".cursor" | ".cursor-server" | ".windsurf" | ".zed" | ".fleet" => {
            Category::Ide
        }

        _ => Category::Other,
    }
}
/// Classifies a file by its name, based on how the lowercased name ends.
///
/// Rules are checked in priority order and the first match wins:
/// `.log` files, then VM images (including the special `data.img.raw` suffix,
/// which must be tested before the generic `.raw` media rule), installers,
/// archives, backups and finally media. Everything else is
/// [`Category::Other`].
pub fn classify_file(name: &str) -> Category {
    let lowered = name.to_lowercase();
    let lowered = lowered.as_str();

    if lowered.ends_with(".log") {
        Category::Log
    } else if lowered.ends_with("data.img.raw") || is_vm_image_extension(lowered) {
        Category::VmImage
    } else if is_installer_extension(lowered) {
        Category::Installer
    } else if is_archive_extension(lowered) {
        Category::Archive
    } else if is_backup_extension(lowered) {
        Category::Backup
    } else if is_media_extension(lowered) {
        Category::Media
    } else {
        Category::Other
    }
}
/// Returns `true` when `lower_name` ends with a known image, video or audio
/// extension.
///
/// The comparison is case-sensitive against lowercase suffixes, so the caller
/// is expected to pass an already-lowercased name.
fn is_media_extension(lower_name: &str) -> bool {
    // Still images, including camera raw formats.
    const IMAGE_SUFFIXES: &[&str] = &[
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".webp", ".ico", ".tiff", ".heic",
        ".heif", ".psd", ".raw", ".arw", ".cr2", ".nef", ".dng",
    ];
    const VIDEO_SUFFIXES: &[&str] = &[
        ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".m4v", ".3gp",
    ];
    const AUDIO_SUFFIXES: &[&str] = &[
        ".mp3", ".wav", ".flac", ".aac", ".ogg", ".wma", ".m4a", ".opus", ".aiff",
    ];

    IMAGE_SUFFIXES
        .iter()
        .chain(VIDEO_SUFFIXES)
        .chain(AUDIO_SUFFIXES)
        .any(|&suffix| lower_name.ends_with(suffix))
}
/// Returns `true` when `lower_name` ends with a virtual-disk or disk-image
/// extension.
///
/// The comparison is case-sensitive against lowercase suffixes, so the caller
/// is expected to pass an already-lowercased name.
fn is_vm_image_extension(lower_name: &str) -> bool {
    const DISK_IMAGE_SUFFIXES: [&str; 6] = [".vdi", ".vmdk", ".qcow2", ".vhd", ".vhdx", ".iso"];

    for suffix in DISK_IMAGE_SUFFIXES.iter() {
        if lower_name.ends_with(*suffix) {
            return true;
        }
    }
    false
}
/// Returns `true` when `lower_name` ends with an installer or package
/// extension.
///
/// The comparison is case-sensitive against lowercase suffixes, so the caller
/// is expected to pass an already-lowercased name.
fn is_installer_extension(lower_name: &str) -> bool {
    const PACKAGE_SUFFIXES: [&str; 10] = [
        ".dmg",
        ".pkg",
        ".msi",
        ".exe",
        ".deb",
        ".rpm",
        ".appimage",
        ".snap",
        ".flatpak",
        ".apk",
    ];

    PACKAGE_SUFFIXES
        .iter()
        .copied()
        .any(|suffix| lower_name.ends_with(suffix))
}
/// Returns `true` when `lower_name` ends with an archive or compression
/// extension.
///
/// Only the final suffix is inspected, so `source.tar.gz` matches via `.gz`.
/// The comparison is case-sensitive against lowercase suffixes, so the caller
/// is expected to pass an already-lowercased name.
fn is_archive_extension(lower_name: &str) -> bool {
    const COMPRESSED_SUFFIXES: [&str; 11] = [
        ".zip", ".tar", ".tgz", ".tbz2", ".txz", ".gz", ".bz2", ".xz", ".7z", ".rar", ".zst",
    ];

    COMPRESSED_SUFFIXES.iter().any(|&suffix| lower_name.ends_with(suffix))
}
/// Returns `true` when `lower_name` ends with `.bak`, `.backup` or `.old`.
///
/// The comparison is case-sensitive against lowercase suffixes, so the caller
/// is expected to pass an already-lowercased name.
fn is_backup_extension(lower_name: &str) -> bool {
    [".bak", ".backup", ".old"]
        .iter()
        .any(|&suffix| lower_name.ends_with(suffix))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `classify_dir` maps every name in `names` to `expected`.
    fn assert_dirs(expected: Category, names: &[&str]) {
        for &name in names {
            assert_eq!(classify_dir(name), expected, "directory {name:?}");
        }
    }

    /// Asserts that `classify_file` maps every name in `names` to `expected`.
    fn assert_files(expected: Category, names: &[&str]) {
        for &name in names {
            assert_eq!(classify_file(name), expected, "file {name:?}");
        }
    }

    #[test]
    fn classifies_known_directories() {
        assert_dirs(Category::Cache, &["node_modules", "__pycache__"]);
        assert_dirs(Category::Build, &["target", "dist"]);
        assert_dirs(Category::Log, &["logs"]);
        assert_dirs(Category::Vcs, &[".git"]);
        assert_dirs(Category::Ide, &[".idea"]);
        assert_dirs(Category::Other, &["src"]);
    }

    #[test]
    fn classifies_directory_names_case_insensitively() {
        assert_dirs(Category::Cache, &["Node_Modules"]);
        assert_dirs(Category::Vcs, &[".GIT"]);
    }

    #[test]
    fn classifies_language_toolchains_as_cache() {
        assert_dirs(
            Category::Cache,
            &[
                ".rustup",
                ".pyenv",
                ".nvm",
                "mise",
                "pipx",
                ".docker",
                "vm_bundles",
            ],
        );
    }

    #[test]
    fn classifies_additional_ide_and_vcs() {
        assert_dirs(Category::Ide, &[".vscode-insiders", ".cursor", ".zed"]);
        assert_dirs(Category::Vcs, &[".jj"]);
    }

    #[test]
    fn partial_match_catches_cache_directories() {
        assert_dirs(Category::Cache, &["GPUCache", "Code Cache"]);
    }

    #[test]
    fn classifies_files_by_extension() {
        assert_files(Category::Log, &["debug.log"]);
        assert_files(Category::Media, &["photo.JPG", "video.mp4", "song.mp3"]);
        assert_files(Category::Other, &["main.rs"]);
    }

    #[test]
    fn typescript_files_are_not_media() {
        assert_files(
            Category::Other,
            &["index.ts", "App.tsx", "eleventy.config.ts"],
        );
    }

    #[test]
    fn ai_model_stores_classify_as_model_cache() {
        assert_dirs(
            Category::ModelCache,
            &[".ollama", ".lmstudio", ".huggingface"],
        );
    }

    #[test]
    fn time_machine_backup_directories_classify_as_backup() {
        assert_dirs(
            Category::Backup,
            &["Time Machine Backups", "Backups.backupdb"],
        );
    }

    #[test]
    fn installer_files_classify_as_installer() {
        assert_files(
            Category::Installer,
            &[
                "Codex.dmg",
                "googlechrome.dmg",
                "setup.exe",
                "package.deb",
                "MyApp.AppImage",
            ],
        );
    }

    #[test]
    fn vm_images_classify_as_vm_image() {
        assert_files(
            Category::VmImage,
            &["disk.vdi", "disk.vmdk", "disk.qcow2", "data.img.raw"],
        );
    }

    #[test]
    fn raw_photo_classifies_as_media() {
        assert_files(Category::Media, &["DSC0001.raw", "DSC0001.arw"]);
        // The VM-image suffix must win over the generic `.raw` media rule.
        assert_files(Category::VmImage, &["data.img.raw"]);
    }

    #[test]
    fn archive_files_classify_as_archive() {
        assert_files(
            Category::Archive,
            &[
                "snapshot.zip",
                "source.tar.gz",
                "source.tgz",
                "blob.7z",
                "data.zst",
            ],
        );
    }

    #[test]
    fn backup_files_classify_as_backup() {
        assert_files(Category::Backup, &["config.bak", "notes.old"]);
    }

    #[test]
    fn tier_split_matches_intent() {
        const CORE: [Category; 7] = [
            Category::Cache,
            Category::Build,
            Category::Log,
            Category::Media,
            Category::Vcs,
            Category::Ide,
            Category::Other,
        ];
        const EXTENDED: [Category; 5] = [
            Category::Archive,
            Category::Installer,
            Category::VmImage,
            Category::ModelCache,
            Category::Backup,
        ];

        CORE.iter()
            .for_each(|c| assert_eq!(c.tier(), Tier::Core, "{c:?} should be Core"));
        EXTENDED
            .iter()
            .for_each(|c| assert_eq!(c.tier(), Tier::Extended, "{c:?} should be Extended"));
    }
}