disky 0.11.0

Fast macOS disk analyzer and cleanup CLI in Rust — ncdu / dust / GrandPerspective alternative with snapshot diff, agent-native JSON, and Trash-restorable cleanup.
Documentation
//! Static JSON-Schema-ish descriptor for disky's machine-facing surface.
//! Emitted via `disky schema` so agents can bind to commands and output
//! shapes without prompt-engineering.

use serde_json::{json, Value};

use crate::cleanup::TARGETS;
use crate::query::SCHEMA_VERSION;

pub fn document() -> Value {
    json!({
        "schema_version": SCHEMA_VERSION,
        "tool": "disky",
        "version": env!("CARGO_PKG_VERSION"),
        "commands": commands(),
        "records": records(),
        "errors": errors(),
        "snapshot_refs": {
            "description": "Accepted by every query command and MCP tool",
            "forms": ["@latest", "<id like 2026-05-15_11-56>", "<filesystem path>"],
        }
    })
}

/// Builds the JSON array describing each command in the schema.
///
/// Every entry has a `name`, an `args` object mapping argument names to
/// informal type/default strings, and an `output` description. Some entries
/// carry extra keys: `stderr` (scan), `input` (filter) and `targets` (cleanup).
///
/// All strings here are part of the emitted descriptor; changing any of them
/// changes what consumers of the schema see.
fn commands() -> Value {
    json!([
        // `scan` takes a filesystem path plus optional emit_* arguments rather
        // than a snapshot reference.
        {
            "name": "scan", "args": {
                "path": "string (default '/')",
                "db": "string? (default auto-named in data dir)",
                "emit_top": "int?",
                "emit_dirs": "int?",
                "emit_ext": "int?",
                "emit_stats": "bool"
            },
            "stderr": "NDJSON {start,progress,done} when stderr piped, else spinner",
            "output": "scan_bundle when any emit_* flag is set"
        },
        // Entries whose args come from `snapshot_with`: each gets the shared
        // `snapshot` key plus the listed "name:type" extras.
        { "name": "top",    "args": snapshot_with(&["limit:int=50", "min_size:int=0"]), "output": "FileRow[]" },
        { "name": "dirs",   "args": snapshot_with(&["limit:int=30"]),                   "output": "DirRow[]" },
        { "name": "ext",    "args": snapshot_with(&["limit:int=30"]),                   "output": "ExtRow[]" },
        { "name": "find",   "args": snapshot_with(&["pattern:string", "limit:int=50"]), "output": "FileRow[]" },
        { "name": "stats",  "args": snapshot_with(&["summarize:bool", "raw:bool"]),     "output": "Stats | Scalar (with --summarize) | raw u64 (with --raw)" },
        { "name": "query",  "args": snapshot_with(&["sql:string", "limit:int=1000"]),    "output": "Object[]" },
        // `list` is described with an empty args object.
        { "name": "list",   "args": {},                                                  "output": "Snapshot[]" },
        // `diff` takes two snapshot references (`a`, `b`) instead of a single `snapshot`.
        {
            "name": "diff",
            "args": { "a": "@latest|<id>|<path>", "b": "@latest|<id>|<path>", "limit": "int=100" },
            "output": "DiffRow[]"
        },
        // `filter` is the only entry with an `input` key: it reads an envelope from stdin.
        {
            "name": "filter",
            "args": {
                "where": "predicate string (e.g. \"size > 1GB AND ext = 'log'\")",
                "limit": "int=1000"
            },
            "input": "JSON envelope on stdin — kinds: top, find, dirs, ext, empty, old, filter, growth",
            "output": "kind='filter' envelope: {schema_version, kind, input_kind, records}"
        },
        // `growth` documents two output shapes inline, selected by `over_n`.
        {
            "name": "growth",
            "args": { "since": "@latest|@latest~N|<id>|<path> (default @latest~1)", "until": "@latest|<id>|<path> (default @latest)", "over": "duration? (e.g. 7d) — overrides --since", "over_n": "int? — N>=3 enables OLS fit over N most-recent snapshots (kind='growth_n')", "fill_target": "u64? — free-byte budget for projected_fill_date (only with --over-n)", "limit": "int=50" },
            "output": "Two shapes. Default (no --over-n): GrowthRow[] under kind='growth'. Records: {path, kind: grew|shrank|added|removed, size_a:u64, size_b:u64, delta_bytes:i64, rate_bytes_per_day:f64, days_between:f64}. With --over-n N: GrowthNRow[] under kind='growth_n' with envelope fields {schema_version, kind, n_snapshots, fill_target, records}. Records: {path, slope_bytes_per_day:i64, r2:f64, latest_bytes:u64, n_snapshots, sample_paths_ts:[(unix_secs:i64, bytes:u64)], projected_fill_date?:rfc3339}"
        },
        {
            "name": "churn",
            "args": { "over": "duration (default 24h)", "snapshot": "@latest|<id>|<path>", "limit": "int=50" },
            "output": "ChurnRow[] under kind='churn'. Records: {path, recent_files:u64, recent_bytes:u64, total_files:u64, total_bytes:u64, churn_score:f64 (0..1)}"
        },
        { "name": "empty", "args": snapshot_with(&["limit:int=100"]), "output": "FileRow[] under kind='empty'" },
        { "name": "old", "args": snapshot_with(&["older_than:duration", "limit:int=100"]), "output": "FileRow[] under kind='old' (older_than: 30d|2w|6mo|1y syntax)" },
        // `forget` and `cleanup` both describe `apply` as defaulting to false (dry-run).
        {
            "name": "forget",
            "args": {
                "keep_last": "int?",
                "keep_daily": "int?",
                "keep_weekly": "int?",
                "keep_monthly": "int?",
                "keep_yearly": "int?",
                "apply": "bool (default false — dry-run)"
            },
            "output": "forget envelope: {kind:'forget', applied:bool, kept:[KeptSnapshot], removed:[SnapshotMeta], skipped_unparseable:[string], total_removed_bytes:u64}"
        },
        {
            "name": "cleanup",
            "args": {
                "target": "string[] (comma-separated)",
                "snapshot": "@latest|<id>|<path>",
                "apply": "bool (default false — dry-run unless set)",
                "reversible": "bool (default false — with apply, trash instead of rm)"
            },
            "output": "CleanupHit[]",
            // Generated from the cleanup module's TARGETS table: each pair
            // becomes a {name, basenames} object.
            "targets": TARGETS.iter().map(|(n, b)| json!({"name": n, "basenames": b})).collect::<Vec<_>>()
        }
    ])
}

/// Builds an `args` object for a command that operates on a snapshot.
///
/// The object always starts with a `snapshot` entry describing the accepted
/// reference forms. Each item of `extras` is a `"name:type"` spec, split at
/// the first `:`; the part before becomes the key and the remainder becomes
/// the string value. Specs that contain no `:` are skipped.
fn snapshot_with(extras: &[&str]) -> Value {
    let snapshot_arg = (
        String::from("snapshot"),
        Value::String(String::from("@latest|<id>|<path>")),
    );
    let extra_args = extras
        .iter()
        .filter_map(|spec| spec.split_once(':'))
        .map(|(name, ty)| (name.to_owned(), Value::String(ty.to_owned())));

    // `snapshot` goes first so the extras are inserted after it, in order.
    Value::Object(std::iter::once(snapshot_arg).chain(extra_args).collect())
}

/// Builds the JSON object describing record shapes, keyed by record name.
///
/// Each value maps field names to informal type strings (for example `"u64"`
/// or `"string?"`). NOTE(review): the trailing `?` presumably marks an
/// optional field — confirm against the serializers that produce these rows.
///
/// Three entries differ from the plain row shapes: `files_table` lists
/// column names with a note instead of per-field types, and the lowercase
/// `envelope` and `error` entries describe wrapper objects rather than rows.
fn records() -> Value {
    json!({
        "FileRow":    { "path": "string", "size": "u64", "ext": "string?", "mtime": "string? (RFC3339 UTC)" },
        // Column listing for the `files` table, with a note on logical vs physical size.
        "files_table": { "columns": "path, name, ext, size, physical_size, mtime, is_dir, depth", "note": "physical_size = st_blocks*512 on Unix; differs wildly from size for APFS sparse files (e.g. OrbStack data.img: 8.8TB logical vs 13GB physical)" },
        "DirRow":     { "path": "string", "total_size": "u64" },
        "ExtRow":     { "ext": "string", "files": "u64", "total_size": "u64" },
        "Stats":      {
            "files": "u64", "dirs": "u64", "total_bytes": "u64",
            "largest_bytes": "u64", "avg_bytes": "u64",
            "partial": "bool",
            "scan_root": "string?", "scan_duration_s": "i64?", "scanned_at": "string? (RFC3339 UTC)"
        },
        "Scalar":     { "bytes": "u64", "files": "u64" },
        "Snapshot":   { "path": "string", "id": "string?", "bytes": "u64" },
        "CleanupHit":      { "category": "string", "path": "string", "bytes": "u64", "files": "u64" },
        "CategorySummary": { "category": "string", "paths": "u64", "bytes": "u64", "files": "u64" },
        "DiffRow":    { "path": "string", "kind": "added|removed|grew|shrank", "size_a": "u64", "size_b": "u64", "delta": "i64" },
        "GrowthRow":  { "path": "string", "kind": "grew|shrank|added|removed", "size_a": "u64", "size_b": "u64", "delta_bytes": "i64", "rate_bytes_per_day": "f64", "days_between": "f64" },
        "ChurnRow":   { "path": "string", "recent_files": "u64", "recent_bytes": "u64", "total_files": "u64", "total_bytes": "u64", "churn_score": "f64 (0..1)" },
        // Generic wrapper: `records` holds an array of whichever row type `kind` names.
        "envelope":   { "schema_version": "u32", "kind": "string", "records": "T[]" },
        // Error object shape; the `instance` description cites RFC 9457.
        "error":      { "schema_version": "u32", "type": "string (URI)", "title": "string", "status": "i32", "detail": "string", "retryable": "bool", "instance": "string (per-error UUID, RFC 9457 instance)" }
    })
}

/// Builds the JSON array describing the error table.
///
/// Each entry pairs a numeric `code` with a `slug`, a `type` URI, and a
/// `retryable` flag. The test module checks that codes 0 through 6 are all
/// present.
///
/// Defined ahead of the test module so that no items follow `mod tests`
/// (Clippy's `items_after_test_module`), which removes the need for an
/// `#[allow]` on the module.
fn errors() -> Value {
    json!([
        { "code": 0, "slug": "ok",           "type": "https://disky.dev/errors/ok",           "retryable": false },
        { "code": 1, "slug": "generic",      "type": "https://disky.dev/errors/generic",      "retryable": false },
        { "code": 2, "slug": "usage",        "type": "https://disky.dev/errors/usage",        "retryable": false },
        { "code": 3, "slug": "io",           "type": "https://disky.dev/errors/io",           "retryable": true  },
        { "code": 4, "slug": "not-found",    "type": "https://disky.dev/errors/not-found",    "retryable": false },
        { "code": 5, "slug": "partial-scan", "type": "https://disky.dev/errors/partial-scan", "retryable": false },
        { "code": 6, "slug": "lock-held",    "type": "https://disky.dev/errors/lock-held",    "retryable": true  }
    ])
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The top-level keys of the document keep their expected JSON types.
    #[test]
    fn document_top_level_keys_are_stable() {
        let doc = document();
        assert_eq!(doc["tool"], "disky");
        assert_eq!(doc["schema_version"], SCHEMA_VERSION);
        assert!(doc["commands"].is_array());
        assert!(doc["records"].is_object());
        assert!(doc["errors"].is_array());
        assert!(doc["snapshot_refs"]["forms"].is_array());
    }

    /// Every exit code in the range 0..=6 appears in the error table.
    #[test]
    fn every_exit_code_has_schema_entry() {
        let doc = document();
        let codes: std::collections::HashSet<i64> = doc["errors"]
            .as_array()
            .unwrap()
            .iter()
            .map(|e| e["code"].as_i64().unwrap())
            .collect();
        // ExitCode 0..=6 inclusive
        for c in 0..=6 {
            assert!(codes.contains(&c), "missing error code {} in schema", c);
        }
    }

    /// The core record shapes are present as JSON objects.
    #[test]
    fn record_shapes_include_core_types() {
        let doc = document();
        let records = &doc["records"];
        for name in [
            "FileRow",
            "ExtRow",
            "DirRow",
            "Stats",
            "CleanupHit",
            "DiffRow",
        ] {
            assert!(records[name].is_object(), "missing record {}", name);
        }
    }
}