1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
use clap::{Parser, Subcommand, ValueEnum};
use disky::render::Format;
// Top-level argument definition for the `disky` binary (clap `Parser` derive).
// With clap's derive API the `///` comments on fields become the `--help` text,
// so they are user-facing strings; maintainer notes here use plain `//`.
#[derive(Parser)]
#[command(name = "disky", about = "Fast macOS disk analyzer", version)]
pub struct Cli {
// `global = true`: the flag may be given before or after the subcommand.
// `None` means `--format` was not passed; the auto-selection described in the
// help text is not implemented in this struct — presumably the caller resolves it.
/// Output format. Auto = JSON when stdout is piped, text on a TTY.
#[arg(long, value_enum, global = true)]
pub format: Option<FormatArg>,
// Plain boolean switch (absent = false), global like `--format`.
/// Use physical disk usage (st_blocks * 512) instead of logical size
/// (st_size). Critical for APFS sparse files — OrbStack disk images
/// report 8.8 TB logical on a 256 GB SSD; physical shows ~13 GB.
/// Applies to top, dirs, ext, stats. Falls back to logical when
/// physical isn't captured (older snapshots, non-Unix scans).
#[arg(long, global = true)]
pub physical: bool,
// `Option` so that running with no subcommand still parses; the `Tui`
// variant's help text states that the TUI is what opens in that case.
#[command(subcommand)]
pub command: Option<Command>,
}
// CLI-facing choice for `--format` (used by `Cli::format` via `value_enum`).
// clap's `ValueEnum` derive turns each variant into an accepted value, by
// default the kebab-case variant name: `text`, `json`, `ndjson`.
// Kept separate from `disky::render::Format`; the `From<FormatArg>` impl in
// this file maps the variants one-to-one.
// Variants deliberately carry no `///` docs: on a `ValueEnum` those would be
// shown as per-value help text.
#[derive(Copy, Clone, Debug, ValueEnum)]
pub enum FormatArg {
Text,
Json,
Ndjson,
}
impl From<FormatArg> for Format {
    /// Converts the format selected on the command line into the renderer's
    /// [`Format`], mapping each variant to the variant of the same name.
    fn from(arg: FormatArg) -> Self {
        // No wildcard arm: adding a `FormatArg` variant must fail to compile
        // here until it is given an explicit mapping.
        match arg {
            FormatArg::Text => Self::Text,
            FormatArg::Json => Self::Json,
            FormatArg::Ndjson => Self::Ndjson,
        }
    }
}
/// Shared help string for the `--snapshot` option of the query subcommands.
///
/// A snapshot reference is `@latest`, a snapshot ID (e.g. `2026-05-15_11-56`),
/// or a filesystem path. Keeping the wording in one constant means every
/// subcommand that passes `help = SNAPSHOT_HELP` shows identical text.
// NOTE(review): `Growth` documents an additional `@latest~N` form for its own
// arguments; whether `--snapshot` accepts it too is not visible in this file —
// confirm before extending this text.
const SNAPSHOT_HELP: &str = "Snapshot to query: @latest, an ID, or a path";
// Subcommands of the `disky` CLI (clap `Subcommand` derive). Each variant is
// one subcommand and each field one argument. The `///` comments are consumed
// by clap as help text, so they are user-facing strings; maintainer notes in
// this enum are written as plain `//` comments and the `///` lines are left
// untouched.
//
// Recurring conventions:
//   * `#[arg(short, long)]` derives both spellings from the field name, so
//     `snapshot` is `-s/--snapshot` and `limit` is `-l/--limit`.
//   * Fields without `#[arg(...)]` (or with neither `short` nor `long`) are
//     positional arguments.
//   * Snapshot-reading subcommands default `--snapshot` to "@latest" and reuse
//     `SNAPSHOT_HELP` as the help string.
//   * This enum only declares arguments; parsing of durations, predicates and
//     snapshot references is not done here.
#[derive(Subcommand)]
pub enum Command {
/// Scan a directory and store results
Scan {
// Positional; scans the filesystem root when omitted.
/// Path to scan
#[arg(default_value = "/")]
path: String,
// `-d/--db`. `None` leaves the file name to the caller (see help text).
/// Output DuckDB file path (default: auto-named in data dir)
#[arg(short, long)]
db: Option<String>,
// The `--emit-*` options are long-only; `None` means "do not emit".
/// Also emit the top N largest files in the result (cuts a round-trip
/// for agents — avoids needing a separate `disky top` call).
#[arg(long, value_name = "N")]
emit_top: Option<usize>,
/// Also emit the top N directories by aggregated size.
#[arg(long, value_name = "N")]
emit_dirs: Option<usize>,
/// Also emit the top N extensions by total size.
#[arg(long, value_name = "N")]
emit_ext: Option<usize>,
// NOTE(review): the "implied by any other `--emit-*` flag" rule is not
// expressed in these attributes — presumably applied by the caller; confirm.
/// Also emit overall stats (root, totals, duration). Implied by any
/// other `--emit-*` flag.
#[arg(long, default_value_t = false)]
emit_stats: bool,
},
/// Show largest files
Top {
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 50)]
limit: usize,
// `-m/--min-size`; the default of 0 applies no lower bound.
/// Minimum size in bytes
#[arg(short, long, default_value_t = 0)]
min_size: u64,
},
/// Show disk usage by extension
Ext {
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 30)]
limit: usize,
},
/// Show top directories by size
Dirs {
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 30)]
limit: usize,
},
/// Find files matching pattern
Find {
// Required positional argument.
/// Glob pattern (e.g. "*.log")
pattern: String,
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 50)]
limit: usize,
},
/// Show overall disk stats
Stats {
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
/// Emit minimal scalar envelope `{kind:"scalar", records:[{bytes,files}]}`.
/// Cuts tokens for agents that only need totals.
#[arg(long)]
summarize: bool,
// NOTE(review): "overrides --format" and "implies --summarize" are not
// encoded as clap relationships here — presumably handled by the caller.
/// Print only the bare total-bytes integer to stdout (overrides --format).
/// Implies --summarize.
#[arg(long)]
raw: bool,
},
/// Run an arbitrary SQL query against a snapshot
Query {
// Required positional argument holding the SQL text.
/// SQL — references the `files` table (`path, name, ext, size, mtime, is_dir, depth`)
sql: String,
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
/// Cap on returned rows
#[arg(short, long, default_value_t = 1000)]
limit: usize,
},
/// Find well-known disk-hoggy directories (node_modules, target, …).
/// Defaults to dry-run; pass `--apply` to delete.
Cleanup {
// `-t/--target`. `value_delimiter = ','` splits one value such as `a,b`
// into separate entries. The vector is empty when the flag is absent; the
// "all known" default from the help text is not applied in this file.
/// Comma-separated target categories (default: all known)
#[arg(short, long, value_delimiter = ',')]
target: Vec<String>,
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 100)]
limit: usize,
/// Actually delete the listed paths (default: dry-run)
#[arg(long, default_value_t = false)]
apply: bool,
// NOTE(review): no `requires` attribute ties this flag to `--apply`, so
// `--reversible` on its own is accepted by the parser.
/// With `--apply`, move paths to ~/.Trash instead of permanently
/// deleting them so they can be restored.
#[arg(long, default_value_t = false)]
reversible: bool,
},
/// Diff two snapshots — added / removed / grew / shrank files
Diff {
// `a` and `b` are required positionals, taken in declaration order.
/// Snapshot A (the "before"). Accepts @latest, ID, or path.
a: String,
/// Snapshot B (the "after"). Accepts @latest, ID, or path.
b: String,
#[arg(short, long, default_value_t = 100)]
limit: usize,
},
/// Emit a JSON descriptor of every command, record shape, and error type
Schema,
/// Open interactive TUI (default when no subcommand given)
Tui {
// Unlike the query subcommands this is an `Option` with no `default_value`
// and does not use `SNAPSHOT_HELP`; `None` is left for the caller to
// resolve (the help text says `@latest`).
/// Snapshot to load (default: @latest)
#[arg(short, long)]
snapshot: Option<String>,
},
/// List available snapshots
List,
// NOTE(review): the help text below mentions `--snapshot`, but this variant
// declares no such argument — confirm the wording is still accurate.
/// Filter records from a prior disky JSON envelope (stdin) by a
/// simple predicate. Composes with any command that emits records.
///
/// Example: `disky top --format json | disky filter --where "size > 1GB"`
///
/// Supported fields: size (u64), ext (str), name (str), path (str).
/// Supported ops: =, !=, >, <, >=, <=, LIKE.
/// Literals: integers (with optional KB/MB/GB/TB suffix) or quoted
/// strings. Chain with AND. Mutually exclusive with `--snapshot`.
Filter {
// `where` is a Rust keyword, hence the `where_` field name with an explicit
// `long = "where"`. The predicate is optional.
/// Predicate string. Example: `size > 1GB AND ext = 'log'`.
#[arg(long = "where")]
where_: Option<String>,
/// Cap records returned (after filtering).
#[arg(short, long, default_value_t = 1000)]
limit: usize,
},
/// Per-directory growth between two snapshots. Default compares
/// `@latest` against `@latest~1` so agents see "what grew since the
/// previous scan". Rate is bytes/day computed from snapshot timestamps.
/// Pass `--over <DURATION>` to auto-pick the oldest snapshot within
/// the window (e.g. `--over 7d`).
Growth {
// All options except `limit` are long-only in this variant.
/// Earlier snapshot. Accepts @latest, @latest~N, ID, or path.
#[arg(long, default_value = "@latest~1")]
since: String,
/// Later snapshot (default @latest).
#[arg(long, default_value = "@latest")]
until: String,
// Kept as a raw string; no duration parsing is attached here.
// NOTE(review): "overrides --since" is not expressed as a clap
// relationship, so both may be given together.
/// Auto-pick oldest snapshot whose age >= this duration. Overrides
/// --since. Format: `7d`, `2w`, `6mo`, `1y` (see duration.rs).
#[arg(long, value_name = "DURATION")]
over: Option<String>,
// NOTE(review): the `>= 3` condition is not validated by these attributes
// (no `value_parser` range) — presumably checked by the caller.
/// N-snapshot OLS fit. When set (>= 3), runs ordinary-least-squares
/// against the N most-recent snapshots instead of a 2-snapshot diff.
/// Emits `kind="growth_n"` envelope with slope/R²/projected fill date.
#[arg(long, value_name = "N")]
over_n: Option<usize>,
// NOTE(review): no `requires` on `over_n`; the parser accepts
// `--fill-target` on its own.
/// Free-byte budget the projection extrapolates against. Only used
/// with `--over-n`. Default: free bytes on the volume that holds
/// `$HOME` (best-effort; may be 0 if `statvfs` fails).
#[arg(long, value_name = "BYTES")]
fill_target: Option<u64>,
#[arg(short, long, default_value_t = 50)]
limit: usize,
},
/// Predict when the disk fills based on linear regression over all
/// snapshots in the data dir. Pass `--free-bytes <N>` so the fit
/// can compute a fill-by date.
Predict {
// Optional at the parser level even though the help text asks for it.
/// Bytes currently free on the volume. Get via
/// `df -k / | tail -1 | awk '{print $4*1024}'`.
#[arg(long, value_name = "BYTES")]
free_bytes: Option<u64>,
},
/// Per-directory churn — files modified within the last N hours/days.
/// Identifies log generators and hot working directories.
Churn {
// Here `over` is a plain `String` with a default, whereas `Growth`'s
// `over` is an `Option<String>`.
/// Time window (e.g. `24h`, `7d`, `30d`).
#[arg(long, default_value = "24h")]
over: String,
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 50)]
limit: usize,
},
/// List empty files in a snapshot (size = 0). Useful for finding
/// placeholders, leftover lockfiles, and interrupted-write detritus.
Empty {
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 100)]
limit: usize,
},
/// List files older than the given duration (e.g. `365d`, `6mo`, `2y`).
/// Excludes files whose mtime is unknown.
Old {
// Not an `Option` and no default, so `--older-than` is required.
/// Cutoff age — `30d`, `2w`, `6mo`, `1y`.
#[arg(long, value_name = "DURATION")]
older_than: String,
#[arg(short, long, default_value = "@latest", help = SNAPSHOT_HELP)]
snapshot: String,
#[arg(short, long, default_value_t = 100)]
limit: usize,
},
// NOTE(review): every `--keep-*` option is optional and no argument group
// requires one of them, so the "refuses to run" rule in the help text is
// not enforced by the parser — presumably checked by the caller.
/// Apply restic-style retention policy to snapshots. Default is dry-run;
/// pass `--apply` to delete. Refuses to run with no `--keep-*` flag.
Forget {
#[arg(long, value_name = "N")]
keep_last: Option<usize>,
#[arg(long, value_name = "N")]
keep_daily: Option<usize>,
#[arg(long, value_name = "N")]
keep_weekly: Option<usize>,
#[arg(long, value_name = "N")]
keep_monthly: Option<usize>,
#[arg(long, value_name = "N")]
keep_yearly: Option<usize>,
#[arg(long, default_value_t = false)]
apply: bool,
},
}