Skip to main content

hexz_cli/
args.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4/// Hexz - High-performance snapshot and streaming engine
5#[derive(Parser)]
6#[command(name = "hexz", version, about, long_about = None)]
7#[command(disable_help_flag = true)] // We handle help manually
8#[command(styles = get_styles())]
9pub struct Cli {
10    #[arg(short, long, action = clap::ArgAction::SetTrue)]
11    pub help: bool,
12
13    #[command(subcommand)]
14    pub command: Option<Commands>,
15}
16
17fn get_styles() -> clap::builder::Styles {
18    use clap::builder::styling::{AnsiColor, Effects, Styles};
19    Styles::styled()
20        .header(AnsiColor::Yellow.on_default() | Effects::BOLD)
21        .usage(AnsiColor::Green.on_default() | Effects::BOLD)
22        .literal(AnsiColor::Cyan.on_default() | Effects::BOLD)
23        .placeholder(AnsiColor::Cyan.on_default())
24}
25
26/// Top-level command categories
27#[derive(Subcommand)]
28pub enum Commands {
29    // ------------------------------------------------------------------------
30    // Archive Operations
31    // ------------------------------------------------------------------------
32    /// Pack data into a Hexz archive
33    #[command(display_order = 1)]
34    #[command(
35        long_about = "Creates a highly compressed, encrypted, and deduplicated archive from a disk image or memory dump.\n\nIt uses Content-Defined Chunking (CDC) to ensure that only changed weights are stored when archiving multiple versions of a model. This is the primary way to ingest data into Hexz."
36    )]
37    #[command(after_help = "hexz pack model.hxz --disk ./model.bin --compression zstd")]
38    Pack {
39        /// Output archive path (.hxz)
40        output: PathBuf,
41
42        /// Path to disk image to pack
43        #[arg(long)]
44        disk: Option<PathBuf>,
45
46        /// Path to memory dump to pack
47        #[arg(long)]
48        memory: Option<PathBuf>,
49
50        /// Compression algorithm (lz4, zstd, none)
51        #[arg(long, default_value = "lz4")]
52        compression: String,
53
54        /// Enable encryption
55        #[arg(long)]
56        encrypt: bool,
57
58        /// Train compression dictionary
59        #[arg(long)]
60        train_dict: bool,
61
62        /// Block size in bytes (must be > 0)
63        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
64        block_size: u32,
65
66        /// Minimum CDC chunk size (auto-detected if not specified)
67        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
68        min_chunk: Option<u32>,
69
70        /// Average CDC chunk size (auto-detected if not specified)
71        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
72        avg_chunk: Option<u32>,
73
74        /// Maximum CDC chunk size (auto-detected if not specified)
75        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
76        max_chunk: Option<u32>,
77
78        /// Number of compression worker threads (0 = auto)
79        #[arg(long)]
80        workers: Option<usize>,
81
82        /// Suppress all output and progress bars
83        #[arg(long, short)]
84        silent: bool,
85    },
86
87    /// Inspect archive metadata
88    #[command(display_order = 2)]
89    #[command(
90        long_about = "Reads the header and index of a Hexz archive without decompressing the full body.\n\nUse this to verify archive integrity, check compression ratios, or view metadata about the stored snapshot."
91    )]
92    #[command(after_help = "hexz inspect ./model.hxz --json")]
93    Inspect {
94        /// Path to archive
95        snap: PathBuf,
96
97        /// Output as JSON
98        #[arg(long)]
99        json: bool,
100    },
101
102    /// Compare block hashes between two archives
103    #[command(display_order = 3)]
104    #[command(
105        long_about = "Compares the BLAKE3 block hashes of two Hexz archives.\n\nReports how much data is shared between them, unique to each, and the storage savings achieved through deduplication. Useful for understanding how much a fine-tuned checkpoint differs from its base."
106    )]
107    #[command(after_help = "hexz diff base.hxz finetuned.hxz")]
108    Diff {
109        /// First archive
110        a: PathBuf,
111
112        /// Second archive
113        b: PathBuf,
114    },
115
116    /// List archives in a directory as a lineage tree
117    #[command(display_order = 4)]
118    #[command(
119        long_about = "Scans a directory for .hxz archives and renders their parent-child relationships as a tree.\n\nParent links are read from each archive's header. Archives whose declared parent lives outside the scanned directory are annotated as external."
120    )]
121    #[command(after_help = "hexz ls ./checkpoints/")]
122    Ls {
123        /// Directory to scan
124        dir: PathBuf,
125    },
126
127    /// Pack with profile-based presets
128    #[command(display_order = 4)]
129    #[command(
130        long_about = "Creates a Hexz archive using a named build profile.\n\nProfiles automatically select compression, block size, and dictionary training settings optimized for different workloads (ML, EDA, embedded, generic)."
131    )]
132    #[command(after_help = "hexz build disk.img archive.hxz --profile ml")]
133    Build {
134        /// Source disk image
135        source: PathBuf,
136
137        /// Output archive path
138        output: PathBuf,
139
140        /// Optional memory dump
141        #[arg(long)]
142        memory: Option<PathBuf>,
143
144        /// Build profile
145        #[arg(long)]
146        profile: Option<String>,
147
148        /// Enable encryption
149        #[arg(long)]
150        encrypt: bool,
151    },
152
153    /// Convert external formats to Hexz snapshot
154    #[command(display_order = 6)]
155    #[command(
156        long_about = "Ingests external formats like tar, HDF5, or WebDataset into a Hexz snapshot.\n\nThis allows legacy datasets to benefit from Hexz's random access and deduplication features."
157    )]
158    #[command(after_help = "hexz convert tar data.tar data.hxz")]
159    Convert {
160        /// Source format (tar, hdf5, webdataset)
161        format: String,
162
163        /// Input file path
164        input: PathBuf,
165
166        /// Output snapshot path (.hxz)
167        output: PathBuf,
168
169        /// Compression algorithm (lz4, zstd)
170        #[arg(long, default_value = "lz4")]
171        compression: String,
172
173        /// Block size in bytes
174        #[arg(long, default_value_t = 65536)]
175        block_size: u32,
176
177        /// Build profile (ml, eda, embedded, generic)
178        #[arg(long)]
179        profile: Option<String>,
180
181        /// Suppress output
182        #[arg(long, short)]
183        silent: bool,
184    },
185
186    /// Estimate space savings before packing
187    #[command(display_order = 7)]
188    #[command(
189        long_about = "Quickly estimates the compression and deduplication savings if a raw data file\nwere packed into the Hexz format. Samples blocks without reading the whole file,\nso it completes in seconds even on multi-GB inputs."
190    )]
191    #[command(after_help = "hexz predict model.bin --block-size 65536 --json")]
192    Predict {
193        /// Path to the raw data file to analyze
194        file: PathBuf,
195
196        /// Block size in bytes
197        #[arg(long, default_value_t = 65536)]
198        block_size: u32,
199
200        /// Minimum CDC chunk size (auto-detected if not specified)
201        #[arg(long)]
202        min_chunk: Option<u32>,
203
204        /// Average CDC chunk size (auto-detected if not specified)
205        #[arg(long)]
206        avg_chunk: Option<u32>,
207
208        /// Maximum CDC chunk size (auto-detected if not specified)
209        #[arg(long)]
210        max_chunk: Option<u32>,
211
212        /// Output as JSON
213        #[arg(long)]
214        json: bool,
215    },
216
217    // ------------------------------------------------------------------------
218    // Virtual Machine Operations
219    // ------------------------------------------------------------------------
220    /// Boot a virtual machine from snapshot
221    #[cfg(feature = "fuse")]
222    #[command(display_order = 10)]
223    #[command(
224        long_about = "Boots a transient Virtual Machine directly from a Hexz snapshot.\n\nThe VM uses a copy-on-write overlay, meaning the original snapshot remains immutable. Changes are lost on shutdown unless --persist is used."
225    )]
226    #[command(after_help = "hexz boot ubuntu.hxz --ram 4G --no-graphics")]
227    Boot {
228        /// Snapshot to boot from
229        snap: String,
230
231        /// RAM size (e.g., "4G")
232        #[arg(long)]
233        ram: Option<String>,
234
235        /// Disable KVM acceleration
236        #[arg(long)]
237        no_kvm: bool,
238
239        /// Network mode (user, bridge, none)
240        #[arg(long, default_value = "user")]
241        network: String,
242
243        /// Hypervisor backend (qemu, firecracker)
244        #[arg(long, default_value = "qemu")]
245        backend: String,
246
247        /// Persistent overlay path
248        #[arg(long)]
249        persist: Option<PathBuf>,
250
251        /// QMP socket path for control
252        #[arg(long)]
253        qmp_socket: Option<PathBuf>,
254
255        /// Disable graphics (headless mode)
256        #[arg(long)]
257        no_graphics: bool,
258
259        /// Enable VNC server
260        #[arg(long)]
261        vnc: bool,
262    },
263
264    /// Install OS from ISO to snapshot
265    #[cfg(feature = "fuse")]
266    #[command(display_order = 11)]
267    #[command(
268        long_about = "Runs an OS installer from an ISO and captures the result into a new Hexz snapshot.\n\nThis automates the process of creating base images for VMs."
269    )]
270    #[command(after_help = "hexz install alpine.iso alpine-base.hxz")]
271    Install {
272        /// Path to ISO image
273        iso: PathBuf,
274
275        /// Output snapshot path
276        output: PathBuf,
277
278        /// Virtual disk size (e.g., "10G")
279        #[arg(long, default_value = "10G")]
280        primary_size: String,
281
282        /// RAM size (e.g., "4G")
283        #[arg(long, default_value = "4G")]
284        ram: String,
285
286        /// Disable graphics
287        #[arg(long)]
288        no_graphics: bool,
289
290        /// Enable VNC
291        #[arg(long)]
292        vnc: bool,
293    },
294
295    /// Create snapshot via QMP
296    #[cfg(unix)]
297    #[command(display_order = 12)]
298    #[command(
299        long_about = "Triggers a live snapshot of a running VM via the QMP socket.\n\nThis allows for capturing the state of a running system without shutting it down."
300    )]
301    #[command(after_help = "hexz snap /tmp/qmp.sock base.hxz overlay.bin live.hxz")]
302    Snap {
303        /// QMP socket path
304        socket: PathBuf,
305
306        /// Base snapshot
307        base: PathBuf,
308
309        /// Overlay path
310        overlay: PathBuf,
311
312        /// Output snapshot
313        output: PathBuf,
314    },
315
316    /// Commit overlay changes to new snapshot
317    #[command(display_order = 13)]
318    #[command(
319        long_about = "Finalizes a writable overlay into a new immutable snapshot.\n\nSupports 'thin' snapshots which only store the deltas referencing the parent, ideal for iterative model fine-tuning."
320    )]
321    #[command(after_help = "hexz commit base.hxz overlay.bin new_model.hxz --thin")]
322    Commit {
323        /// Base snapshot
324        base: PathBuf,
325
326        /// Overlay with changes
327        overlay: PathBuf,
328
329        /// Output snapshot
330        output: PathBuf,
331
332        /// Compression algorithm
333        #[arg(long, default_value = "lz4")]
334        compression: String,
335
336        /// Block size (must be > 0)
337        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
338        block_size: u32,
339
340        /// Keep overlay file after commit
341        #[arg(long)]
342        keep_overlay: bool,
343
344        /// Path to memory dump to include
345        #[arg(long)]
346        memory: Option<PathBuf>,
347
348        /// Commit message
349        #[arg(long)]
350        message: Option<String>,
351
352        /// Create thin snapshot (reference base)
353        #[arg(long)]
354        thin: bool,
355    },
356
357    /// Mount snapshot as filesystem
358    #[cfg(feature = "fuse")]
359    #[command(display_order = 14)]
360    #[command(
361        long_about = "Mounts a Hexz snapshot as a FUSE filesystem.\n\nAllows standard tools to read data from the snapshot as if it were a normal directory."
362    )]
363    #[command(after_help = "hexz mount model.hxz /mnt/model --rw")]
364    Mount {
365        /// Snapshot to mount
366        snap: String,
367
368        /// Mount point directory
369        mountpoint: PathBuf,
370
371        /// Overlay for writes
372        #[arg(long)]
373        overlay: Option<PathBuf>,
374
375        /// Run as daemon
376        #[arg(short, long)]
377        daemon: bool,
378
379        /// Enable read-write mode
380        #[arg(long)]
381        rw: bool,
382
383        /// Cache size (e.g., "1G")
384        #[arg(long)]
385        cache_size: Option<String>,
386
387        /// User ID for files
388        #[arg(long, default_value_t = 1000)]
389        uid: u32,
390
391        /// Group ID for files
392        #[arg(long, default_value_t = 1000)]
393        gid: u32,
394
395        /// Export as NBD device
396        #[arg(long)]
397        nbd: bool,
398
399        /// Prefetch window size (number of blocks to read ahead)
400        #[arg(long)]
401        prefetch: Option<u32>,
402    },
403
404    /// Unmount filesystem
405    #[cfg(feature = "fuse")]
406    #[command(display_order = 15)]
407    #[command(long_about = "Unmounts a previously mounted Hexz filesystem.")]
408    #[command(after_help = "hexz unmount /mnt/model")]
409    Unmount {
410        /// Mount point to unmount
411        mountpoint: PathBuf,
412    },
413
414    // ------------------------------------------------------------------------
415    // System & Diagnostics
416    // ------------------------------------------------------------------------
417    /// Run system diagnostics
418    #[cfg(feature = "diagnostics")]
419    #[command(display_order = 20)]
420    #[command(
421        long_about = "Checks the system for compatibility with Hexz features (FUSE, QEMU, network)."
422    )]
423    #[command(after_help = "hexz doctor")]
424    Doctor,
425
426    /// Serve archive over network
427    #[cfg(feature = "server")]
428    #[command(display_order = 22)]
429    #[command(
430        long_about = "Starts an HTTP server to stream the snapshot over the network.\n\nClients can fetch specific byte ranges efficiently."
431    )]
432    #[command(after_help = "hexz serve model.hxz --port 8080")]
433    Serve {
434        /// Snapshot to serve
435        snap: String,
436
437        /// Server port
438        #[arg(long, default_value_t = 8080)]
439        port: u16,
440
441        /// Bind address
442        #[arg(long, default_value = "127.0.0.1")]
443        bind: String,
444
445        /// Run as daemon
446        #[arg(short, long)]
447        daemon: bool,
448
449        /// Enable NBD protocol
450        #[arg(long)]
451        nbd: bool,
452    },
453
454    /// Generate signing keys
455    #[cfg(feature = "signing")]
456    #[command(display_order = 23)]
457    #[command(long_about = "Generates an Ed25519 keypair for signing Hexz archives.")]
458    #[command(after_help = "hexz keygen --output-dir ~/.hexz/keys")]
459    Keygen {
460        /// Output directory for keys
461        #[arg(short, long)]
462        output_dir: Option<PathBuf>,
463    },
464
465    /// Sign archive
466    #[cfg(feature = "signing")]
467    #[command(display_order = 24)]
468    #[command(long_about = "Cryptographically signs a Hexz archive using a private key.")]
469    #[command(after_help = "hexz sign private.pem model.hxz")]
470    Sign {
471        /// Private key path
472        key: PathBuf,
473
474        /// Archive to sign
475        image: PathBuf,
476    },
477
478    /// Verify archive signature
479    #[cfg(feature = "signing")]
480    #[command(display_order = 25)]
481    #[command(
482        long_about = "Verifies the cryptographic signature of an archive using a public key."
483    )]
484    #[command(after_help = "hexz verify public.pem model.hxz")]
485    Verify {
486        /// Public key path
487        key: PathBuf,
488
489        /// Archive to verify
490        image: PathBuf,
491    },
492}