Skip to main content

hexz_cli/
args.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4/// Hexz - High-performance snapshot and streaming engine
5#[derive(Parser)]
6#[command(name = "hexz", version, about, long_about = None)]
7#[command(disable_help_flag = true)] // We handle help manually
8#[command(styles = get_styles())]
9pub struct Cli {
10    #[arg(short, long, action = clap::ArgAction::SetTrue)]
11    pub help: bool,
12
13    #[command(subcommand)]
14    pub command: Option<Commands>,
15}
16
17fn get_styles() -> clap::builder::Styles {
18    use clap::builder::styling::{AnsiColor, Effects, Styles};
19    Styles::styled()
20        .header(AnsiColor::Yellow.on_default() | Effects::BOLD)
21        .usage(AnsiColor::Green.on_default() | Effects::BOLD)
22        .literal(AnsiColor::Cyan.on_default() | Effects::BOLD)
23        .placeholder(AnsiColor::Cyan.on_default())
24}
25
26/// Top-level command categories
27#[derive(Subcommand)]
28pub enum Commands {
29    // ------------------------------------------------------------------------
30    // Archive Operations
31    // ------------------------------------------------------------------------
32    /// Pack data into a Hexz archive
33    #[command(display_order = 1)]
34    #[command(
35        long_about = "Creates a highly compressed, encrypted, and deduplicated archive from a disk image or memory dump.\n\nIt uses Content-Defined Chunking (CDC) to ensure that only changed weights are stored when archiving multiple versions of a model. This is the primary way to ingest data into Hexz."
36    )]
37    #[command(after_help = "hexz pack model.hxz --disk ./model.bin --compression zstd --cdc")]
38    Pack {
39        /// Output archive path (.hxz)
40        output: PathBuf,
41
42        /// Path to disk image to pack
43        #[arg(long)]
44        disk: Option<PathBuf>,
45
46        /// Path to memory dump to pack
47        #[arg(long)]
48        memory: Option<PathBuf>,
49
50        /// Compression algorithm (lz4, zstd, none)
51        #[arg(long, default_value = "lz4")]
52        compression: String,
53
54        /// Enable encryption
55        #[arg(long)]
56        encrypt: bool,
57
58        /// Train compression dictionary
59        #[arg(long)]
60        train_dict: bool,
61
62        /// Block size in bytes (must be > 0)
63        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
64        block_size: u32,
65
66        /// Enable content-defined chunking (CDC)
67        #[arg(long)]
68        cdc: bool,
69
70        /// Minimum chunk size for CDC
71        #[arg(long, default_value_t = 16384, value_parser = clap::value_parser!(u32).range(1..))]
72        min_chunk: u32,
73
74        /// Average chunk size for CDC
75        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
76        avg_chunk: u32,
77
78        /// Maximum chunk size for CDC
79        #[arg(long, default_value_t = 131072, value_parser = clap::value_parser!(u32).range(1..))]
80        max_chunk: u32,
81
82        /// Suppress all output and progress bars
83        #[arg(long, short)]
84        silent: bool,
85    },
86
87    /// Inspect archive metadata
88    #[command(display_order = 2)]
89    #[command(
90        long_about = "Reads the header and index of a Hexz archive without decompressing the full body.\n\nUse this to verify archive integrity, check compression ratios, or view metadata about the stored snapshot."
91    )]
92    #[command(after_help = "hexz inspect ./model.hxz --json")]
93    Inspect {
94        /// Path to archive
95        snap: PathBuf,
96
97        /// Output as JSON
98        #[arg(long)]
99        json: bool,
100    },
101
102    /// Compare block hashes between two archives
103    #[command(display_order = 3)]
104    #[command(
105        long_about = "Compares the BLAKE3 block hashes of two Hexz archives.\n\nReports how much data is shared between them, unique to each, and the storage savings achieved through deduplication. Useful for understanding how much a fine-tuned checkpoint differs from its base."
106    )]
107    #[command(after_help = "hexz diff base.hxz finetuned.hxz")]
108    Diff {
109        /// First archive
110        a: PathBuf,
111
112        /// Second archive
113        b: PathBuf,
114    },
115
116    /// List archives in a directory as a lineage tree
117    #[command(display_order = 4)]
118    #[command(
119        long_about = "Scans a directory for .hxz archives and renders their parent-child relationships as a tree.\n\nParent links are read from each archive's header. Archives whose declared parent lives outside the scanned directory are annotated as external."
120    )]
121    #[command(after_help = "hexz ls ./checkpoints/")]
122    Ls {
123        /// Directory to scan
124        dir: PathBuf,
125    },
126
127    /// Inspect a FUSE overlay file
128    #[cfg(feature = "diagnostics")]
129    #[command(display_order = 30)]
130    #[command(
131        long_about = "Analyzes the overlay file created by a FUSE read-write mount.\n\nShows which 4 KiB blocks were written during the session and the total amount of changed data."
132    )]
133    #[command(after_help = "hexz overlay vm-state.overlay --blocks")]
134    Overlay {
135        /// Path to overlay file
136        overlay: PathBuf,
137
138        /// Show block count and total changed size
139        #[arg(long)]
140        blocks: bool,
141
142        /// List individual modified block indices
143        #[arg(long)]
144        files: bool,
145    },
146
147    /// Build archive from source directory
148    #[command(display_order = 4)]
149    #[command(
150        long_about = "Recursively builds a Hexz archive from a local directory structure.\n\nUnlike 'pack' which handles raw disk images, 'build' is designed for file-system level packing."
151    )]
152    #[command(after_help = "hexz build ./checkpoints/ archive.hxz")]
153    Build {
154        /// Source directory
155        source: PathBuf,
156
157        /// Output archive path
158        output: PathBuf,
159
160        /// Optional memory dump
161        #[arg(long)]
162        memory: Option<PathBuf>,
163
164        /// Build profile
165        #[arg(long)]
166        profile: Option<String>,
167
168        /// Enable encryption
169        #[arg(long)]
170        encrypt: bool,
171
172        /// Enable CDC
173        #[arg(long)]
174        cdc: bool,
175    },
176
177    /// Analyze archive structure
178    #[cfg(feature = "diagnostics")]
179    #[command(display_order = 5)]
180    #[command(
181        long_about = "Performs a deep structural analysis of the archive format.\n\nUsed primarily for debugging corruption issues or optimizing block alignment strategies."
182    )]
183    #[command(after_help = "hexz analyze ./corrupt_image.hxz")]
184    Analyze {
185        /// Archive to analyze
186        input: PathBuf,
187    },
188
189    /// Convert external formats to Hexz snapshot
190    #[command(display_order = 6)]
191    #[command(
192        long_about = "Ingests external formats like tar, HDF5, or WebDataset into a Hexz snapshot.\n\nThis allows legacy datasets to benefit from Hexz's random access and deduplication features."
193    )]
194    #[command(after_help = "hexz convert tar data.tar data.hxz")]
195    Convert {
196        /// Source format (tar, hdf5, webdataset)
197        format: String,
198
199        /// Input file path
200        input: PathBuf,
201
202        /// Output snapshot path (.hxz)
203        output: PathBuf,
204
205        /// Compression algorithm (lz4, zstd)
206        #[arg(long, default_value = "lz4")]
207        compression: String,
208
209        /// Block size in bytes
210        #[arg(long, default_value_t = 65536)]
211        block_size: u32,
212
213        /// Build profile (ml, eda, embedded, generic, archival)
214        #[arg(long)]
215        profile: Option<String>,
216
217        /// Suppress output
218        #[arg(long, short)]
219        silent: bool,
220    },
221
222    // ------------------------------------------------------------------------
223    // Virtual Machine Operations
224    // ------------------------------------------------------------------------
225    /// Boot a virtual machine from snapshot
226    #[cfg(feature = "fuse")]
227    #[command(display_order = 10)]
228    #[command(
229        long_about = "Boots a transient Virtual Machine directly from a Hexz snapshot.\n\nThe VM uses a copy-on-write overlay, meaning the original snapshot remains immutable. Changes are lost on shutdown unless --persist is used."
230    )]
231    #[command(after_help = "hexz boot ubuntu.hxz --ram 4G --no-graphics")]
232    Boot {
233        /// Snapshot to boot from
234        snap: String,
235
236        /// RAM size (e.g., "4G")
237        #[arg(long)]
238        ram: Option<String>,
239
240        /// Disable KVM acceleration
241        #[arg(long)]
242        no_kvm: bool,
243
244        /// Network mode (user, bridge, none)
245        #[arg(long, default_value = "user")]
246        network: String,
247
248        /// Hypervisor backend (qemu, firecracker)
249        #[arg(long, default_value = "qemu")]
250        backend: String,
251
252        /// Persistent overlay path
253        #[arg(long)]
254        persist: Option<PathBuf>,
255
256        /// QMP socket path for control
257        #[arg(long)]
258        qmp_socket: Option<PathBuf>,
259
260        /// Disable graphics (headless mode)
261        #[arg(long)]
262        no_graphics: bool,
263
264        /// Enable VNC server
265        #[arg(long)]
266        vnc: bool,
267    },
268
269    /// Install OS from ISO to snapshot
270    #[cfg(feature = "fuse")]
271    #[command(display_order = 11)]
272    #[command(
273        long_about = "Runs an OS installer from an ISO and captures the result into a new Hexz snapshot.\n\nThis automates the process of creating base images for VMs."
274    )]
275    #[command(after_help = "hexz install alpine.iso alpine-base.hxz")]
276    Install {
277        /// Path to ISO image
278        iso: PathBuf,
279
280        /// Output snapshot path
281        output: PathBuf,
282
283        /// Virtual disk size (e.g., "10G")
284        #[arg(long, default_value = "10G")]
285        primary_size: String,
286
287        /// RAM size (e.g., "4G")
288        #[arg(long, default_value = "4G")]
289        ram: String,
290
291        /// Disable graphics
292        #[arg(long)]
293        no_graphics: bool,
294
295        /// Enable VNC
296        #[arg(long)]
297        vnc: bool,
298
299        /// Enable CDC
300        #[arg(long)]
301        cdc: bool,
302    },
303
304    /// Create snapshot via QMP
305    #[cfg(unix)]
306    #[command(display_order = 12)]
307    #[command(
308        long_about = "Triggers a live snapshot of a running VM via the QMP socket.\n\nThis allows for capturing the state of a running system without shutting it down."
309    )]
310    #[command(after_help = "hexz snap /tmp/qmp.sock base.hxz overlay.bin live.hxz")]
311    Snap {
312        /// QMP socket path
313        socket: PathBuf,
314
315        /// Base snapshot
316        base: PathBuf,
317
318        /// Overlay path
319        overlay: PathBuf,
320
321        /// Output snapshot
322        output: PathBuf,
323    },
324
325    /// Commit overlay changes to new snapshot
326    #[command(display_order = 13)]
327    #[command(
328        long_about = "Finalizes a writable overlay into a new immutable snapshot.\n\nSupports 'thin' snapshots which only store the deltas referencing the parent, ideal for iterative model fine-tuning."
329    )]
330    #[command(after_help = "hexz commit base.hxz overlay.bin new_model.hxz --thin")]
331    Commit {
332        /// Base snapshot
333        base: PathBuf,
334
335        /// Overlay with changes
336        overlay: PathBuf,
337
338        /// Output snapshot
339        output: PathBuf,
340
341        /// Compression algorithm
342        #[arg(long, default_value = "lz4")]
343        compression: String,
344
345        /// Block size (must be > 0)
346        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
347        block_size: u32,
348
349        /// Keep overlay file after commit
350        #[arg(long)]
351        keep_overlay: bool,
352
353        /// Flatten all layers into single archive
354        #[arg(long)]
355        flatten: bool,
356
357        /// Commit message
358        #[arg(long)]
359        message: Option<String>,
360
361        /// Create thin snapshot (reference base)
362        #[arg(long)]
363        thin: bool,
364    },
365
366    /// Mount snapshot as filesystem
367    #[cfg(feature = "fuse")]
368    #[command(display_order = 14)]
369    #[command(
370        long_about = "Mounts a Hexz snapshot as a FUSE filesystem.\n\nAllows standard tools to read data from the snapshot as if it were a normal directory."
371    )]
372    #[command(after_help = "hexz mount model.hxz /mnt/model --rw")]
373    Mount {
374        /// Snapshot to mount
375        snap: String,
376
377        /// Mount point directory
378        mountpoint: PathBuf,
379
380        /// Overlay for writes
381        #[arg(long)]
382        overlay: Option<PathBuf>,
383
384        /// Run as daemon
385        #[arg(short, long)]
386        daemon: bool,
387
388        /// Enable read-write mode
389        #[arg(long)]
390        rw: bool,
391
392        /// Cache size (e.g., "1G")
393        #[arg(long)]
394        cache_size: Option<String>,
395
396        /// User ID for files
397        #[arg(long, default_value_t = 1000)]
398        uid: u32,
399
400        /// Group ID for files
401        #[arg(long, default_value_t = 1000)]
402        gid: u32,
403
404        /// Export as NBD device
405        #[arg(long)]
406        nbd: bool,
407    },
408
409    /// Unmount filesystem
410    #[cfg(feature = "fuse")]
411    #[command(display_order = 15)]
412    #[command(long_about = "Unmounts a previously mounted Hexz filesystem.")]
413    #[command(after_help = "hexz unmount /mnt/model")]
414    Unmount {
415        /// Mount point to unmount
416        mountpoint: PathBuf,
417    },
418
419    // ------------------------------------------------------------------------
420    // System & Diagnostics
421    // ------------------------------------------------------------------------
422    /// Run system diagnostics
423    #[cfg(feature = "diagnostics")]
424    #[command(display_order = 20)]
425    #[command(
426        long_about = "Checks the system for compatibility with Hexz features (FUSE, KVM, AVX2, etc.)."
427    )]
428    #[command(after_help = "hexz doctor")]
429    Doctor,
430
431    /// Benchmark archive performance
432    #[cfg(feature = "diagnostics")]
433    #[command(display_order = 21)]
434    #[command(
435        long_about = "Runs read/write benchmarks on a specific archive to test throughput and latency."
436    )]
437    #[command(after_help = "hexz bench model.hxz --threads 4")]
438    Bench {
439        /// Archive to benchmark
440        image: PathBuf,
441
442        /// Block size for testing
443        #[arg(long)]
444        block_size: Option<u32>,
445
446        /// Duration in seconds
447        #[arg(long)]
448        duration: Option<u64>,
449
450        /// Number of threads
451        #[arg(long)]
452        threads: Option<usize>,
453    },
454
455    /// Serve archive over network
456    #[cfg(feature = "server")]
457    #[command(display_order = 22)]
458    #[command(
459        long_about = "Starts an HTTP/S3 compatible server to stream the snapshot over the network.\n\nClients can fetch specific byte ranges efficiently."
460    )]
461    #[command(after_help = "hexz serve model.hxz --port 8080")]
462    Serve {
463        /// Snapshot to serve
464        snap: String,
465
466        /// Server port
467        #[arg(long, default_value_t = 8080)]
468        port: u16,
469
470        /// Run as daemon
471        #[arg(short, long)]
472        daemon: bool,
473
474        /// Enable NBD protocol
475        #[arg(long)]
476        nbd: bool,
477
478        /// Enable S3-compatible API
479        #[arg(long)]
480        s3: bool,
481    },
482
483    /// Generate signing keys
484    #[cfg(feature = "signing")]
485    #[command(display_order = 23)]
486    #[command(long_about = "Generates an Ed25519 keypair for signing Hexz archives.")]
487    #[command(after_help = "hexz keygen --output-dir ~/.hexz/keys")]
488    Keygen {
489        /// Output directory for keys
490        #[arg(short, long)]
491        output_dir: Option<PathBuf>,
492    },
493
494    /// Sign archive
495    #[cfg(feature = "signing")]
496    #[command(display_order = 24)]
497    #[command(long_about = "Cryptographically signs a Hexz archive using a private key.")]
498    #[command(after_help = "hexz sign private.pem model.hxz")]
499    Sign {
500        /// Private key path
501        key: PathBuf,
502
503        /// Archive to sign
504        image: PathBuf,
505    },
506
507    /// Verify archive signature
508    #[cfg(feature = "signing")]
509    #[command(display_order = 25)]
510    #[command(
511        long_about = "Verifies the cryptographic signature of an archive using a public key."
512    )]
513    #[command(after_help = "hexz verify public.pem model.hxz")]
514    Verify {
515        /// Public key path
516        key: PathBuf,
517
518        /// Archive to verify
519        image: PathBuf,
520    },
521}