Skip to main content

hexz_cli/
args.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4/// Hexz - High-performance snapshot and streaming engine
5#[derive(Parser)]
6#[command(name = "hexz", version, about, long_about = None)]
7#[command(disable_help_flag = true)] // We handle help manually
8#[command(styles = get_styles())]
9pub struct Cli {
10    #[arg(short, long, action = clap::ArgAction::SetTrue)]
11    pub help: bool,
12
13    #[command(subcommand)]
14    pub command: Option<Commands>,
15}
16
17fn get_styles() -> clap::builder::Styles {
18    use clap::builder::styling::{AnsiColor, Effects, Styles};
19    Styles::styled()
20        .header(AnsiColor::Yellow.on_default() | Effects::BOLD)
21        .usage(AnsiColor::Green.on_default() | Effects::BOLD)
22        .literal(AnsiColor::Cyan.on_default() | Effects::BOLD)
23        .placeholder(AnsiColor::Cyan.on_default())
24}
25
26/// Top-level command categories
27#[derive(Subcommand)]
28pub enum Commands {
29    // ------------------------------------------------------------------------
30    // Archive Operations
31    // ------------------------------------------------------------------------
32    /// Pack data into a Hexz archive
33    #[command(display_order = 1)]
34    #[command(
35        long_about = "Creates a highly compressed, encrypted, and deduplicated archive from a disk image or memory dump.\n\nIt uses Content-Defined Chunking (CDC) to ensure that only changed weights are stored when archiving multiple versions of a model. This is the primary way to ingest data into Hexz."
36    )]
37    #[command(after_help = "hexz pack model.hxz --disk ./model.bin --compression zstd --cdc")]
38    Pack {
39        /// Output archive path (.hxz)
40        output: PathBuf,
41
42        /// Path to disk image to pack
43        #[arg(long)]
44        disk: Option<PathBuf>,
45
46        /// Path to memory dump to pack
47        #[arg(long)]
48        memory: Option<PathBuf>,
49
50        /// Compression algorithm (lz4, zstd, none)
51        #[arg(long, default_value = "lz4")]
52        compression: String,
53
54        /// Enable encryption
55        #[arg(long)]
56        encrypt: bool,
57
58        /// Train compression dictionary
59        #[arg(long)]
60        train_dict: bool,
61
62        /// Block size in bytes (must be > 0)
63        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
64        block_size: u32,
65
66        /// Enable content-defined chunking (CDC)
67        #[arg(long)]
68        cdc: bool,
69
70        /// Minimum chunk size for CDC
71        #[arg(long, default_value_t = 16384, value_parser = clap::value_parser!(u32).range(1..))]
72        min_chunk: u32,
73
74        /// Average chunk size for CDC
75        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
76        avg_chunk: u32,
77
78        /// Maximum chunk size for CDC
79        #[arg(long, default_value_t = 131072, value_parser = clap::value_parser!(u32).range(1..))]
80        max_chunk: u32,
81
82        /// Suppress all output and progress bars
83        #[arg(long, short)]
84        silent: bool,
85    },
86
87    /// Inspect archive metadata
88    #[command(display_order = 2)]
89    #[command(
90        long_about = "Reads the header and index of a Hexz archive without decompressing the full body.\n\nUse this to verify archive integrity, check compression ratios, or view metadata about the stored snapshot."
91    )]
92    #[command(after_help = "hexz inspect ./model.hxz --json")]
93    Inspect {
94        /// Path to archive
95        snap: PathBuf,
96
97        /// Output as JSON
98        #[arg(long)]
99        json: bool,
100    },
101
102    /// Show differences in overlay
103    #[cfg(feature = "diagnostics")]
104    #[command(display_order = 3)]
105    #[command(
106        long_about = "Analyzes the differences between a base image and an overlay.\n\nThis is useful for auditing what changed in a fine-tuning run or verifying that a thin snapshot only contains the expected deltas."
107    )]
108    #[command(after_help = "hexz diff finetuned.overlay --blocks")]
109    Diff {
110        /// Path to overlay
111        overlay: PathBuf,
112
113        /// Show block-level differences
114        #[arg(long)]
115        blocks: bool,
116
117        /// Show file-level differences
118        #[arg(long)]
119        files: bool,
120    },
121
122    /// Build archive from source directory
123    #[command(display_order = 4)]
124    #[command(
125        long_about = "Recursively builds a Hexz archive from a local directory structure.\n\nUnlike 'pack' which handles raw disk images, 'build' is designed for file-system level packing."
126    )]
127    #[command(after_help = "hexz build ./checkpoints/ archive.hxz")]
128    Build {
129        /// Source directory
130        source: PathBuf,
131
132        /// Output archive path
133        output: PathBuf,
134
135        /// Optional memory dump
136        #[arg(long)]
137        memory: Option<PathBuf>,
138
139        /// Build profile
140        #[arg(long)]
141        profile: Option<String>,
142
143        /// Enable encryption
144        #[arg(long)]
145        encrypt: bool,
146
147        /// Enable CDC
148        #[arg(long)]
149        cdc: bool,
150    },
151
152    /// Analyze archive structure
153    #[cfg(feature = "diagnostics")]
154    #[command(display_order = 5)]
155    #[command(
156        long_about = "Performs a deep structural analysis of the archive format.\n\nUsed primarily for debugging corruption issues or optimizing block alignment strategies."
157    )]
158    #[command(after_help = "hexz analyze ./corrupt_image.hxz")]
159    Analyze {
160        /// Archive to analyze
161        input: PathBuf,
162    },
163
164    /// Convert external formats to Hexz snapshot
165    #[command(display_order = 6)]
166    #[command(
167        long_about = "Ingests external formats like tar, HDF5, or WebDataset into a Hexz snapshot.\n\nThis allows legacy datasets to benefit from Hexz's random access and deduplication features."
168    )]
169    #[command(after_help = "hexz convert tar data.tar data.hxz")]
170    Convert {
171        /// Source format (tar, hdf5, webdataset)
172        format: String,
173
174        /// Input file path
175        input: PathBuf,
176
177        /// Output snapshot path (.hxz)
178        output: PathBuf,
179
180        /// Compression algorithm (lz4, zstd)
181        #[arg(long, default_value = "lz4")]
182        compression: String,
183
184        /// Block size in bytes
185        #[arg(long, default_value_t = 65536)]
186        block_size: u32,
187
188        /// Build profile (ml, eda, embedded, generic, archival)
189        #[arg(long)]
190        profile: Option<String>,
191
192        /// Suppress output
193        #[arg(long, short)]
194        silent: bool,
195    },
196
197    // ------------------------------------------------------------------------
198    // Virtual Machine Operations
199    // ------------------------------------------------------------------------
200    /// Boot a virtual machine from snapshot
201    #[cfg(feature = "fuse")]
202    #[command(display_order = 10)]
203    #[command(
204        long_about = "Boots a transient Virtual Machine directly from a Hexz snapshot.\n\nThe VM uses a copy-on-write overlay, meaning the original snapshot remains immutable. Changes are lost on shutdown unless --persist is used."
205    )]
206    #[command(after_help = "hexz boot ubuntu.hxz --ram 4G --no-graphics")]
207    Boot {
208        /// Snapshot to boot from
209        snap: String,
210
211        /// RAM size (e.g., "4G")
212        #[arg(long)]
213        ram: Option<String>,
214
215        /// Disable KVM acceleration
216        #[arg(long)]
217        no_kvm: bool,
218
219        /// Network mode (user, bridge, none)
220        #[arg(long, default_value = "user")]
221        network: String,
222
223        /// Hypervisor backend (qemu, firecracker)
224        #[arg(long, default_value = "qemu")]
225        backend: String,
226
227        /// Persistent overlay path
228        #[arg(long)]
229        persist: Option<PathBuf>,
230
231        /// QMP socket path for control
232        #[arg(long)]
233        qmp_socket: Option<PathBuf>,
234
235        /// Disable graphics (headless mode)
236        #[arg(long)]
237        no_graphics: bool,
238
239        /// Enable VNC server
240        #[arg(long)]
241        vnc: bool,
242    },
243
244    /// Install OS from ISO to snapshot
245    #[cfg(feature = "fuse")]
246    #[command(display_order = 11)]
247    #[command(
248        long_about = "Runs an OS installer from an ISO and captures the result into a new Hexz snapshot.\n\nThis automates the process of creating base images for VMs."
249    )]
250    #[command(after_help = "hexz install alpine.iso alpine-base.hxz")]
251    Install {
252        /// Path to ISO image
253        iso: PathBuf,
254
255        /// Output snapshot path
256        output: PathBuf,
257
258        /// Virtual disk size (e.g., "10G")
259        #[arg(long, default_value = "10G")]
260        primary_size: String,
261
262        /// RAM size (e.g., "4G")
263        #[arg(long, default_value = "4G")]
264        ram: String,
265
266        /// Disable graphics
267        #[arg(long)]
268        no_graphics: bool,
269
270        /// Enable VNC
271        #[arg(long)]
272        vnc: bool,
273
274        /// Enable CDC
275        #[arg(long)]
276        cdc: bool,
277    },
278
279    /// Create snapshot via QMP
280    #[cfg(unix)]
281    #[command(display_order = 12)]
282    #[command(
283        long_about = "Triggers a live snapshot of a running VM via the QMP socket.\n\nThis allows for capturing the state of a running system without shutting it down."
284    )]
285    #[command(after_help = "hexz snap /tmp/qmp.sock base.hxz overlay.bin live.hxz")]
286    Snap {
287        /// QMP socket path
288        socket: PathBuf,
289
290        /// Base snapshot
291        base: PathBuf,
292
293        /// Overlay path
294        overlay: PathBuf,
295
296        /// Output snapshot
297        output: PathBuf,
298    },
299
300    /// Commit overlay changes to new snapshot
301    #[command(display_order = 13)]
302    #[command(
303        long_about = "Finalizes a writable overlay into a new immutable snapshot.\n\nSupports 'thin' snapshots which only store the deltas referencing the parent, ideal for iterative model fine-tuning."
304    )]
305    #[command(after_help = "hexz commit base.hxz overlay.bin new_model.hxz --thin")]
306    Commit {
307        /// Base snapshot
308        base: PathBuf,
309
310        /// Overlay with changes
311        overlay: PathBuf,
312
313        /// Output snapshot
314        output: PathBuf,
315
316        /// Compression algorithm
317        #[arg(long, default_value = "lz4")]
318        compression: String,
319
320        /// Block size (must be > 0)
321        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
322        block_size: u32,
323
324        /// Keep overlay file after commit
325        #[arg(long)]
326        keep_overlay: bool,
327
328        /// Flatten all layers into single archive
329        #[arg(long)]
330        flatten: bool,
331
332        /// Commit message
333        #[arg(long)]
334        message: Option<String>,
335
336        /// Create thin snapshot (reference base)
337        #[arg(long)]
338        thin: bool,
339    },
340
341    /// Mount snapshot as filesystem
342    #[cfg(feature = "fuse")]
343    #[command(display_order = 14)]
344    #[command(
345        long_about = "Mounts a Hexz snapshot as a FUSE filesystem.\n\nAllows standard tools to read data from the snapshot as if it were a normal directory."
346    )]
347    #[command(after_help = "hexz mount model.hxz /mnt/model --rw")]
348    Mount {
349        /// Snapshot to mount
350        snap: String,
351
352        /// Mount point directory
353        mountpoint: PathBuf,
354
355        /// Overlay for writes
356        #[arg(long)]
357        overlay: Option<PathBuf>,
358
359        /// Run as daemon
360        #[arg(short, long)]
361        daemon: bool,
362
363        /// Enable read-write mode
364        #[arg(long)]
365        rw: bool,
366
367        /// Cache size (e.g., "1G")
368        #[arg(long)]
369        cache_size: Option<String>,
370
371        /// User ID for files
372        #[arg(long, default_value_t = 1000)]
373        uid: u32,
374
375        /// Group ID for files
376        #[arg(long, default_value_t = 1000)]
377        gid: u32,
378
379        /// Export as NBD device
380        #[arg(long)]
381        nbd: bool,
382    },
383
384    /// Unmount filesystem
385    #[cfg(feature = "fuse")]
386    #[command(display_order = 15)]
387    #[command(long_about = "Unmounts a previously mounted Hexz filesystem.")]
388    #[command(after_help = "hexz unmount /mnt/model")]
389    Unmount {
390        /// Mount point to unmount
391        mountpoint: PathBuf,
392    },
393
394    // ------------------------------------------------------------------------
395    // System & Diagnostics
396    // ------------------------------------------------------------------------
397    /// Run system diagnostics
398    #[cfg(feature = "diagnostics")]
399    #[command(display_order = 20)]
400    #[command(
401        long_about = "Checks the system for compatibility with Hexz features (FUSE, KVM, AVX2, etc.)."
402    )]
403    #[command(after_help = "hexz doctor")]
404    Doctor,
405
406    /// Benchmark archive performance
407    #[cfg(feature = "diagnostics")]
408    #[command(display_order = 21)]
409    #[command(
410        long_about = "Runs read/write benchmarks on a specific archive to test throughput and latency."
411    )]
412    #[command(after_help = "hexz bench model.hxz --threads 4")]
413    Bench {
414        /// Archive to benchmark
415        image: PathBuf,
416
417        /// Block size for testing
418        #[arg(long)]
419        block_size: Option<u32>,
420
421        /// Duration in seconds
422        #[arg(long)]
423        duration: Option<u64>,
424
425        /// Number of threads
426        #[arg(long)]
427        threads: Option<usize>,
428    },
429
430    /// Serve archive over network
431    #[cfg(feature = "server")]
432    #[command(display_order = 22)]
433    #[command(
434        long_about = "Starts an HTTP/S3 compatible server to stream the snapshot over the network.\n\nClients can fetch specific byte ranges efficiently."
435    )]
436    #[command(after_help = "hexz serve model.hxz --port 8080")]
437    Serve {
438        /// Snapshot to serve
439        snap: String,
440
441        /// Server port
442        #[arg(long, default_value_t = 8080)]
443        port: u16,
444
445        /// Run as daemon
446        #[arg(short, long)]
447        daemon: bool,
448
449        /// Enable NBD protocol
450        #[arg(long)]
451        nbd: bool,
452
453        /// Enable S3-compatible API
454        #[arg(long)]
455        s3: bool,
456    },
457
458    /// Generate signing keys
459    #[cfg(feature = "signing")]
460    #[command(display_order = 23)]
461    #[command(long_about = "Generates an Ed25519 keypair for signing Hexz archives.")]
462    #[command(after_help = "hexz keygen --output-dir ~/.hexz/keys")]
463    Keygen {
464        /// Output directory for keys
465        #[arg(short, long)]
466        output_dir: Option<PathBuf>,
467    },
468
469    /// Sign archive
470    #[cfg(feature = "signing")]
471    #[command(display_order = 24)]
472    #[command(long_about = "Cryptographically signs a Hexz archive using a private key.")]
473    #[command(after_help = "hexz sign private.pem model.hxz")]
474    Sign {
475        /// Private key path
476        key: PathBuf,
477
478        /// Archive to sign
479        image: PathBuf,
480    },
481
482    /// Verify archive signature
483    #[cfg(feature = "signing")]
484    #[command(display_order = 25)]
485    #[command(
486        long_about = "Verifies the cryptographic signature of an archive using a public key."
487    )]
488    #[command(after_help = "hexz verify public.pem model.hxz")]
489    Verify {
490        /// Public key path
491        key: PathBuf,
492
493        /// Archive to verify
494        image: PathBuf,
495    },
496}