Skip to main content

hexz_cli/
args.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4/// Compressed, deduplicated snapshots
5#[derive(Parser)]
6#[command(name = "hexz", version, about = "Compressed, deduplicated snapshots", long_about = None)]
7#[command(disable_help_flag = true)] // We handle help manually
8#[command(styles = get_styles())]
9pub struct Cli {
10    #[arg(short, long, action = clap::ArgAction::SetTrue)]
11    pub help: bool,
12
13    #[command(subcommand)]
14    pub command: Option<Commands>,
15}
16
17fn get_styles() -> clap::builder::Styles {
18    use clap::builder::styling::{AnsiColor, Effects, Styles};
19    Styles::styled()
20        .header(AnsiColor::Yellow.on_default() | Effects::BOLD)
21        .usage(AnsiColor::Green.on_default() | Effects::BOLD)
22        .literal(AnsiColor::Cyan.on_default() | Effects::BOLD)
23        .placeholder(AnsiColor::Cyan.on_default())
24}
25
26/// Top-level command categories
27#[derive(Subcommand)]
28pub enum Commands {
29    // ------------------------------------------------------------------------
30    // Archive Operations
31    // ------------------------------------------------------------------------
32    /// Pack files into a snapshot
33    #[command(display_order = 1)]
34    #[command(
35        long_about = "Creates a highly compressed, encrypted, and deduplicated archive from a disk image or memory dump.\n\nIt uses Content-Defined Chunking (CDC) to ensure that only changed weights are stored when archiving multiple versions of a model. This is the primary way to ingest data into Hexz."
36    )]
37    #[command(after_help = "hexz pack model.hxz --disk ./model.bin --compression zstd")]
38    Pack {
39        /// Output archive path (.hxz)
40        output: PathBuf,
41
42        /// Path to disk image to pack
43        #[arg(long)]
44        disk: Option<PathBuf>,
45
46        /// Path to memory dump to pack
47        #[arg(long)]
48        memory: Option<PathBuf>,
49
50        /// Compression algorithm (lz4, zstd, none)
51        #[arg(long, default_value = "lz4")]
52        compression: String,
53
54        /// Enable encryption
55        #[arg(long)]
56        encrypt: bool,
57
58        /// Train compression dictionary
59        #[arg(long)]
60        train_dict: bool,
61
62        /// Block size in bytes (must be > 0)
63        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
64        block_size: u32,
65
66        /// Minimum CDC chunk size (auto-detected if not specified)
67        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
68        min_chunk: Option<u32>,
69
70        /// Average CDC chunk size (auto-detected if not specified)
71        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
72        avg_chunk: Option<u32>,
73
74        /// Maximum CDC chunk size (auto-detected if not specified)
75        #[arg(long, value_parser = clap::value_parser!(u32).range(1..))]
76        max_chunk: Option<u32>,
77
78        /// Number of compression worker threads (0 = auto)
79        #[arg(long)]
80        workers: Option<usize>,
81
82        /// Run DCAM analysis to auto-tune CDC chunk sizes (slower but adaptive).
83        /// Without this flag, CDC uses global defaults: min=16 KiB, avg=64 KiB, max=256 KiB.
84        #[arg(long)]
85        dcam: bool,
86
87        /// Suppress all output and progress bars
88        #[arg(long, short)]
89        silent: bool,
90    },
91
92    /// Import safetensors as a snapshot
93    #[command(display_order = 2)]
94    #[command(alias = "store")]
95    #[command(
96        long_about = "Converts a .safetensors file to a .hxz archive.\n\nNo PyTorch required — tensor bytes are copied directly from the source file. If --base is given, only changed blocks are stored; frozen tensors are referenced from the parent archive."
97    )]
98    #[command(after_help = "hexz import model.safetensors model.hxz --compression zstd")]
99    Import {
100        /// Input .safetensors file
101        input: PathBuf,
102
103        /// Output .hxz archive
104        output: PathBuf,
105
106        /// Parent .hxz archive for delta deduplication
107        #[arg(long)]
108        base: Option<PathBuf>,
109
110        /// Compression algorithm (lz4, zstd)
111        #[arg(long, default_value = "zstd")]
112        compression: String,
113
114        /// Block size in bytes
115        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
116        block_size: u32,
117
118        /// Suppress progress output
119        #[arg(long, short)]
120        silent: bool,
121    },
122
123    /// Export safetensors from a snapshot
124    #[command(display_order = 3)]
125    #[command(alias = "extract")]
126    #[command(
127        long_about = "Extracts a .hxz archive back to a .safetensors file.\n\nIf --tensor is given, only the raw bytes for that tensor are written (no file header)."
128    )]
129    #[command(after_help = "hexz export model.hxz model-out.safetensors")]
130    Export {
131        /// Input .hxz archive
132        input: PathBuf,
133
134        /// Output .safetensors file (default: <input stem>.safetensors)
135        output: Option<PathBuf>,
136
137        /// Extract a single tensor by name (raw bytes, no header)
138        #[arg(long)]
139        tensor: Option<String>,
140    },
141
142    /// Show snapshot details
143    #[command(display_order = 4)]
144    #[command(alias = "inspect")]
145    #[command(
146        long_about = "Reads the header and index of a Hexz archive without decompressing the full body.\n\nUse this to verify archive integrity, check compression ratios, or view metadata about the stored snapshot."
147    )]
148    #[command(after_help = "hexz show ./model.hxz --json")]
149    Show {
150        /// Path to archive
151        snap: PathBuf,
152
153        /// Output as JSON
154        #[arg(long)]
155        json: bool,
156    },
157
158    /// Compare two snapshots
159    #[command(display_order = 3)]
160    #[command(
161        long_about = "Compares the BLAKE3 block hashes of two Hexz archives.\n\nReports how much data is shared between them, unique to each, and the storage savings achieved through deduplication. Useful for understanding how much a fine-tuned checkpoint differs from its base."
162    )]
163    #[command(after_help = "hexz diff base.hxz finetuned.hxz")]
164    Diff {
165        /// First archive
166        a: PathBuf,
167
168        /// Second archive
169        b: PathBuf,
170    },
171
172    /// Show snapshot lineage tree
173    #[command(display_order = 4)]
174    #[command(alias = "ls")]
175    #[command(
176        long_about = "Scans a directory for .hxz archives and renders their parent-child relationships as a tree.\n\nParent links are read from each archive's header. Archives whose declared parent lives outside the scanned directory are annotated as external."
177    )]
178    #[command(after_help = "hexz log ./checkpoints/")]
179    Log {
180        /// Directory to scan
181        dir: PathBuf,
182    },
183
184    /// Pack with profile presets
185    #[command(display_order = 4)]
186    #[command(
187        long_about = "Creates a Hexz archive using a named build profile.\n\nProfiles automatically select compression, block size, and dictionary training settings optimized for different workloads (ML, EDA, embedded, generic)."
188    )]
189    #[command(after_help = "hexz build disk.img archive.hxz --profile ml")]
190    Build {
191        /// Source disk image
192        source: PathBuf,
193
194        /// Output archive path
195        output: PathBuf,
196
197        /// Optional memory dump
198        #[arg(long)]
199        memory: Option<PathBuf>,
200
201        /// Build profile
202        #[arg(long)]
203        profile: Option<String>,
204
205        /// Enable encryption
206        #[arg(long)]
207        encrypt: bool,
208    },
209
210    /// Convert external formats to snapshot
211    #[command(display_order = 6)]
212    #[command(
213        long_about = "Ingests external formats like tar, HDF5, or WebDataset into a Hexz snapshot.\n\nThis allows legacy datasets to benefit from Hexz's random access and deduplication features."
214    )]
215    #[command(after_help = "hexz convert tar data.tar data.hxz")]
216    Convert {
217        /// Source format (tar, hdf5, webdataset)
218        format: String,
219
220        /// Input file path
221        input: PathBuf,
222
223        /// Output snapshot path (.hxz)
224        output: PathBuf,
225
226        /// Compression algorithm (lz4, zstd)
227        #[arg(long, default_value = "lz4")]
228        compression: String,
229
230        /// Block size in bytes
231        #[arg(long, default_value_t = 65536)]
232        block_size: u32,
233
234        /// Build profile (ml, eda, embedded, generic)
235        #[arg(long)]
236        profile: Option<String>,
237
238        /// Suppress output
239        #[arg(long, short)]
240        silent: bool,
241    },
242
243    /// Estimate compression savings
244    #[command(display_order = 7)]
245    #[command(
246        long_about = "Quickly estimates the compression and deduplication savings if a raw data file\nwere packed into the Hexz format. Samples blocks without reading the whole file,\nso it completes in seconds even on multi-GB inputs."
247    )]
248    #[command(after_help = "hexz predict model.bin --block-size 65536 --json")]
249    Predict {
250        /// Path to the raw data file to analyze
251        file: PathBuf,
252
253        /// Block size in bytes
254        #[arg(long, default_value_t = 65536)]
255        block_size: u32,
256
257        /// Minimum CDC chunk size (auto-detected if not specified)
258        #[arg(long)]
259        min_chunk: Option<u32>,
260
261        /// Average CDC chunk size (auto-detected if not specified)
262        #[arg(long)]
263        avg_chunk: Option<u32>,
264
265        /// Maximum CDC chunk size (auto-detected if not specified)
266        #[arg(long)]
267        max_chunk: Option<u32>,
268
269        /// Output as JSON
270        #[arg(long)]
271        json: bool,
272    },
273
274    // ------------------------------------------------------------------------
275    // Virtual Machine Operations
276    // ------------------------------------------------------------------------
277    /// Boot a VM from snapshot
278    #[cfg(feature = "fuse")]
279    #[command(display_order = 10)]
280    #[command(
281        long_about = "Boots a transient Virtual Machine directly from a Hexz snapshot.\n\nThe VM uses a copy-on-write overlay, meaning the original snapshot remains immutable. Changes are lost on shutdown unless --persist is used."
282    )]
283    #[command(after_help = "hexz boot ubuntu.hxz --ram 4G --no-graphics")]
284    Boot {
285        /// Snapshot to boot from
286        snap: String,
287
288        /// RAM size (e.g., "4G")
289        #[arg(long)]
290        ram: Option<String>,
291
292        /// Disable KVM acceleration
293        #[arg(long)]
294        no_kvm: bool,
295
296        /// Network mode (user, bridge, none)
297        #[arg(long, default_value = "user")]
298        network: String,
299
300        /// Hypervisor backend (qemu, firecracker)
301        #[arg(long, default_value = "qemu")]
302        backend: String,
303
304        /// Persistent overlay path
305        #[arg(long)]
306        persist: Option<PathBuf>,
307
308        /// QMP socket path for control
309        #[arg(long)]
310        qmp_socket: Option<PathBuf>,
311
312        /// Disable graphics (headless mode)
313        #[arg(long)]
314        no_graphics: bool,
315
316        /// Enable VNC server
317        #[arg(long)]
318        vnc: bool,
319    },
320
321    /// Install OS from ISO to snapshot
322    #[cfg(feature = "fuse")]
323    #[command(display_order = 11)]
324    #[command(
325        long_about = "Runs an OS installer from an ISO and captures the result into a new Hexz snapshot.\n\nThis automates the process of creating base images for VMs."
326    )]
327    #[command(after_help = "hexz install alpine.iso alpine-base.hxz")]
328    Install {
329        /// Path to ISO image
330        iso: PathBuf,
331
332        /// Output snapshot path
333        output: PathBuf,
334
335        /// Virtual disk size (e.g., "10G")
336        #[arg(long, default_value = "10G")]
337        primary_size: String,
338
339        /// RAM size (e.g., "4G")
340        #[arg(long, default_value = "4G")]
341        ram: String,
342
343        /// Disable graphics
344        #[arg(long)]
345        no_graphics: bool,
346
347        /// Enable VNC
348        #[arg(long)]
349        vnc: bool,
350    },
351
352    /// Take a live VM snapshot
353    #[cfg(unix)]
354    #[command(display_order = 12)]
355    #[command(
356        long_about = "Triggers a live snapshot of a running VM via the QMP socket.\n\nThis allows for capturing the state of a running system without shutting it down."
357    )]
358    #[command(after_help = "hexz snap /tmp/qmp.sock base.hxz overlay.bin live.hxz")]
359    Snap {
360        /// QMP socket path
361        socket: PathBuf,
362
363        /// Base snapshot
364        base: PathBuf,
365
366        /// Overlay path
367        overlay: PathBuf,
368
369        /// Output snapshot
370        output: PathBuf,
371    },
372
373    /// Commit overlay to new snapshot
374    #[command(display_order = 13)]
375    #[command(
376        long_about = "Finalizes a writable overlay into a new immutable snapshot.\n\nSupports 'thin' snapshots which only store the deltas referencing the parent, ideal for iterative model fine-tuning."
377    )]
378    #[command(after_help = "hexz commit base.hxz overlay.bin new_model.hxz --thin")]
379    Commit {
380        /// Base snapshot
381        base: PathBuf,
382
383        /// Overlay with changes
384        overlay: PathBuf,
385
386        /// Output snapshot
387        output: PathBuf,
388
389        /// Compression algorithm
390        #[arg(long, default_value = "lz4")]
391        compression: String,
392
393        /// Block size (must be > 0)
394        #[arg(long, default_value_t = 65536, value_parser = clap::value_parser!(u32).range(1..))]
395        block_size: u32,
396
397        /// Keep overlay file after commit
398        #[arg(long)]
399        keep_overlay: bool,
400
401        /// Path to memory dump to include
402        #[arg(long)]
403        memory: Option<PathBuf>,
404
405        /// Commit message
406        #[arg(long)]
407        message: Option<String>,
408
409        /// Create thin snapshot (reference base)
410        #[arg(long)]
411        thin: bool,
412    },
413
414    /// Mount snapshot as filesystem
415    #[cfg(feature = "fuse")]
416    #[command(display_order = 14)]
417    #[command(
418        long_about = "Mounts a Hexz snapshot as a FUSE filesystem.\n\nAllows standard tools to read data from the snapshot as if it were a normal directory."
419    )]
420    #[command(after_help = "hexz mount model.hxz /mnt/model --rw")]
421    Mount {
422        /// Snapshot to mount
423        snap: String,
424
425        /// Mount point directory
426        mountpoint: PathBuf,
427
428        /// Overlay for writes
429        #[arg(long)]
430        overlay: Option<PathBuf>,
431
432        /// Run as daemon
433        #[arg(short, long)]
434        daemon: bool,
435
436        /// Enable read-write mode
437        #[arg(long)]
438        rw: bool,
439
440        /// Cache size (e.g., "1G")
441        #[arg(long)]
442        cache_size: Option<String>,
443
444        /// User ID for files
445        #[arg(long, default_value_t = 1000)]
446        uid: u32,
447
448        /// Group ID for files
449        #[arg(long, default_value_t = 1000)]
450        gid: u32,
451
452        /// Export as NBD device
453        #[arg(long)]
454        nbd: bool,
455
456        /// Prefetch window size (number of blocks to read ahead)
457        #[arg(long)]
458        prefetch: Option<u32>,
459    },
460
461    /// Unmount snapshot filesystem
462    #[cfg(feature = "fuse")]
463    #[command(display_order = 15)]
464    #[command(long_about = "Unmounts a previously mounted Hexz filesystem.")]
465    #[command(after_help = "hexz unmount /mnt/model")]
466    Unmount {
467        /// Mount point to unmount
468        mountpoint: PathBuf,
469    },
470
471    // ------------------------------------------------------------------------
472    // System & Diagnostics
473    // ------------------------------------------------------------------------
474    /// Check system requirements
475    #[cfg(feature = "diagnostics")]
476    #[command(display_order = 20)]
477    #[command(
478        long_about = "Checks the system for compatibility with Hexz features (FUSE, QEMU, network)."
479    )]
480    #[command(after_help = "hexz doctor")]
481    Doctor,
482
483    /// Serve snapshot over network
484    #[cfg(feature = "server")]
485    #[command(display_order = 22)]
486    #[command(
487        long_about = "Starts an HTTP server to stream the snapshot over the network.\n\nClients can fetch specific byte ranges efficiently."
488    )]
489    #[command(after_help = "hexz serve model.hxz --port 8080")]
490    Serve {
491        /// Snapshot to serve
492        snap: String,
493
494        /// Server port
495        #[arg(long, default_value_t = 8080)]
496        port: u16,
497
498        /// Bind address
499        #[arg(long, default_value = "127.0.0.1")]
500        bind: String,
501
502        /// Run as daemon
503        #[arg(short, long)]
504        daemon: bool,
505
506        /// Enable NBD protocol
507        #[arg(long)]
508        nbd: bool,
509    },
510
511    /// Generate signing keypair
512    #[cfg(feature = "signing")]
513    #[command(display_order = 23)]
514    #[command(long_about = "Generates an Ed25519 keypair for signing Hexz archives.")]
515    #[command(after_help = "hexz keygen --output-dir ~/.hexz/keys")]
516    Keygen {
517        /// Output directory for keys
518        #[arg(short, long)]
519        output_dir: Option<PathBuf>,
520    },
521
522    /// Sign a snapshot
523    #[cfg(feature = "signing")]
524    #[command(display_order = 24)]
525    #[command(long_about = "Cryptographically signs a Hexz archive using a private key.")]
526    #[command(after_help = "hexz sign private.pem model.hxz")]
527    Sign {
528        /// Private key path
529        key: PathBuf,
530
531        /// Archive to sign
532        image: PathBuf,
533    },
534
535    /// Verify snapshot signature
536    #[cfg(feature = "signing")]
537    #[command(display_order = 25)]
538    #[command(
539        long_about = "Verifies the cryptographic signature of an archive using a public key."
540    )]
541    #[command(after_help = "hexz verify public.pem model.hxz")]
542    Verify {
543        /// Public key path
544        key: PathBuf,
545
546        /// Archive to verify
547        image: PathBuf,
548    },
549}