Skip to main content

hexz_cli/cmd/data/
pack.rs

1//! Pack data into a Hexz archive.
2//!
3//! This command creates a `.st` archive from disk images and/or memory dumps,
4//! delegating to the core packing logic in [`hexz_ops::pack`].
5//!
6//! # Overview
7//!
8//! The `pack` command is the primary way to create Hexz archives from raw data.
9//! It supports multiple compression algorithms, encryption, deduplication, and
10//! both fixed-size and content-defined chunking strategies.
11//!
12//! # Key Features
13//!
14//! - **Compression**: LZ4 (speed) or Zstandard (compression ratio)
15//! - **Deduplication**: SHA-256 based block deduplication (enabled by default)
16//! - **Encryption**: AES-256-GCM with password-based key derivation
17//! - **CDC**: Content-defined chunking for better deduplication
18//! - **Dictionary Training**: Improves Zstd compression by 10-30%
19//!
20//! # Performance Tuning
21//!
22//! **For maximum speed:**
23//! ```bash
24//! hexz data pack --disk image.img --output fast.st --compression lz4
25//! ```
26//!
27//! **For maximum compression:**
28//! ```bash
29//! hexz data pack --disk image.img --output small.st \
30//!   --compression zstd --train-dict --cdc
31//! ```
32//!
33//! **For balanced performance:**
34//! ```bash
35//! hexz data pack --disk image.img --output balanced.st \
36//!   --compression lz4 --block-size 131072
37//! ```
38
39use crate::ui::progress::create_progress_bar;
40use anyhow::Result;
41use hexz_ops::pack::{PackConfig, pack_snapshot};
42use std::path::PathBuf;
43use std::sync::{Arc, Mutex};
44
45/// Execute the pack command to create a Hexz snapshot archive.
46///
47/// This command creates a `.st` snapshot file from disk and/or memory dump files.
48/// It supports compression (LZ4 or Zstd), optional encryption, deduplication,
49/// content-defined chunking (CDC), and dictionary training for improved compression.
50///
51/// # Workflow
52///
53/// The packing process follows these steps:
54///
55/// 1. **Password prompt** (if encryption enabled): Prompts for password and derives encryption key
56/// 2. **Dictionary training** (if enabled): Samples blocks and trains a Zstd compression dictionary
57/// 3. **Chunking**: Splits input file(s) into blocks using fixed-size or CDC chunking
58/// 4. **Compression**: Compresses each block using the selected algorithm and optional dictionary
59/// 5. **Deduplication**: Hashes compressed blocks and eliminates duplicates (enabled by default)
60/// 6. **Index building**: Constructs the master index with page entries and block metadata
61/// 7. **Header writing**: Serializes header with format version, offsets, and feature flags
62///
63/// # Arguments
64///
65/// * `disk` - Optional path to disk image file (raw or qcow2)
66/// * `memory` - Optional path to memory dump file
67/// * `output` - Output path for the `.st` snapshot file
68/// * `compression` - Compression algorithm: "lz4" (fast) or "zstd" (balanced)
69/// * `encrypt` - Enable AES-256-GCM encryption (prompts for password)
70/// * `train_dict` - Train a Zstd dictionary for improved compression ratios
71/// * `block_size` - Block size in bytes (default: 64 KiB)
72/// * `cdc` - Enable content-defined chunking for variable-sized blocks
73/// * `min_chunk` - Minimum chunk size for CDC (default: 16 KiB)
74/// * `avg_chunk` - Average chunk size for CDC (default: 64 KiB)
75/// * `max_chunk` - Maximum chunk size for CDC (default: 128 KiB)
76/// * `silent` - Suppress progress output
77///
78/// # Performance Characteristics
79///
80/// - **LZ4**: ~500 MB/s compression throughput
81/// - **Zstd level 3**: ~200 MB/s compression throughput
82/// - **Deduplication overhead**: ~5-10% additional time for hashing
83/// - **Dictionary training**: 2-5 seconds for typical datasets
84///
85/// # Example
86///
87/// ```no_run
88/// # use std::path::PathBuf;
89/// # use hexz_cli::cmd::data::pack;
90/// // Pack a disk image with Zstd compression and dictionary training
91/// pack::run(
92///     Some(PathBuf::from("disk.img")),
93///     None,
94///     PathBuf::from("snapshot.hxz"),
95///     "zstd".to_string(),
96///     false,  // no encryption
97///     true,   // train dictionary
98///     65536,  // 64 KiB blocks
99///     None,   // min chunk (auto-detected)
100///     None,   // avg chunk (auto-detected)
101///     None,   // max chunk (auto-detected)
102///     None,   // workers (auto)
103///     false,  // dcam
104///     false,  // silent
105/// );
106/// ```
107#[allow(clippy::too_many_arguments)]
108pub fn run(
109    disk: Option<PathBuf>,
110    memory: Option<PathBuf>,
111    output: PathBuf,
112    compression: String,
113    encrypt: bool,
114    train_dict: bool,
115    block_size: u32,
116    min_chunk: Option<u32>,
117    avg_chunk: Option<u32>,
118    max_chunk: Option<u32>,
119    workers: Option<usize>,
120    dcam: bool,
121    silent: bool,
122) -> Result<()> {
123    // Get password if encryption is enabled (env var for non-interactive use)
124    let password = if encrypt {
125        Some(match std::env::var("HEXZ_PASSWORD") {
126            Ok(p) => p,
127            Err(_) => rpassword::prompt_password("Enter encryption password: ")?,
128        })
129    } else {
130        None
131    };
132
133    // Calculate total size for progress bar
134    let total_size = {
135        let mut size = 0u64;
136        if let Some(ref path) = disk {
137            size += std::fs::metadata(path)?.len();
138        }
139        if let Some(ref path) = memory {
140            size += std::fs::metadata(path)?.len();
141        }
142        size
143    };
144
145    // Create progress bar
146    let pb = if !silent {
147        let pb = create_progress_bar(total_size);
148        let pb = Arc::new(Mutex::new(pb));
149        Some(pb)
150    } else {
151        None
152    };
153    let pb_clone = pb.clone();
154
155    if train_dict && !silent {
156        println!("Training compression dictionary...");
157    }
158
159    // Create pack configuration
160    let config = PackConfig {
161        disk,
162        memory,
163        output: output.clone(),
164        compression,
165        encrypt,
166        password,
167        train_dict,
168        block_size,
169        min_chunk,
170        avg_chunk,
171        max_chunk,
172        parallel: workers != Some(1),
173        num_workers: workers.unwrap_or(0),
174        use_dcam: dcam,
175        ..Default::default()
176    };
177
178    // Run the packing operation with progress callback
179    pack_snapshot(
180        config,
181        Some(move |current, _total| {
182            if let Some(ref pb) = pb_clone {
183                if let Ok(pb) = pb.lock() {
184                    pb.set_position(current);
185                }
186            }
187        }),
188    )?;
189
190    if let Some(ref pb) = pb {
191        if let Ok(pb) = pb.lock() {
192            pb.finish_with_message("Done");
193        }
194    }
195
196    if !silent {
197        println!("Archive created: {:?}", output);
198    }
199    Ok(())
200}