Skip to main content

hexz_cli/cmd/data/
inspect.rs

1//! Inspect archive metadata and display snapshot information.
2//!
3//! This command provides a detailed inspection of Hexz snapshot files (`.st`),
4//! reading the file header and master index to display metadata about the
5//! snapshot's structure, compression, encryption status, and storage statistics.
6//!
7//! # Use Cases
8//!
9//! - **Snapshot Inspection**: Verify snapshot format version and feature flags
10//! - **Compression Analysis**: Check compression algorithm and ratio achieved
11//! - **Capacity Planning**: View original vs. compressed size for storage estimates
12//! - **Debugging**: Identify snapshot corruption or format mismatches
13//! - **Automation**: JSON output mode enables scripting and tooling integration
14//!
15//! # Workflow
16//!
17//! The command performs these steps:
18//!
19//! 1. **Header Reading**: Reads the fixed-size header (4096 bytes) from file start
20//! 2. **Index Location**: Uses `index_offset` from header to locate the master index
21//! 3. **Index Parsing**: Deserializes the master index to extract page metadata
22//! 4. **Compression Ratio**: Calculates ratio from uncompressed vs. file size
23//! 5. **Output Formatting**: Renders human-readable or JSON output
24//!
25//! # Output Format
26//!
27//! The command displays:
28//!
29//! **Header Information:**
30//! - Format version (currently v1)
31//! - Compression algorithm (LZ4 or Zstd)
32//! - Block size used for chunking
33//!
34//! **Feature Flags:**
35//! - Encryption status (encrypted or plaintext)
36//! - Disk presence (whether snapshot contains disk image)
37//! - Memory presence (whether snapshot contains memory dump)
38//! - Variable blocks (whether CDC chunking was used)
39//!
40//! **Storage Statistics:**
41//! - Original size (sum of uncompressed disk + memory)
42//! - Compressed size (total file size on disk)
43//! - Compression ratio (multiplier showing space savings)
44//!
45//! **Index Details:**
46//! - Index offset in file (byte position)
47//! - Disk pages (number of index pages for primary stream)
48//! - Memory pages (number of index pages for secondary stream)
49//!
50//! # Common Usage Patterns
51//!
52//! ```bash
53//! # Inspect a snapshot with human-readable output
54//! hexz info vm-snapshot.st
55//!
56//! # Get machine-readable JSON for scripting
57//! hexz info vm-snapshot.st --json | jq .compression_ratio
58//!
59//! # Verify snapshot integrity
60//! hexz info corrupted.st  # Will fail if header is malformed
61//! ```
62
63use anyhow::{Context, Result};
64use hexz_ops::inspect::inspect_snapshot;
65use indicatif::HumanBytes;
66use std::path::PathBuf;
67
68/// Summarize metadata JSON into a compact human-readable string.
69///
70/// If the metadata contains `hexz_checkpoint`, show checkpoint version,
71/// tensor count, and scalar count. Otherwise show the byte length.
72fn summarize_metadata(raw: &str) -> String {
73    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(raw) {
74        if let Some(ver) = obj.get("hexz_checkpoint").and_then(|v| v.as_str()) {
75            let tensors = obj
76                .get("tensors")
77                .and_then(|v| v.as_object())
78                .map(|m| m.len())
79                .unwrap_or(0);
80            let scalars = obj
81                .get("scalars")
82                .and_then(|v| v.as_object())
83                .map(|m| m.len())
84                .unwrap_or(0);
85            return format!(
86                "checkpoint v{}, {} tensors, {} scalars",
87                ver, tensors, scalars
88            );
89        }
90    }
91    format!("{} bytes", raw.len())
92}
93
94/// Executes the info command to display snapshot metadata.
95///
96/// Reads and parses the snapshot header and master index, then displays
97/// comprehensive metadata about the snapshot's format, compression, features,
98/// and storage statistics. Output can be formatted as human-readable text or
99/// JSON for machine consumption.
100///
101/// # Arguments
102///
103/// * `snap` - Path to the `.st` snapshot file to inspect
104/// * `json` - If true, output JSON format; otherwise, human-readable format
105///
106/// # Output Details
107///
108/// **Human-Readable Format:**
109/// Displays formatted output with sections for Features and Storage Statistics,
110/// using human-friendly byte sizes (e.g., "10.5 GB") and clearly labeled fields.
111///
112/// **JSON Format:**
113/// Outputs a single JSON object with fields:
114/// - `path`: Snapshot file path (string)
115/// - `version`: Format version number (integer)
116/// - `compression`: Compression algorithm ("Lz4" or "Zstd")
117/// - `block_size`: Block size in bytes (integer)
118/// - `encrypted`: Encryption status (boolean)
119/// - `has_disk`: Primary stream present (boolean)
120/// - `has_memory`: Secondary stream present (boolean)
121/// - `variable_blocks`: CDC chunking enabled (boolean)
122/// - `original_size`: Uncompressed size in bytes (integer)
123/// - `compressed_size`: File size in bytes (integer)
124/// - `compression_ratio`: Compression multiplier (float)
125/// - `index_offset`: Master index byte offset (integer)
126/// - `primary_pages`: Number of disk index pages (integer)
127/// - `secondary_pages`: Number of memory index pages (integer)
128///
129/// # Errors
130///
131/// Returns an error if:
132/// - The snapshot file cannot be opened (file not found, permission denied)
133/// - The header cannot be read (file too small, I/O error)
134/// - The header format is invalid (corrupted file, wrong format)
135/// - The master index cannot be read (corrupted index, truncated file)
136/// - The master index format is invalid (version mismatch, corrupted data)
137///
138/// # Examples
139///
140/// ```no_run
141/// use std::path::PathBuf;
142/// use hexz_cli::cmd::data::inspect;
143///
144/// // Display human-readable snapshot information
145/// inspect::run(PathBuf::from("snapshot.hxz"), false)?;
146///
147/// // Output JSON for automated processing
148/// inspect::run(PathBuf::from("snapshot.hxz"), true)?;
149/// # Ok::<(), anyhow::Error>(())
150/// ```
151pub fn run(snap: PathBuf, json: bool) -> Result<()> {
152    // Note: inspect_snapshot in hexz_core needs to parse the full index
153    // to return the block_stats every time.
154    let info = inspect_snapshot(&snap).context("Failed to inspect snapshot")?;
155
156    let total_uncompressed = info.total_uncompressed();
157    let ratio = info.compression_ratio();
158
159    if json {
160        // Output machine-readable JSON
161        let out = serde_json::json!({
162            "path": snap,
163            "version": info.version,
164            "compression": info.compression,
165            "block_size": info.block_size,
166            "encrypted": info.encrypted,
167            "has_primary": info.has_primary,
168            "has_secondary": info.has_secondary,
169            "variable_blocks": info.variable_blocks,
170            "original_size": total_uncompressed,
171            "compressed_size": info.file_size,
172            "compression_ratio": ratio,
173            "index_offset": info.index_offset,
174            "primary_pages": info.primary_pages,
175            "secondary_pages": info.secondary_pages,
176            "parent_paths": info.parent_paths,
177            "metadata": info.metadata,
178            "block_stats": info.block_stats,
179        });
180
181        println!("{}", serde_json::to_string_pretty(&out)?);
182    } else {
183        // Compact human-readable output
184        let filename = snap
185            .file_name()
186            .map(|f| f.to_string_lossy().to_string())
187            .unwrap_or_else(|| snap.display().to_string());
188
189        let comp_name = match info.compression {
190            hexz_core::format::header::CompressionType::Lz4 => "LZ4",
191            hexz_core::format::header::CompressionType::Zstd => "Zstd",
192        };
193
194        println!("{}", filename);
195        let block_kib = info.block_size / 1024;
196        println!(
197            "  format:     v{}, {}, {} KiB blocks",
198            info.version, comp_name, block_kib,
199        );
200        println!(
201            "  size:       {} on disk, {} uncompressed ({:.2}x)",
202            HumanBytes(info.file_size),
203            HumanBytes(total_uncompressed),
204            ratio,
205        );
206
207        if !info.parent_paths.is_empty() {
208            // Show just the filename of the first parent
209            let parent_display = std::path::Path::new(&info.parent_paths[0])
210                .file_name()
211                .map(|f| f.to_string_lossy().to_string())
212                .unwrap_or_else(|| info.parent_paths[0].clone());
213            println!("  parent:     {}", parent_display);
214        }
215
216        if let Some(stats) = &info.block_stats {
217            let mut parts = Vec::new();
218            if stats.data_blocks > 0 {
219                parts.push(format!(
220                    "{} data ({} unique)",
221                    stats.data_blocks, stats.unique_blocks
222                ));
223            }
224            if stats.parent_ref_blocks > 0 {
225                parts.push(format!("{} parent refs", stats.parent_ref_blocks));
226            }
227            if stats.zero_blocks > 0 {
228                parts.push(format!("{} zero", stats.zero_blocks));
229            }
230            if !parts.is_empty() {
231                println!("  blocks:     {}", parts.join(", "));
232            }
233        }
234
235        // Metadata summary
236        if let Some(meta) = &info.metadata {
237            let summary = summarize_metadata(meta);
238            println!("  metadata:   {}", summary);
239        }
240    }
241
242    Ok(())
243}