hexz_cli/cmd/data/inspect.rs
1//! Inspect archive metadata and display snapshot information.
2//!
3//! This command provides a detailed inspection of Hexz snapshot files (`.st`),
4//! reading the file header and master index to display metadata about the
5//! snapshot's structure, compression, encryption status, and storage statistics.
6//!
7//! # Use Cases
8//!
9//! - **Snapshot Inspection**: Verify snapshot format version and feature flags
10//! - **Compression Analysis**: Check compression algorithm and ratio achieved
11//! - **Capacity Planning**: View original vs. compressed size for storage estimates
12//! - **Debugging**: Identify snapshot corruption or format mismatches
13//! - **Automation**: JSON output mode enables scripting and tooling integration
14//!
15//! # Workflow
16//!
17//! The command performs these steps:
18//!
19//! 1. **Header Reading**: Reads the fixed-size header (4096 bytes) from file start
20//! 2. **Index Location**: Uses `index_offset` from header to locate the master index
21//! 3. **Index Parsing**: Deserializes the master index to extract page metadata
22//! 4. **Compression Ratio**: Calculates ratio from uncompressed vs. file size
23//! 5. **Output Formatting**: Renders human-readable or JSON output
24//!
25//! # Output Format
26//!
27//! The command displays:
28//!
29//! **Header Information:**
30//! - Format version (currently v1)
31//! - Compression algorithm (LZ4 or Zstd)
32//! - Block size used for chunking
33//!
//! **Feature Flags:**
//! - Encryption status (encrypted or plaintext)
//! - Primary stream presence (whether snapshot contains disk image)
//! - Secondary stream presence (whether snapshot contains memory dump)
//! - Variable blocks (whether CDC chunking was used)
39//!
40//! **Storage Statistics:**
41//! - Original size (sum of uncompressed disk + memory)
42//! - Compressed size (total file size on disk)
43//! - Compression ratio (multiplier showing space savings)
44//!
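//! **Index Details:**
//! - Index offset in file (byte position)
//! - Primary pages (number of index pages for the primary/disk stream)
//! - Secondary pages (number of index pages for the secondary/memory stream)
//!
//! **Lineage & Metadata:**
//! - Parent links (paths of parent snapshots, or "None (Standalone)")
//! - Metadata (embedded metadata, or "None")
//!
//! **Deduplication Breakdown (shown only when block statistics are available):**
//! - Data blocks, parent references, and zero blocks (count and byte size of each)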
49//!
50//! # Common Usage Patterns
51//!
52//! ```bash
53//! # Inspect a snapshot with human-readable output
54//! hexz info vm-snapshot.st
55//!
56//! # Get machine-readable JSON for scripting
57//! hexz info vm-snapshot.st --json | jq .compression_ratio
58//!
59//! # Verify snapshot integrity
60//! hexz info corrupted.st # Will fail if header is malformed
61//! ```
62
63use anyhow::{Context, Result};
64use hexz_ops::inspect::inspect_snapshot;
65use indicatif::HumanBytes;
66use std::path::PathBuf;
67
68/// Executes the info command to display snapshot metadata.
69///
70/// Reads and parses the snapshot header and master index, then displays
71/// comprehensive metadata about the snapshot's format, compression, features,
72/// and storage statistics. Output can be formatted as human-readable text or
73/// JSON for machine consumption.
74///
75/// # Arguments
76///
77/// * `snap` - Path to the `.st` snapshot file to inspect
78/// * `json` - If true, output JSON format; otherwise, human-readable format
79///
80/// # Output Details
81///
82/// **Human-Readable Format:**
83/// Displays formatted output with sections for Features and Storage Statistics,
84/// using human-friendly byte sizes (e.g., "10.5 GB") and clearly labeled fields.
85///
86/// **JSON Format:**
87/// Outputs a single JSON object with fields:
88/// - `path`: Snapshot file path (string)
89/// - `version`: Format version number (integer)
90/// - `compression`: Compression algorithm ("Lz4" or "Zstd")
91/// - `block_size`: Block size in bytes (integer)
92/// - `encrypted`: Encryption status (boolean)
93/// - `has_disk`: Primary stream present (boolean)
94/// - `has_memory`: Secondary stream present (boolean)
95/// - `variable_blocks`: CDC chunking enabled (boolean)
96/// - `original_size`: Uncompressed size in bytes (integer)
97/// - `compressed_size`: File size in bytes (integer)
98/// - `compression_ratio`: Compression multiplier (float)
99/// - `index_offset`: Master index byte offset (integer)
100/// - `primary_pages`: Number of disk index pages (integer)
101/// - `secondary_pages`: Number of memory index pages (integer)
102///
103/// # Errors
104///
105/// Returns an error if:
106/// - The snapshot file cannot be opened (file not found, permission denied)
107/// - The header cannot be read (file too small, I/O error)
108/// - The header format is invalid (corrupted file, wrong format)
109/// - The master index cannot be read (corrupted index, truncated file)
110/// - The master index format is invalid (version mismatch, corrupted data)
111///
112/// # Examples
113///
114/// ```no_run
115/// use std::path::PathBuf;
116/// use hexz_cli::cmd::data::inspect;
117///
118/// // Display human-readable snapshot information
119/// inspect::run(PathBuf::from("snapshot.hxz"), false)?;
120///
121/// // Output JSON for automated processing
122/// inspect::run(PathBuf::from("snapshot.hxz"), true)?;
123/// # Ok::<(), anyhow::Error>(())
124/// ```
125pub fn run(snap: PathBuf, json: bool) -> Result<()> {
126 // Note: inspect_snapshot in hexz_core needs to parse the full index
127 // to return the block_stats every time.
128 let info = inspect_snapshot(&snap).context("Failed to inspect snapshot")?;
129
130 let total_uncompressed = info.total_uncompressed();
131 let ratio = info.compression_ratio();
132
133 if json {
134 // Output machine-readable JSON
135 let out = serde_json::json!({
136 "path": snap,
137 "version": info.version,
138 "compression": info.compression,
139 "block_size": info.block_size,
140 "encrypted": info.encrypted,
141 "has_primary": info.has_primary,
142 "has_secondary": info.has_secondary,
143 "variable_blocks": info.variable_blocks,
144 "original_size": total_uncompressed,
145 "compressed_size": info.file_size,
146 "compression_ratio": ratio,
147 "index_offset": info.index_offset,
148 "primary_pages": info.primary_pages,
149 "secondary_pages": info.secondary_pages,
150 "parent_paths": info.parent_paths,
151 "metadata": info.metadata,
152 "block_stats": info.block_stats,
153 });
154
155 println!("{}", serde_json::to_string_pretty(&out)?);
156 } else {
157 // Output human-readable text
158 println!("Snapshot: {:?}", snap);
159 println!("Format Version: {}", info.version);
160 println!("Compression: {:?}", info.compression);
161 println!("Block Size: {}", HumanBytes(info.block_size as u64));
162
163 println!("\n--- Features ---");
164 println!(
165 "Encrypted: {}",
166 if info.encrypted { "Yes" } else { "No" }
167 );
168 println!(
169 "Primary Stream: {}",
170 if info.has_primary { "Yes" } else { "No" }
171 );
172 println!(
173 "Secondary Strm: {}",
174 if info.has_secondary { "Yes" } else { "No" }
175 );
176 println!(
177 "Variable Blks: {}",
178 if info.variable_blocks {
179 "Yes (CDC)"
180 } else {
181 "No"
182 }
183 );
184
185 println!("\n--- Storage Statistics ---");
186 println!("Original Size: {}", HumanBytes(total_uncompressed));
187 println!("Compressed: {}", HumanBytes(info.file_size));
188 println!("Ratio: {:.2}x", ratio);
189
190 println!("\n--- Index Details ---");
191 println!("Index Offset: {}", info.index_offset);
192 println!("Primary Pages: {}", info.primary_pages);
193 println!("Secondary Pgs: {}", info.secondary_pages);
194
195 println!("\n--- Lineage & Metadata ---");
196 if info.parent_paths.is_empty() {
197 println!("Parent Links: None (Standalone)");
198 } else {
199 for (i, p) in info.parent_paths.iter().enumerate() {
200 if i == 0 {
201 println!("Parent Links: {}", p);
202 } else {
203 println!(" {}", p);
204 }
205 }
206 }
207
208 if let Some(meta) = &info.metadata {
209 println!("Metadata: {}", meta);
210 } else {
211 println!("Metadata: None");
212 }
213
214 if let Some(stats) = &info.block_stats {
215 println!("\n--- Deduplication Breakdown (Primary) ---");
216 println!(
217 "Data Blocks: {} ({})",
218 stats.data_blocks,
219 HumanBytes(stats.data_bytes)
220 );
221 println!(
222 "Parent Refs: {} ({})",
223 stats.parent_ref_blocks,
224 HumanBytes(stats.parent_ref_bytes)
225 );
226 println!(
227 "Zero Blocks: {} ({})",
228 stats.zero_blocks,
229 HumanBytes(stats.zero_bytes)
230 );
231 }
232 }
233
234 Ok(())
235}