Skip to main content

hexz_cli/cmd/data/
inspect.rs

1//! Inspect archive metadata and display snapshot information.
2//!
3//! This command provides a detailed inspection of Hexz snapshot files (`.st`),
4//! reading the file header and master index to display metadata about the
5//! snapshot's structure, compression, encryption status, and storage statistics.
6//!
7//! # Use Cases
8//!
9//! - **Snapshot Inspection**: Verify snapshot format version and feature flags
10//! - **Compression Analysis**: Check compression algorithm and ratio achieved
11//! - **Capacity Planning**: View original vs. compressed size for storage estimates
12//! - **Debugging**: Identify snapshot corruption or format mismatches
13//! - **Automation**: JSON output mode enables scripting and tooling integration
14//!
15//! # Workflow
16//!
17//! The command performs these steps:
18//!
19//! 1. **Header Reading**: Reads the fixed-size header (4096 bytes) from file start
20//! 2. **Index Location**: Uses `index_offset` from header to locate the master index
21//! 3. **Index Parsing**: Deserializes the master index to extract page metadata
22//! 4. **Compression Ratio**: Calculates ratio from uncompressed vs. file size
23//! 5. **Output Formatting**: Renders human-readable or JSON output
24//!
25//! # Output Format
26//!
27//! The command displays:
28//!
29//! **Header Information:**
30//! - Format version (currently v1)
31//! - Compression algorithm (LZ4 or Zstd)
32//! - Block size used for chunking
33//!
34//! **Feature Flags:**
35//! - Encryption status (encrypted or plaintext)
//! - Primary stream (whether the snapshot contains a primary stream, i.e. the disk image)
//! - Secondary stream (whether the snapshot contains a secondary stream, i.e. the memory dump)
38//! - Variable blocks (whether CDC chunking was used)
39//!
40//! **Storage Statistics:**
41//! - Original size (sum of uncompressed disk + memory)
42//! - Compressed size (total file size on disk)
43//! - Compression ratio (multiplier showing space savings)
44//!
45//! **Index Details:**
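//! - Index offset in file (byte position)
//! - Primary pages (number of index pages for the primary/disk stream)
//! - Secondary pages (number of index pages for the secondary/memory stream)
//!
//! **Lineage & Metadata:**
//! - Parent links (one line per parent path, or "None (Standalone)")
//! - Metadata (embedded metadata, or "None")
//!
//! **Deduplication Breakdown (Primary):**
//! - Data, parent-reference, and zero block counts with their byte totals
//!   (shown only when block statistics are available)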
49//!
50//! # Common Usage Patterns
51//!
52//! ```bash
53//! # Inspect a snapshot with human-readable output
54//! hexz info vm-snapshot.st
55//!
56//! # Get machine-readable JSON for scripting
57//! hexz info vm-snapshot.st --json | jq .compression_ratio
58//!
59//! # Verify snapshot integrity
60//! hexz info corrupted.st  # Will fail if header is malformed
61//! ```
62
63use anyhow::{Context, Result};
64use hexz_ops::inspect::inspect_snapshot;
65use indicatif::HumanBytes;
66use std::path::PathBuf;
67
68/// Executes the info command to display snapshot metadata.
69///
70/// Reads and parses the snapshot header and master index, then displays
71/// comprehensive metadata about the snapshot's format, compression, features,
72/// and storage statistics. Output can be formatted as human-readable text or
73/// JSON for machine consumption.
74///
75/// # Arguments
76///
77/// * `snap` - Path to the `.st` snapshot file to inspect
78/// * `json` - If true, output JSON format; otherwise, human-readable format
79///
80/// # Output Details
81///
82/// **Human-Readable Format:**
83/// Displays formatted output with sections for Features and Storage Statistics,
84/// using human-friendly byte sizes (e.g., "10.5 GB") and clearly labeled fields.
85///
86/// **JSON Format:**
87/// Outputs a single JSON object with fields:
88/// - `path`: Snapshot file path (string)
89/// - `version`: Format version number (integer)
90/// - `compression`: Compression algorithm ("Lz4" or "Zstd")
91/// - `block_size`: Block size in bytes (integer)
92/// - `encrypted`: Encryption status (boolean)
93/// - `has_disk`: Primary stream present (boolean)
94/// - `has_memory`: Secondary stream present (boolean)
95/// - `variable_blocks`: CDC chunking enabled (boolean)
96/// - `original_size`: Uncompressed size in bytes (integer)
97/// - `compressed_size`: File size in bytes (integer)
98/// - `compression_ratio`: Compression multiplier (float)
99/// - `index_offset`: Master index byte offset (integer)
100/// - `primary_pages`: Number of disk index pages (integer)
101/// - `secondary_pages`: Number of memory index pages (integer)
102///
103/// # Errors
104///
105/// Returns an error if:
106/// - The snapshot file cannot be opened (file not found, permission denied)
107/// - The header cannot be read (file too small, I/O error)
108/// - The header format is invalid (corrupted file, wrong format)
109/// - The master index cannot be read (corrupted index, truncated file)
110/// - The master index format is invalid (version mismatch, corrupted data)
111///
112/// # Examples
113///
114/// ```no_run
115/// use std::path::PathBuf;
116/// use hexz_cli::cmd::data::inspect;
117///
118/// // Display human-readable snapshot information
119/// inspect::run(PathBuf::from("snapshot.hxz"), false)?;
120///
121/// // Output JSON for automated processing
122/// inspect::run(PathBuf::from("snapshot.hxz"), true)?;
123/// # Ok::<(), anyhow::Error>(())
124/// ```
125pub fn run(snap: PathBuf, json: bool) -> Result<()> {
126    // Note: inspect_snapshot in hexz_core needs to parse the full index
127    // to return the block_stats every time.
128    let info = inspect_snapshot(&snap).context("Failed to inspect snapshot")?;
129
130    let total_uncompressed = info.total_uncompressed();
131    let ratio = info.compression_ratio();
132
133    if json {
134        // Output machine-readable JSON
135        let out = serde_json::json!({
136            "path": snap,
137            "version": info.version,
138            "compression": info.compression,
139            "block_size": info.block_size,
140            "encrypted": info.encrypted,
141            "has_primary": info.has_primary,
142            "has_secondary": info.has_secondary,
143            "variable_blocks": info.variable_blocks,
144            "original_size": total_uncompressed,
145            "compressed_size": info.file_size,
146            "compression_ratio": ratio,
147            "index_offset": info.index_offset,
148            "primary_pages": info.primary_pages,
149            "secondary_pages": info.secondary_pages,
150            "parent_paths": info.parent_paths,
151            "metadata": info.metadata,
152            "block_stats": info.block_stats,
153        });
154
155        println!("{}", serde_json::to_string_pretty(&out)?);
156    } else {
157        // Output human-readable text
158        println!("Snapshot:       {:?}", snap);
159        println!("Format Version: {}", info.version);
160        println!("Compression:    {:?}", info.compression);
161        println!("Block Size:     {}", HumanBytes(info.block_size as u64));
162
163        println!("\n--- Features ---");
164        println!(
165            "Encrypted:      {}",
166            if info.encrypted { "Yes" } else { "No" }
167        );
168        println!(
169            "Primary Stream: {}",
170            if info.has_primary { "Yes" } else { "No" }
171        );
172        println!(
173            "Secondary Strm: {}",
174            if info.has_secondary { "Yes" } else { "No" }
175        );
176        println!(
177            "Variable Blks:  {}",
178            if info.variable_blocks {
179                "Yes (CDC)"
180            } else {
181                "No"
182            }
183        );
184
185        println!("\n--- Storage Statistics ---");
186        println!("Original Size:  {}", HumanBytes(total_uncompressed));
187        println!("Compressed:     {}", HumanBytes(info.file_size));
188        println!("Ratio:          {:.2}x", ratio);
189
190        println!("\n--- Index Details ---");
191        println!("Index Offset:   {}", info.index_offset);
192        println!("Primary Pages:  {}", info.primary_pages);
193        println!("Secondary Pgs:  {}", info.secondary_pages);
194
195        println!("\n--- Lineage & Metadata ---");
196        if info.parent_paths.is_empty() {
197            println!("Parent Links:   None (Standalone)");
198        } else {
199            for (i, p) in info.parent_paths.iter().enumerate() {
200                if i == 0 {
201                    println!("Parent Links:   {}", p);
202                } else {
203                    println!("                {}", p);
204                }
205            }
206        }
207
208        if let Some(meta) = &info.metadata {
209            println!("Metadata:       {}", meta);
210        } else {
211            println!("Metadata:       None");
212        }
213
214        if let Some(stats) = &info.block_stats {
215            println!("\n--- Deduplication Breakdown (Primary) ---");
216            println!(
217                "Data Blocks:    {} ({})",
218                stats.data_blocks,
219                HumanBytes(stats.data_bytes)
220            );
221            println!(
222                "Parent Refs:    {} ({})",
223                stats.parent_ref_blocks,
224                HumanBytes(stats.parent_ref_bytes)
225            );
226            println!(
227                "Zero Blocks:    {} ({})",
228                stats.zero_blocks,
229                HumanBytes(stats.zero_bytes)
230            );
231        }
232    }
233
234    Ok(())
235}