Skip to main content

hexz_cli/cmd/data/
diff.rs

1//! Show differences in overlay and identify modified blocks.
2//!
3//! This command analyzes overlay files created by the FUSE mount (in read-write mode)
4//! to display which blocks have been modified, providing statistics about write
5//! activity and changed data. The overlay mechanism tracks writes at 4 KiB granularity,
6//! allowing efficient copy-on-write semantics without modifying the base snapshot.
7//!
8//! # Overlay Format
9//!
10//! When mounting a snapshot in read-write mode, two files are created:
11//!
12//! **Overlay File (`.overlay`):**
13//! - Contains modified 4 KiB blocks written by the VM/guest
14//! - Sparse file with blocks at their original logical offsets
15//! - Only modified blocks consume disk space
16//!
17//! **Metadata File (`.meta`):**
18//! - Contains a sorted list of modified block indices (8 bytes each)
19//! - Used to quickly enumerate changed blocks without scanning the overlay
20//! - Format: array of `u64` block numbers in little-endian encoding
21//!
22//! # Use Cases
23//!
24//! - **Change Tracking**: Identify what data has been modified during VM execution
25//! - **Incremental Commits**: Determine which blocks need to be merged into new snapshot
26//! - **Debugging**: Investigate unexpected writes or storage growth
27//! - **Capacity Planning**: Estimate commit size before running `vm commit`
28//! - **File-Level Analysis**: Map modified blocks to files (future enhancement)
29//!
30//! # Output Modes
31//!
32//! **Default Mode (Summary):**
33//! Displays basic statistics:
34//! - Total number of modified blocks
35//! - Estimated data size changed
36//!
37//! **Blocks Mode (`--blocks`):**
38//! Shows overlay statistics with human-readable sizes.
39//!
40//! **Files Mode (`--files`):**
41//! Lists individual modified block indices. File-level resolution
42//! (mapping blocks to filesystem inodes) is not yet implemented.
43//!
44//! # Comparison to Other Diff Tools
45//!
46//! Unlike traditional file diffs (e.g., `diff`, `rsync --dry-run`):
47//! - Operates at block level, not file level
48//! - Does not require mounting or filesystem parsing
49//! - Fast: reads only the small metadata file, not entire overlay
50//! - Shows raw block changes, not semantic file differences
51//!
52//! # Common Usage Patterns
53//!
54//! ```bash
55//! # Show summary of changes
56//! hexz diff overlay.img
57//!
58//! # Show detailed block statistics
59//! hexz diff overlay.img --blocks
60//!
61//! # List all modified block indices
62//! hexz diff overlay.img --files
63//!
64//! # Estimate commit size before running vm commit
65//! hexz diff vm-state.overlay --blocks
66//! # Prints a "--- Overlay Statistics ---" header, then "Modified Blocks: 5120" and "Total Changed Data: <human-readable size, about 20 MiB>" on separate lines
67//! ```
68
69use anyhow::Result;
70use hexz_common::constants::{META_ENTRY_SIZE, OVERLAY_BLOCK_SIZE};
71use indicatif::HumanBytes;
72use std::fs::File;
73use std::io::{Read, Seek, SeekFrom};
74use std::path::PathBuf;
75
76/// Executes the diff command to analyze overlay modifications.
77///
78/// Reads the overlay metadata file (`.meta`) to determine which blocks have been
79/// modified, then displays statistics about the changes. The metadata file contains
80/// a sorted array of 64-bit block indices that were written during overlay operation.
81///
82/// # Arguments
83///
84/// * `overlay` - Path to the overlay file (e.g., `vm-state.overlay`)
85/// * `blocks` - If true, display block-level statistics with human-readable sizes
86/// * `files` - If true, display individual modified block indices (file mapping not implemented)
87///
88/// # Output Format
89///
90/// **Blocks Mode:**
91/// ```text
92/// --- Overlay Statistics ---
93/// Modified Blocks: 5120
94/// Total Changed Data: 20.0 MB
95/// ```
96///
97/// **Files Mode:**
98/// ```text
99/// --- Modified Files (Heuristic) ---
100/// File resolution is not yet implemented. Use --blocks for raw stats.
101/// Modified Block Indices:
102///   Block 128
103///   Block 129
104///   Block 256
105///   ...
106/// ```
107///
108/// **Default Mode:**
109/// ```text
110/// Overlay: "vm-state.overlay"
111/// Modified Blocks: 5120
112/// Estimated Size: 20.0 MB
113/// ```
114///
115/// # File-Level Resolution (Future Enhancement)
116///
117/// To map block indices to files, the implementation would need to:
118/// 1. Read the base image partition table (MBR/GPT)
119/// 2. Identify the filesystem type (ext4, xfs, ntfs, etc.)
120/// 3. Parse the filesystem metadata (superblock, inode tables)
121/// 4. Map block indices to inode numbers
122/// 5. Resolve inode paths from directory entries
123///
124/// This requires filesystem-specific parsers and is left for future work.
125///
126/// # Errors
127///
128/// Returns an error if:
129/// - The overlay file does not exist (note: metadata file absence is not an error)
130/// - The metadata file cannot be opened or read
131/// - I/O errors occur while reading block indices
132///
133/// Note: If the metadata file does not exist, the command prints a message and
134/// returns successfully (interpreted as zero modifications).
135///
136/// # Examples
137///
138/// ```no_run
139/// use std::path::PathBuf;
140/// use hexz_cli::cmd::data::diff;
141///
142/// // Show summary of overlay changes
143/// diff::run(PathBuf::from("vm-state.overlay"), false, false)?;
144///
145/// // Display detailed statistics
146/// diff::run(PathBuf::from("vm-state.overlay"), true, false)?;
147///
148/// // List all modified block indices
149/// diff::run(PathBuf::from("vm-state.overlay"), false, true)?;
150/// # Ok::<(), anyhow::Error>(())
151/// ```
152pub fn run(overlay: PathBuf, blocks: bool, files: bool) -> Result<()> {
153    let meta_path = overlay.with_extension("meta");
154    if !meta_path.exists() {
155        println!("No metadata file found for overlay: {:?}", overlay);
156        return Ok(());
157    }
158
159    let mut f = File::open(&meta_path)?;
160    let len = f.metadata()?.len();
161    let count = len / META_ENTRY_SIZE as u64;
162
163    if blocks {
164        println!("--- Overlay Statistics ---");
165        println!("Modified Blocks: {}", count);
166        println!(
167            "Total Changed Data: {}",
168            HumanBytes(count * OVERLAY_BLOCK_SIZE)
169        );
170    }
171
172    if files {
173        println!("\n--- Modified Files (Heuristic) ---");
174        println!("File resolution is not yet implemented. Use --blocks for raw stats.");
175        // TODO: Future implementation:
176        // 1. Read base image MBR/GPT.
177        // 2. Identify partition.
178        // 3. Mount or parse filesystem (ext4/xfs).
179        // 4. Map block indices to file inodes.
180
181        println!("Modified Block Indices:");
182        let mut buf = [0u8; META_ENTRY_SIZE];
183        f.seek(SeekFrom::Start(0))?;
184        for _ in 0..count {
185            if f.read_exact(&mut buf).is_ok() {
186                let blk = u64::from_le_bytes(buf);
187                println!("  Block {}", blk);
188            }
189        }
190    }
191
192    if !blocks && !files {
193        // Default behavior: just show summary
194        println!("Overlay: {:?}", overlay);
195        println!("Modified Blocks: {}", count);
196        println!("Estimated Size: {}", HumanBytes(count * OVERLAY_BLOCK_SIZE));
197    }
198
199    Ok(())
200}