Skip to main content

hexz_cli/cmd/vm/
snap.rs

1//! Live snapshot creation via QEMU QMP (QEMU Machine Protocol).
2//!
3//! This command creates a live snapshot of a running VM by connecting to its
4//! QMP control socket, pausing the VM, dumping memory state via QEMU's migration
5//! mechanism, and then creating a new snapshot that includes both the overlay
6//! (modified disk blocks) and the captured memory dump.
7//!
8//! # QMP Protocol Integration
9//!
10//! QEMU Machine Protocol (QMP) is a JSON-based protocol for controlling QEMU:
11//!
12//! **Connection Sequence:**
13//! 1. Connect to Unix socket (created with `--qmp-socket` during boot)
14//! 2. Receive QMP greeting banner
15//! 3. Send `qmp_capabilities` to negotiate features
16//! 4. Send commands and receive JSON responses
17//!
18//! **Commands Used:**
19//! - `stop`: Pauses VM execution (sets state to "paused")
20//! - `migrate`: Triggers memory dump to file via `exec:cat > <path>`
21//! - `query-migrate`: Polls migration status ("active", "completed", "failed")
22//! - `cont`: Resumes VM execution after snapshot is complete
23//!
24//! **QMP Message Format:**
25//! ```json
26//! // Command (sent by client)
27//! {"execute": "stop"}
28//!
29//! // Response (received from QEMU)
30//! {"return": {}}
31//!
32//! // Status query response
33//! {"return": {"status": "completed", "total": 4294967296}}
34//! ```
35//!
36//! # Snapshot Format
37//!
38//! The live snapshot captures both persistent and volatile state:
39//!
40//! **Disk State (from overlay):**
41//! - Modified blocks since VM boot
42//! - 4 KiB granularity tracked in `.meta` file
43//! - Merged with base snapshot during commit
44//!
45//! **Memory State (from QEMU migration):**
46//! - Full RAM dump in QEMU migration format
47//! - Includes CPU registers, device state, page tables
48//! - Compressed with LZ4 by default for fast resume
49//!
50//! The resulting snapshot is a "thick" snapshot (default) containing all state
51//! needed to resume the VM independently of the base snapshot.
52//!
53//! # Use Cases
54//!
55//! - **Checkpoint and Restore**: Save VM state for later resume
56//! - **Testing and Development**: Create snapshots before risky operations
57//! - **Migration**: Capture running VM for transfer to another host
58//! - **Debugging**: Preserve exact VM state for post-mortem analysis
59//! - **Backup**: Create consistent backups while VM is running
60//!
61//! # Workflow
62//!
63//! 1. **Connect to QMP**: Opens Unix socket to running QEMU instance
//! 2. **Negotiate Capabilities**: Sends `qmp_capabilities` to leave negotiation mode so QEMU accepts commands
65//! 3. **Pause VM**: Sends `stop` command to freeze execution
66//! 4. **Dump Memory**: Uses `migrate` command with `exec:` URI to save RAM
67//! 5. **Poll Status**: Repeatedly checks migration progress until complete
68//! 6. **Create Snapshot**: Calls `commit` to merge overlay + memory
69//! 7. **Resume VM**: Sends `cont` command to unpause execution
70//!
71//! # Performance Characteristics
72//!
73//! - **Pause Time**: Typically 50-200 ms for `stop` command
74//! - **Memory Dump**: ~500-1000 MB/s (depends on storage bandwidth)
75//! - **Snapshot Creation**: ~200-500 MB/s (LZ4 compression)
76//! - **Total Downtime**: Typically 2-10 seconds for 4-8 GB VM
77//!
78//! # Error Handling
79//!
80//! If snapshot creation fails after pausing the VM, the command:
81//! 1. Attempts to resume the VM with `cont` command
82//! 2. Returns the snapshot error to the caller
83//! 3. Leaves overlay files intact for retry
84//!
85//! This ensures the VM is not left in a paused state even on failure.
86//!
87//! # Common Usage Patterns
88//!
89//! ```bash
90//! # Create live snapshot of running VM
91//! hexz vm snap \
92//!   --socket /tmp/vm.qmp \
93//!   --overlay vm-state.overlay \
94//!   --base vm-base.st \
95//!   --output vm-checkpoint.st
96//!
97//! # Resume from snapshot later
98//! hexz vm boot vm-checkpoint.st --ram 4G
99//! ```
100
101use anyhow::{Context, Result};
102use serde_json::Value;
103use std::io::{Read, Write};
104use std::os::unix::net::UnixStream;
105use std::path::PathBuf;
106use std::thread;
107use std::time::Duration;
108use tempfile::NamedTempFile;
109
110use crate::cmd::vm::commit;
111
/// Delay between consecutive `query-migrate` polls (500 ms).
///
/// **Architectural intent:** A compromise between noticing completion quickly
/// and not flooding the QMP socket with status requests.
const QMP_POLL_SLEEP: Duration = Duration::from_millis(500);

/// Size in bytes of the buffer used for each QMP socket read (4 KiB).
///
/// **Architectural intent:** QMP replies are usually well under 1 KiB, so a
/// single 4 KiB buffer covers the typical case without a large allocation.
const QMP_BUFFER_SIZE: usize = 4 * 1024;

/// Block size in bytes passed to the snapshot commit step (64 KiB).
///
/// **Architectural intent:** Intended to match the block size used by the
/// `pack` and `build` commands so snapshots are laid out consistently.
const DEFAULT_COMMIT_BLOCK_SIZE: u32 = 64 * 1024;
129
130/// Executes the live snapshot command via QMP.
131///
132/// Connects to a running QEMU instance via its QMP socket, pauses execution,
133/// dumps memory state to a temporary file, creates a new snapshot that merges
134/// the overlay and memory dump, and resumes execution. This enables capturing
135/// a consistent point-in-time snapshot without shutting down the VM.
136///
137/// # Arguments
138///
139/// * `socket_path` - Path to the QMP Unix socket (created with `--qmp-socket`)
140/// * `overlay_path` - Path to the overlay file containing modified disk blocks
141/// * `base_hexz_path` - Path to the base snapshot the VM was booted from
142/// * `output_path` - Path for the output snapshot file
143///
144/// # QMP Command Sequence
145///
146/// 1. Connect to `socket_path` and read greeting
147/// 2. Send `qmp_capabilities` and wait for acknowledgment
148/// 3. Send `stop` to pause VM
149/// 4. Send `migrate` with URI `exec:cat > <temp_file>`
150/// 5. Poll `query-migrate` until status is "completed" or "failed"
151/// 6. Call `commit::run()` to create snapshot with overlay + memory
152/// 7. Send `cont` to resume VM (even if commit fails)
153///
154/// # Snapshot Parameters
155///
156/// The snapshot is created with:
157/// - Compression: LZ4 (fast decompression for quick resume)
158/// - Block size: 64 KiB (default)
159/// - Dictionary training: Enabled (improves memory compression)
160/// - Thin mode: Disabled (creates standalone snapshot)
161///
162/// # Errors
163///
164/// Returns an error if:
165/// - QMP socket cannot be connected (VM not running or socket path wrong)
166/// - QMP commands fail (protocol error, QEMU internal error)
167/// - Memory migration fails (disk full, I/O error)
168/// - Commit operation fails (compression error, write failure)
169///
170/// Note: VM resume is attempted even if errors occur, to prevent leaving
171/// the VM in a paused state.
172///
173/// # Examples
174///
175/// ```no_run
176/// use std::path::PathBuf;
177/// use hexz_cli::cmd::vm::snap;
178///
179/// // Create live snapshot of running VM
180/// snap::run(
181///     PathBuf::from("/tmp/vm.qmp"),
182///     PathBuf::from("vm-state.overlay"),
183///     PathBuf::from("vm-base.hxz"),
184///     PathBuf::from("vm-checkpoint.hxz"),
185/// )?;
186/// # Ok::<(), anyhow::Error>(())
187/// ```
188pub fn run(
189    socket_path: PathBuf,
190    overlay_path: PathBuf,
191    base_hexz_path: PathBuf,
192    output_path: PathBuf,
193) -> Result<()> {
194    println!("Connecting to VM at {:?}", socket_path);
195    let mut stream = UnixStream::connect(&socket_path)
196        .context("Failed to connect to QMP socket. Is the VM running with --qmp-socket?")?;
197
198    read_response(&mut stream)?;
199    send_command(&mut stream, "qmp_capabilities", None)?;
200    read_response(&mut stream)?;
201
202    println!("Pausing VM...");
203    send_command(&mut stream, "stop", None)?;
204    read_response(&mut stream)?;
205
206    println!("Dumping Guest Memory...");
207    let mem_dump = NamedTempFile::new()?;
208    let mem_path = mem_dump.path().to_str().unwrap().to_string();
209
210    let migrate_cmd = format!("exec:cat > '{}'", mem_path.replace('\'', "'\\''"));
211    let args = serde_json::json!({ "uri": migrate_cmd });
212
213    send_command(&mut stream, "migrate", Some(args))?;
214    read_response(&mut stream)?;
215
216    print!("Saving memory");
217    loop {
218        send_command(&mut stream, "query-migrate", None)?;
219        let resp = read_response(&mut stream)?;
220
221        if let Some(status) = resp["return"]["status"].as_str() {
222            if status == "completed" {
223                println!(" Done.");
224                break;
225            } else if status == "failed" {
226                println!(" Memory dump failed. Resuming VM...");
227                send_command(&mut stream, "cont", None)?;
228                anyhow::bail!("Memory dump failed: {:?}", resp);
229            }
230        }
231        print!(".");
232        std::io::stdout().flush()?;
233        thread::sleep(QMP_POLL_SLEEP);
234    }
235
236    println!("Creating snapshot...");
237
238    let commit_result = commit::run(
239        base_hexz_path,
240        overlay_path,
241        Some(mem_dump.path().to_path_buf()),
242        output_path,
243        "lz4".to_string(),
244        DEFAULT_COMMIT_BLOCK_SIZE,
245        true,
246        None,
247        false, // Default to thick snapshot for live snaps
248    );
249
250    println!("Resuming VM...");
251    let resume_result = send_command(&mut stream, "cont", None);
252    let _ = read_response(&mut stream);
253
254    commit_result.context("Snapshot commit failed")?;
255    resume_result.context("Failed to resume VM")?;
256
257    Ok(())
258}
259
260/// Sends a QMP command to the QEMU instance.
261///
262/// Constructs a QMP command JSON object with the specified command name and
263/// optional arguments, serializes it, and writes it to the QMP socket.
264///
265/// # Arguments
266///
267/// * `stream` - Mutable reference to the connected QMP Unix socket
268/// * `cmd` - QMP command name (e.g., "stop", "cont", "query-migrate")
269/// * `args` - Optional JSON object containing command arguments
270///
271/// # QMP Command Format
272///
273/// ```json
274/// {"execute": "command_name"}
275/// {"execute": "command_name", "arguments": {...}}
276/// ```
277///
278/// # Errors
279///
280/// Returns an error if:
281/// - JSON serialization fails (should never happen with valid commands)
282/// - Socket write fails (connection closed, I/O error)
283fn send_command(stream: &mut UnixStream, cmd: &str, args: Option<Value>) -> Result<()> {
284    let mut json = serde_json::json!({
285        "execute": cmd
286    });
287    if let Some(a) = args {
288        json["arguments"] = a;
289    }
290    let data = serde_json::to_string(&json)?;
291    stream.write_all(data.as_bytes())?;
292    Ok(())
293}
294
295/// Reads a QMP response from the QEMU instance.
296///
297/// Reads data from the QMP socket, parses line-delimited JSON, and returns the
298/// first object containing a "return" field. This filters out QMP events and
299/// focuses on command responses.
300///
301/// # Arguments
302///
303/// * `stream` - Mutable reference to the connected QMP Unix socket
304///
305/// # Response Handling
306///
307/// QMP sends line-delimited JSON. Each line can be:
308/// - Command response: `{"return": {...}}`
309/// - Event notification: `{"event": "...", "data": {...}}`
310/// - Error response: `{"error": {...}}`
311///
312/// This function returns the first line with a "return" field, which corresponds
313/// to the most recent command's response.
314///
315/// # Errors
316///
317/// Returns an error if:
318/// - Socket read fails (connection closed, I/O error)
319/// - JSON parsing fails (malformed QMP response)
320///
321/// Note: If no response with "return" is found, returns empty JSON object `{}`.
322fn read_response(stream: &mut UnixStream) -> Result<Value> {
323    let mut buf = [0u8; QMP_BUFFER_SIZE];
324    let n = stream.read(&mut buf)?;
325    let s = String::from_utf8_lossy(&buf[..n]);
326
327    for line in s.lines() {
328        if let Ok(val) = serde_json::from_str::<Value>(line)
329            && val.get("return").is_some()
330        {
331            return Ok(val);
332        }
333    }
334    Ok(serde_json::json!({}))
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::net::UnixStream;

    /// Sends `cmd` through one end of a socket pair and returns the JSON
    /// that arrived on the other end.
    fn capture_sent(cmd: &str, args: Option<serde_json::Value>) -> serde_json::Value {
        let (mut client, mut server) = UnixStream::pair().unwrap();
        send_command(&mut client, cmd, args).unwrap();

        let mut buf = [0u8; 1024];
        let n = server.read(&mut buf).unwrap();
        serde_json::from_slice(&buf[..n]).unwrap()
    }

    /// Plays `raw` as the "server" side of a socket pair and returns what
    /// `read_response` produces on the client side.
    fn respond_with(raw: &str) -> serde_json::Value {
        let (mut client, mut server) = UnixStream::pair().unwrap();
        server.write_all(raw.as_bytes()).unwrap();
        // Close the write side so the client read cannot block waiting for more.
        server.shutdown(std::net::Shutdown::Write).unwrap();

        read_response(&mut client).unwrap()
    }

    /// A command without arguments serializes with only the "execute" key.
    #[test]
    fn test_send_command_no_args() {
        let sent = capture_sent("stop", None);

        assert_eq!(sent["execute"], "stop");
        assert!(sent.get("arguments").is_none());
    }

    /// Arguments are carried under the "arguments" key.
    #[test]
    fn test_send_command_with_args() {
        let args = serde_json::json!({ "uri": "exec:cat > /tmp/mem.bin" });
        let sent = capture_sent("migrate", Some(args));

        assert_eq!(sent["execute"], "migrate");
        assert_eq!(sent["arguments"]["uri"], "exec:cat > /tmp/mem.bin");
    }

    /// The handshake command `qmp_capabilities` is sent verbatim.
    #[test]
    fn test_send_command_qmp_capabilities() {
        let sent = capture_sent("qmp_capabilities", None);

        assert_eq!(sent["execute"], "qmp_capabilities");
    }

    /// The resume command `cont` is sent verbatim.
    #[test]
    fn test_send_command_cont() {
        let sent = capture_sent("cont", None);

        assert_eq!(sent["execute"], "cont");
    }

    /// A plain `{"return": {}}` reply is passed through.
    #[test]
    fn test_read_response_with_return() {
        let val = respond_with("{\"return\": {}}\n");

        assert!(val.get("return").is_some());
    }

    /// A completed-migration reply keeps its status and total fields.
    #[test]
    fn test_read_response_migration_completed() {
        let val =
            respond_with("{\"return\": {\"status\": \"completed\", \"total\": 4294967296}}\n");

        assert_eq!(val["return"]["status"], "completed");
        assert_eq!(val["return"]["total"], 4294967296u64);
    }

    /// A failed-migration reply keeps its status field.
    #[test]
    fn test_read_response_migration_failed() {
        let val = respond_with("{\"return\": {\"status\": \"failed\"}}\n");

        assert_eq!(val["return"]["status"], "failed");
    }

    /// An event on its own (no "return" field) yields the empty object.
    #[test]
    fn test_read_response_event_only() {
        let val = respond_with("{\"event\": \"STOP\", \"data\": {}}\n");

        // No "return" field, should get empty object
        assert!(val.get("return").is_none());
        assert_eq!(val, serde_json::json!({}));
    }

    /// With several lines, the first one carrying "return" wins.
    #[test]
    fn test_read_response_multi_line() {
        let val =
            respond_with("{\"event\": \"STOP\"}\n{\"return\": {\"status\": \"active\"}}\n");

        assert_eq!(val["return"]["status"], "active");
    }

    /// The QMP greeting banner has no "return" and yields the empty object.
    #[test]
    fn test_read_response_qmp_greeting() {
        let greeting =
            "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 2, \"major\": 8}}}}\n";
        let val = respond_with(greeting);

        assert_eq!(val, serde_json::json!({}));
    }

    /// The status-poll command `query-migrate` is sent verbatim.
    #[test]
    fn test_send_command_query_migrate() {
        let sent = capture_sent("query-migrate", None);

        assert_eq!(sent["execute"], "query-migrate");
    }
}