hexz_cli/cmd/vm/snap.rs
1//! Live snapshot creation via QEMU QMP (QEMU Machine Protocol).
2//!
3//! This command creates a live snapshot of a running VM by connecting to its
4//! QMP control socket, pausing the VM, dumping memory state via QEMU's migration
5//! mechanism, and then creating a new snapshot that includes both the overlay
6//! (modified disk blocks) and the captured memory dump.
7//!
8//! # QMP Protocol Integration
9//!
10//! QEMU Machine Protocol (QMP) is a JSON-based protocol for controlling QEMU:
11//!
12//! **Connection Sequence:**
13//! 1. Connect to Unix socket (created with `--qmp-socket` during boot)
14//! 2. Receive QMP greeting banner
15//! 3. Send `qmp_capabilities` to negotiate features
16//! 4. Send commands and receive JSON responses
17//!
18//! **Commands Used:**
19//! - `stop`: Pauses VM execution (sets state to "paused")
20//! - `migrate`: Triggers memory dump to file via `exec:cat > <path>`
21//! - `query-migrate`: Polls migration status ("active", "completed", "failed")
22//! - `cont`: Resumes VM execution after snapshot is complete
23//!
24//! **QMP Message Format:**
25//! ```json
26//! // Command (sent by client)
27//! {"execute": "stop"}
28//!
29//! // Response (received from QEMU)
30//! {"return": {}}
31//!
32//! // Status query response
33//! {"return": {"status": "completed", "total": 4294967296}}
34//! ```
35//!
36//! # Snapshot Format
37//!
38//! The live snapshot captures both persistent and volatile state:
39//!
40//! **Disk State (from overlay):**
41//! - Modified blocks since VM boot
42//! - 4 KiB granularity tracked in `.meta` file
43//! - Merged with base snapshot during commit
44//!
45//! **Memory State (from QEMU migration):**
46//! - Full RAM dump in QEMU migration format
47//! - Includes CPU registers, device state, page tables
48//! - Compressed with LZ4 by default for fast resume
49//!
50//! The resulting snapshot is a "thick" snapshot (default) containing all state
51//! needed to resume the VM independently of the base snapshot.
52//!
53//! # Use Cases
54//!
55//! - **Checkpoint and Restore**: Save VM state for later resume
56//! - **Testing and Development**: Create snapshots before risky operations
57//! - **Migration**: Capture running VM for transfer to another host
58//! - **Debugging**: Preserve exact VM state for post-mortem analysis
59//! - **Backup**: Create consistent backups while VM is running
60//!
61//! # Workflow
62//!
63//! 1. **Connect to QMP**: Opens Unix socket to running QEMU instance
//! 2. **Negotiate Capabilities**: Sends `qmp_capabilities` to leave negotiation mode and enable commands
65//! 3. **Pause VM**: Sends `stop` command to freeze execution
66//! 4. **Dump Memory**: Uses `migrate` command with `exec:` URI to save RAM
67//! 5. **Poll Status**: Repeatedly checks migration progress until complete
68//! 6. **Create Snapshot**: Calls `commit` to merge overlay + memory
69//! 7. **Resume VM**: Sends `cont` command to unpause execution
70//!
71//! # Performance Characteristics
72//!
73//! - **Pause Time**: Typically 50-200 ms for `stop` command
74//! - **Memory Dump**: ~500-1000 MB/s (depends on storage bandwidth)
75//! - **Snapshot Creation**: ~200-500 MB/s (LZ4 compression)
76//! - **Total Downtime**: Typically 2-10 seconds for 4-8 GB VM
77//!
78//! # Error Handling
79//!
80//! If snapshot creation fails after pausing the VM, the command:
81//! 1. Attempts to resume the VM with `cont` command
82//! 2. Returns the snapshot error to the caller
83//! 3. Leaves overlay files intact for retry
84//!
85//! This ensures the VM is not left in a paused state even on failure.
86//!
87//! # Common Usage Patterns
88//!
89//! ```bash
//! # Create live snapshot of running VM
//! hexz vm snap \
//!   --socket /tmp/vm.qmp \
//!   --overlay vm-state.overlay \
//!   --base vm-base.hxz \
//!   --output vm-checkpoint.hxz
//!
//! # Resume from snapshot later
//! hexz vm boot vm-checkpoint.hxz --ram 4G
99//! ```
100
101use anyhow::{Context, Result};
102use serde_json::Value;
103use std::io::{Read, Write};
104use std::os::unix::net::UnixStream;
105use std::path::PathBuf;
106use std::thread;
107use std::time::Duration;
108use tempfile::NamedTempFile;
109
110use crate::cmd::vm::commit;
111
/// Pause inserted between consecutive `query-migrate` polls (500 ms).
///
/// The polling loop in [`run`] sleeps for this long after every status check
/// that did not report a terminal state. A shorter value detects completion
/// sooner at the cost of more QMP round-trips; a longer value does the opposite.
const QMP_POLL_SLEEP: Duration = Duration::from_millis(500);
117
/// Size, in bytes, of the buffer handed to each QMP socket read (4 KiB).
///
/// `read_response` allocates a fixed array of this length for reading from the
/// socket, so the value bounds how many bytes one `read` call can deliver.
const QMP_BUFFER_SIZE: usize = 4 * 1024;
123
/// Block size, in bytes, passed to `commit::run` for live snapshots (64 KiB).
///
/// [`run`] always commits with this value; it is not configurable from the
/// `snap` command itself.
const DEFAULT_COMMIT_BLOCK_SIZE: u32 = 64 * 1024;
129
130/// Executes the live snapshot command via QMP.
131///
132/// Connects to a running QEMU instance via its QMP socket, pauses execution,
133/// dumps memory state to a temporary file, creates a new snapshot that merges
134/// the overlay and memory dump, and resumes execution. This enables capturing
135/// a consistent point-in-time snapshot without shutting down the VM.
136///
137/// # Arguments
138///
139/// * `socket_path` - Path to the QMP Unix socket (created with `--qmp-socket`)
140/// * `overlay_path` - Path to the overlay file containing modified disk blocks
141/// * `base_hexz_path` - Path to the base snapshot the VM was booted from
142/// * `output_path` - Path for the output snapshot file
143///
144/// # QMP Command Sequence
145///
146/// 1. Connect to `socket_path` and read greeting
147/// 2. Send `qmp_capabilities` and wait for acknowledgment
148/// 3. Send `stop` to pause VM
149/// 4. Send `migrate` with URI `exec:cat > <temp_file>`
150/// 5. Poll `query-migrate` until status is "completed" or "failed"
151/// 6. Call `commit::run()` to create snapshot with overlay + memory
152/// 7. Send `cont` to resume VM (even if commit fails)
153///
154/// # Snapshot Parameters
155///
156/// The snapshot is created with:
157/// - Compression: LZ4 (fast decompression for quick resume)
158/// - Block size: 64 KiB (default)
159/// - Dictionary training: Enabled (improves memory compression)
160/// - Thin mode: Disabled (creates standalone snapshot)
161///
162/// # Errors
163///
164/// Returns an error if:
165/// - QMP socket cannot be connected (VM not running or socket path wrong)
166/// - QMP commands fail (protocol error, QEMU internal error)
167/// - Memory migration fails (disk full, I/O error)
168/// - Commit operation fails (compression error, write failure)
169///
170/// Note: VM resume is attempted even if errors occur, to prevent leaving
171/// the VM in a paused state.
172///
173/// # Examples
174///
175/// ```no_run
176/// use std::path::PathBuf;
177/// use hexz_cli::cmd::vm::snap;
178///
179/// // Create live snapshot of running VM
180/// snap::run(
181/// PathBuf::from("/tmp/vm.qmp"),
182/// PathBuf::from("vm-state.overlay"),
183/// PathBuf::from("vm-base.hxz"),
184/// PathBuf::from("vm-checkpoint.hxz"),
185/// )?;
186/// # Ok::<(), anyhow::Error>(())
187/// ```
188pub fn run(
189 socket_path: PathBuf,
190 overlay_path: PathBuf,
191 base_hexz_path: PathBuf,
192 output_path: PathBuf,
193) -> Result<()> {
194 println!("Connecting to VM at {:?}", socket_path);
195 let mut stream = UnixStream::connect(&socket_path)
196 .context("Failed to connect to QMP socket. Is the VM running with --qmp-socket?")?;
197
198 read_response(&mut stream)?;
199 send_command(&mut stream, "qmp_capabilities", None)?;
200 read_response(&mut stream)?;
201
202 println!("Pausing VM...");
203 send_command(&mut stream, "stop", None)?;
204 read_response(&mut stream)?;
205
206 println!("Dumping Guest Memory...");
207 let mem_dump = NamedTempFile::new()?;
208 let mem_path = mem_dump.path().to_str().unwrap().to_string();
209
210 let migrate_cmd = format!("exec:cat > '{}'", mem_path.replace('\'', "'\\''"));
211 let args = serde_json::json!({ "uri": migrate_cmd });
212
213 send_command(&mut stream, "migrate", Some(args))?;
214 read_response(&mut stream)?;
215
216 print!("Saving memory");
217 loop {
218 send_command(&mut stream, "query-migrate", None)?;
219 let resp = read_response(&mut stream)?;
220
221 if let Some(status) = resp["return"]["status"].as_str() {
222 if status == "completed" {
223 println!(" Done.");
224 break;
225 } else if status == "failed" {
226 println!(" Memory dump failed. Resuming VM...");
227 send_command(&mut stream, "cont", None)?;
228 anyhow::bail!("Memory dump failed: {:?}", resp);
229 }
230 }
231 print!(".");
232 std::io::stdout().flush()?;
233 thread::sleep(QMP_POLL_SLEEP);
234 }
235
236 println!("Creating snapshot...");
237
238 let commit_result = commit::run(
239 base_hexz_path,
240 overlay_path,
241 Some(mem_dump.path().to_path_buf()),
242 output_path,
243 "lz4".to_string(),
244 DEFAULT_COMMIT_BLOCK_SIZE,
245 true,
246 None,
247 false, // Default to thick snapshot for live snaps
248 );
249
250 println!("Resuming VM...");
251 let resume_result = send_command(&mut stream, "cont", None);
252 let _ = read_response(&mut stream);
253
254 commit_result.context("Snapshot commit failed")?;
255 resume_result.context("Failed to resume VM")?;
256
257 Ok(())
258}
259
260/// Sends a QMP command to the QEMU instance.
261///
262/// Constructs a QMP command JSON object with the specified command name and
263/// optional arguments, serializes it, and writes it to the QMP socket.
264///
265/// # Arguments
266///
267/// * `stream` - Mutable reference to the connected QMP Unix socket
268/// * `cmd` - QMP command name (e.g., "stop", "cont", "query-migrate")
269/// * `args` - Optional JSON object containing command arguments
270///
271/// # QMP Command Format
272///
273/// ```json
274/// {"execute": "command_name"}
275/// {"execute": "command_name", "arguments": {...}}
276/// ```
277///
278/// # Errors
279///
280/// Returns an error if:
281/// - JSON serialization fails (should never happen with valid commands)
282/// - Socket write fails (connection closed, I/O error)
283fn send_command(stream: &mut UnixStream, cmd: &str, args: Option<Value>) -> Result<()> {
284 let mut json = serde_json::json!({
285 "execute": cmd
286 });
287 if let Some(a) = args {
288 json["arguments"] = a;
289 }
290 let data = serde_json::to_string(&json)?;
291 stream.write_all(data.as_bytes())?;
292 Ok(())
293}
294
295/// Reads a QMP response from the QEMU instance.
296///
297/// Reads data from the QMP socket, parses line-delimited JSON, and returns the
298/// first object containing a "return" field. This filters out QMP events and
299/// focuses on command responses.
300///
301/// # Arguments
302///
303/// * `stream` - Mutable reference to the connected QMP Unix socket
304///
305/// # Response Handling
306///
307/// QMP sends line-delimited JSON. Each line can be:
308/// - Command response: `{"return": {...}}`
309/// - Event notification: `{"event": "...", "data": {...}}`
310/// - Error response: `{"error": {...}}`
311///
312/// This function returns the first line with a "return" field, which corresponds
313/// to the most recent command's response.
314///
315/// # Errors
316///
317/// Returns an error if:
318/// - Socket read fails (connection closed, I/O error)
319/// - JSON parsing fails (malformed QMP response)
320///
321/// Note: If no response with "return" is found, returns empty JSON object `{}`.
322fn read_response(stream: &mut UnixStream) -> Result<Value> {
323 let mut buf = [0u8; QMP_BUFFER_SIZE];
324 let n = stream.read(&mut buf)?;
325 let s = String::from_utf8_lossy(&buf[..n]);
326
327 for line in s.lines() {
328 if let Ok(val) = serde_json::from_str::<Value>(line)
329 && val.get("return").is_some()
330 {
331 return Ok(val);
332 }
333 }
334 Ok(serde_json::json!({}))
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::net::UnixStream;

    /// Sends `cmd` through `send_command` over a socket pair and returns the
    /// JSON document observed on the peer end.
    fn capture_sent(cmd: &str, args: Option<serde_json::Value>) -> serde_json::Value {
        let (mut client, mut server) = UnixStream::pair().unwrap();
        send_command(&mut client, cmd, args).unwrap();

        let mut buf = [0u8; 1024];
        let len = server.read(&mut buf).unwrap();
        serde_json::from_slice(&buf[..len]).unwrap()
    }

    /// Writes `payload` from the peer end, half-closes it so the reader sees
    /// EOF instead of blocking, and returns what `read_response` produces.
    fn reply_for(payload: &str) -> serde_json::Value {
        let (mut client, mut server) = UnixStream::pair().unwrap();
        server.write_all(payload.as_bytes()).unwrap();
        server.shutdown(std::net::Shutdown::Write).unwrap();
        read_response(&mut client).unwrap()
    }

    /// A command without arguments carries only the `execute` member.
    #[test]
    fn test_send_command_no_args() {
        let sent = capture_sent("stop", None);

        assert_eq!(sent["execute"], "stop");
        assert!(sent.get("arguments").is_none());
    }

    /// Arguments are forwarded under the `arguments` member.
    #[test]
    fn test_send_command_with_args() {
        let args = serde_json::json!({ "uri": "exec:cat > /tmp/mem.bin" });
        let sent = capture_sent("migrate", Some(args));

        assert_eq!(sent["execute"], "migrate");
        assert_eq!(sent["arguments"]["uri"], "exec:cat > /tmp/mem.bin");
    }

    /// The handshake command is serialized like any other.
    #[test]
    fn test_send_command_qmp_capabilities() {
        let sent = capture_sent("qmp_capabilities", None);

        assert_eq!(sent["execute"], "qmp_capabilities");
    }

    /// The resume command is serialized like any other.
    #[test]
    fn test_send_command_cont() {
        let sent = capture_sent("cont", None);

        assert_eq!(sent["execute"], "cont");
    }

    /// A plain `return` reply is handed back to the caller.
    #[test]
    fn test_read_response_with_return() {
        let val = reply_for("{\"return\": {}}\n");

        assert!(val.get("return").is_some());
    }

    /// A completed migration status is exposed under `return`.
    #[test]
    fn test_read_response_migration_completed() {
        let val =
            reply_for("{\"return\": {\"status\": \"completed\", \"total\": 4294967296}}\n");

        assert_eq!(val["return"]["status"], "completed");
        assert_eq!(val["return"]["total"], 4294967296u64);
    }

    /// A failed migration status is exposed under `return`.
    #[test]
    fn test_read_response_migration_failed() {
        let val = reply_for("{\"return\": {\"status\": \"failed\"}}\n");

        assert_eq!(val["return"]["status"], "failed");
    }

    /// An event on its own is not a reply, so the empty object comes back.
    #[test]
    fn test_read_response_event_only() {
        let val = reply_for("{\"event\": \"STOP\", \"data\": {}}\n");

        assert!(val.get("return").is_none());
        assert_eq!(val, serde_json::json!({}));
    }

    /// With an event followed by a reply, the reply wins.
    #[test]
    fn test_read_response_multi_line() {
        let val = reply_for("{\"event\": \"STOP\"}\n{\"return\": {\"status\": \"active\"}}\n");

        assert_eq!(val["return"]["status"], "active");
    }

    /// The greeting banner has no `return` member and yields the empty object.
    #[test]
    fn test_read_response_qmp_greeting() {
        let greeting =
            "{\"QMP\": {\"version\": {\"qemu\": {\"micro\": 0, \"minor\": 2, \"major\": 8}}}}\n";
        let val = reply_for(greeting);

        assert_eq!(val, serde_json::json!({}));
    }

    /// The status-poll command is serialized like any other.
    #[test]
    fn test_send_command_query_migrate() {
        let sent = capture_sent("query-migrate", None);

        assert_eq!(sent["execute"], "query-migrate");
    }
}