Skip to main content

ryra_vm/
machine.rs

1use std::path::{Path, PathBuf};
2use std::process::Stdio;
3use std::sync::Mutex;
4
5use anyhow::{Context, Result};
6use tokio::process::Command;
7
8use crate::image::Image;
9
/// Global registry of active VM PIDs and work dirs, for signal cleanup.
///
/// Entries are pushed by `register_vm`, removed by `deregister_vm`, and
/// drained all at once by `cleanup_all_vms`.
static ACTIVE_VMS: Mutex<Vec<ActiveVm>> = Mutex::new(Vec::new());

/// One registry entry: the data `cleanup_all_vms` needs to tear a VM down.
struct ActiveVm {
    /// OS process id of the QEMU process (taken from the child handle).
    pid: u32,
    /// Per-VM work directory that is removed during cleanup.
    work_dir: PathBuf,
}
17
18/// Kill all active VMs and clean up their work dirs.
19/// Called from the Ctrl-C handler to ensure no orphaned processes.
20pub fn cleanup_all_vms() {
21    let vms = {
22        let mut guard = ACTIVE_VMS.lock().unwrap_or_else(|e| e.into_inner());
23        std::mem::take(&mut *guard)
24    };
25    for vm in &vms {
26        // Send SIGKILL to the QEMU process
27        unsafe {
28            libc::kill(vm.pid as i32, libc::SIGKILL);
29        }
30        let _ = std::fs::remove_dir_all(&vm.work_dir);
31    }
32    if !vms.is_empty() {
33        eprintln!("\nCleaned up {} VM(s)", vms.len());
34    }
35}
36
37fn register_vm(pid: u32, work_dir: &Path) {
38    let mut guard = ACTIVE_VMS.lock().unwrap_or_else(|e| e.into_inner());
39    guard.push(ActiveVm {
40        pid,
41        work_dir: work_dir.to_path_buf(),
42    });
43}
44
45fn deregister_vm(pid: u32) {
46    let mut guard = ACTIVE_VMS.lock().unwrap_or_else(|e| e.into_inner());
47    guard.retain(|vm| vm.pid != pid);
48}
49
/// A running QEMU VM for E2E testing.
///
/// Dropping a `Machine` kills its QEMU process (see the `Drop` impl). Use
/// `destroy` for an orderly shutdown, or `keep_alive` to detach from it.
pub struct Machine {
    /// VM name; the spawn functions set it to `ryra-test-<test_id>`.
    pub name: String,
    /// Host used for SSH; the spawn functions set it to `127.0.0.1`.
    pub ssh_host: String,
    /// Host-side TCP port that QEMU forwards to the guest's port 22.
    pub ssh_port: u16,
    /// Per-VM directory holding the disk image, SSH key and serial log.
    pub work_dir: PathBuf,
    /// Handle to the QEMU child process.
    process: tokio::process::Child,
}
58
59impl Drop for Machine {
60    /// Kill the QEMU process if it's still alive. Needed because tokio's
61    /// `Child::drop` does NOT reap the process by default, so any early
62    /// return from `spawn_*` (e.g. SSH timeout during wait_for_ssh) would
63    /// otherwise orphan a QEMU holding the forwarded SSH port. Async destroy
64    /// paths that want to survive this (keep_alive) use `std::mem::forget`
65    /// to skip Drop entirely.
66    fn drop(&mut self) {
67        if let Some(pid) = self.process.id() {
68            // SIGKILL is sync and cheap. `Child::start_kill` would work too but
69            // we're in Drop so we can't .await anything.
70            unsafe {
71                libc::kill(pid as i32, libc::SIGKILL);
72            }
73            deregister_vm(pid);
74        }
75    }
76}
77
/// Options that affect how the VM is launched.
pub struct SpawnOpts {
    /// Whether to launch with KVM acceleration; also selects the boot timeout.
    pub use_kvm: bool,
    /// Guest RAM in MiB.
    pub memory_mb: u32,
    /// Number of virtual CPUs.
    pub cpus: u32,
    /// Virtual disk size in GB. The disk is copy-on-write so actual host
    /// usage is only the delta from the base image.
    pub disk_gb: u32,
}

impl SpawnOpts {
    /// SSH + cloud-init timeout: 300 s with KVM, 900 s (15 minutes) without,
    /// because everything is much slower under TCG emulation.
    pub fn boot_timeout(&self) -> std::time::Duration {
        let secs: u64 = match self.use_kvm {
            true => 300,
            false => 900,
        };
        std::time::Duration::from_secs(secs)
    }
}

impl Default for SpawnOpts {
    /// KVM enabled, 2048 MiB RAM, 2 CPUs, 20 GB disk.
    fn default() -> Self {
        let (use_kvm, memory_mb, cpus, disk_gb) = (true, 2048, 2, 20);
        Self {
            use_kvm,
            memory_mb,
            cpus,
            disk_gb,
        }
    }
}
109
110pub fn random_id() -> String {
111    use rand::Rng;
112    let mut rng = rand::rng();
113    format!("{:06x}", rng.random::<u32>() & 0xffffff)
114}
115
116impl Machine {
117    pub async fn spawn(
118        image: &Image,
119        test_id: &str,
120        ssh_port: u16,
121        opts: &SpawnOpts,
122    ) -> Result<Self> {
123        if let Some(ref snapshot) = image.snapshot {
124            return Self::spawn_from_snapshot(image, snapshot, test_id, ssh_port, opts).await;
125        }
126        Self::spawn_cold(image, test_id, ssh_port, opts).await
127    }
128
129    /// Instant boot: restore from a saved QEMU snapshot (<1s to SSH).
130    async fn spawn_from_snapshot(
131        image: &Image,
132        snapshot: &crate::image::SnapshotFiles,
133        test_id: &str,
134        ssh_port: u16,
135        opts: &SpawnOpts,
136    ) -> Result<Self> {
137        let name = format!("ryra-test-{test_id}");
138        let work_dir = vm_work_base_dir()?.join(&name);
139        tokio::fs::create_dir_all(&work_dir)
140            .await
141            .context("failed to create VM work directory")?;
142
143        // Reflink-copy snapshot files for per-VM isolation (instant on btrfs)
144        let disk = work_dir.join("disk.qcow2");
145        let efi_vars = work_dir.join("efivars.qcow2");
146        let seed_iso = work_dir.join("seed.iso");
147
148        run_cmd(
149            "cp",
150            &[
151                "--reflink=auto",
152                &snapshot.disk.to_string_lossy(),
153                &disk.to_string_lossy(),
154            ],
155        )
156        .await
157        .context("failed to copy snapshot disk")?;
158        run_cmd(
159            "cp",
160            &[
161                "--reflink=auto",
162                &snapshot.efivars.to_string_lossy(),
163                &efi_vars.to_string_lossy(),
164            ],
165        )
166        .await
167        .context("failed to copy snapshot efivars")?;
168        run_cmd(
169            "cp",
170            &[
171                "--reflink=auto",
172                &snapshot.seed_iso.to_string_lossy(),
173                &seed_iso.to_string_lossy(),
174            ],
175        )
176        .await
177        .context("failed to copy snapshot seed ISO")?;
178
179        // Copy shared SSH key to work dir (ssh_key_path() expects it there)
180        let key_path = work_dir.join("id_ed25519");
181        tokio::fs::copy(&snapshot.ssh_key, &key_path)
182            .await
183            .context("failed to copy SSH key")?;
184
185        // Build QEMU args — memory must match the snapshot's size exactly
186        let memory = snapshot.memory_mb.to_string();
187        let cpus = opts.cpus.to_string();
188        let efi_code_arg = format!(
189            "if=pflash,format=raw,file={},readonly=on",
190            image.efi_code.display()
191        );
192        let efi_vars_arg = format!("if=pflash,format=qcow2,file={}", efi_vars.display());
193        let disk_arg = format!("if=virtio,file={},format=qcow2", disk.display());
194        let seed_arg = format!(
195            "if=virtio,file={},format=raw,readonly=on",
196            seed_iso.display()
197        );
198        let nic_arg = format!("user,hostfwd=tcp::{ssh_port}-:22");
199        let serial_log = work_dir.join("serial.log");
200        let serial_arg = format!("file:{}", serial_log.display());
201        let shared_store = image_shared_store_dir()?;
202        tokio::fs::create_dir_all(&shared_store).await.ok();
203        let virtfs_arg = format!(
204            "local,path={},mount_tag=images,security_model=none,readonly=on",
205            shared_store.display()
206        );
207
208        let mut args: Vec<&str> = vec![
209            "-machine",
210            "virt",
211            "-cpu",
212            if opts.use_kvm { "host" } else { "max" },
213            "-m",
214            &memory,
215            "-smp",
216            &cpus,
217            "-drive",
218            &efi_code_arg,
219            "-drive",
220            &efi_vars_arg,
221            "-drive",
222            &disk_arg,
223            "-drive",
224            &seed_arg,
225            "-nic",
226            &nic_arg,
227            "-nographic",
228            "-serial",
229            &serial_arg,
230            "-monitor",
231            "none",
232            "-virtfs",
233            &virtfs_arg,
234            "-loadvm",
235            "ready",
236        ];
237
238        if opts.use_kvm {
239            args.extend(crate::accel_args());
240        }
241
242        let process = Command::new("qemu-system-aarch64")
243            .args(&args)
244            .stdout(Stdio::null())
245            .stderr(Stdio::null())
246            .spawn()
247            .context("failed to start QEMU — is qemu-system-aarch64 installed?")?;
248
249        let machine = Machine {
250            name,
251            ssh_host: "127.0.0.1".to_string(),
252            ssh_port,
253            work_dir,
254            process,
255        };
256
257        if let Some(pid) = machine.process.id() {
258            register_vm(pid, &machine.work_dir);
259        }
260
261        // Wait for SSH — snapshot restore is sub-second once pages are warm,
262        // but the very first restore in a session can take >30s on a cold page
263        // cache (hundreds of MB of snapshot state faulted in from disk).
264        machine
265            .wait_for_ssh(std::time::Duration::from_secs(60))
266            .await?;
267
268        // Fix clock skew: the snapshot's system clock is frozen at save time,
269        // which can be hours or days behind wall clock. Services that enforce
270        // fresh time (authelia's NTP startup check, TLS cert validity, OIDC
271        // token expiry, …) fail hard when the guest clock is off, so we
272        // force-set it before any test starts.
273        //
274        // Expand the timestamp on the HOST side and send it as a literal.
275        // The obvious-looking `date -s "$(date -u -R)"` evaluates the inner
276        // `$()` inside the guest, which just reads the frozen clock and
277        // rewrites it to itself — effectively a no-op.
278        let host_epoch = std::time::SystemTime::now()
279            .duration_since(std::time::UNIX_EPOCH)
280            .map(|d| d.as_secs())
281            .unwrap_or(0);
282        if host_epoch > 0
283            && let Err(e) = machine
284                .exec(&format!(
285                    "sudo date -s @{host_epoch} >/dev/null 2>&1 && sudo hwclock --systohc >/dev/null 2>&1 || true"
286                ))
287                .await
288        {
289            eprintln!("  warning: failed to sync clock in snapshot-booted VM: {e:#}");
290        }
291
292        Ok(machine)
293    }
294
295    /// Cold boot: traditional boot with cloud-init (fallback when no snapshot).
296    async fn spawn_cold(
297        image: &Image,
298        test_id: &str,
299        ssh_port: u16,
300        opts: &SpawnOpts,
301    ) -> Result<Self> {
302        let name = format!("ryra-test-{test_id}");
303        let work_dir = vm_work_base_dir()?.join(&name);
304        tokio::fs::create_dir_all(&work_dir)
305            .await
306            .context("failed to create VM work directory")?;
307
308        // Create a copy-on-write disk backed by the base image
309        let disk = work_dir.join("disk.qcow2");
310        let disk_size = format!("{}G", opts.disk_gb);
311        run_cmd(
312            "qemu-img",
313            &[
314                "create",
315                "-f",
316                "qcow2",
317                "-b",
318                &image.path.to_string_lossy(),
319                "-F",
320                "qcow2",
321                &disk.to_string_lossy(),
322                &disk_size,
323            ],
324        )
325        .await
326        .context("qemu-img create failed")?;
327
328        // Copy EFI vars template (writable per-VM)
329        let efi_vars = work_dir.join("efivars.fd");
330        tokio::fs::copy(&image.efi_vars_template, &efi_vars)
331            .await
332            .context("failed to copy EFI vars template")?;
333
334        // Generate SSH key pair
335        let key_path = work_dir.join("id_ed25519");
336        let _ = tokio::fs::remove_file(&key_path).await;
337        run_cmd(
338            "ssh-keygen",
339            &[
340                "-t",
341                "ed25519",
342                "-f",
343                &key_path.to_string_lossy(),
344                "-N",
345                "",
346                "-q",
347            ],
348        )
349        .await
350        .context("ssh-keygen failed")?;
351
352        let pub_key = tokio::fs::read_to_string(format!("{}.pub", key_path.display()))
353            .await
354            .context("failed to read SSH public key")?;
355
356        // Build cloud-init seed ISO
357        let seed_iso = work_dir.join("seed.iso");
358        build_seed_iso(&work_dir, &seed_iso, &name, pub_key.trim()).await?;
359
360        // Build QEMU args
361        let memory = opts.memory_mb.to_string();
362        let cpus = opts.cpus.to_string();
363        let efi_code_arg = format!(
364            "if=pflash,format=raw,file={},readonly=on",
365            image.efi_code.display()
366        );
367        let efi_vars_arg = format!("if=pflash,format=raw,file={}", efi_vars.display());
368        let disk_arg = format!("if=virtio,file={},format=qcow2", disk.display());
369        let seed_arg = format!("if=virtio,file={},format=raw", seed_iso.display());
370        let nic_arg = format!("user,hostfwd=tcp::{ssh_port}-:22");
371        let serial_log = work_dir.join("serial.log");
372        let serial_arg = format!("file:{}", serial_log.display());
373
374        let shared_store = image_shared_store_dir()?;
375        tokio::fs::create_dir_all(&shared_store).await.ok();
376        let virtfs_arg = format!(
377            "local,path={},mount_tag=images,security_model=none,readonly=on",
378            shared_store.display()
379        );
380
381        let mut args: Vec<&str> = vec![
382            "-machine",
383            "virt",
384            "-cpu",
385            if opts.use_kvm { "host" } else { "max" },
386            "-m",
387            &memory,
388            "-smp",
389            &cpus,
390            "-drive",
391            &efi_code_arg,
392            "-drive",
393            &efi_vars_arg,
394            "-drive",
395            &disk_arg,
396            "-drive",
397            &seed_arg,
398            "-nic",
399            &nic_arg,
400            "-nographic",
401            "-serial",
402            &serial_arg,
403            "-monitor",
404            "none",
405            "-virtfs",
406            &virtfs_arg,
407        ];
408
409        if opts.use_kvm {
410            args.extend(crate::accel_args());
411        }
412
413        let process = Command::new("qemu-system-aarch64")
414            .args(&args)
415            .stdout(Stdio::null())
416            .stderr(Stdio::null())
417            .spawn()
418            .context("failed to start QEMU — is qemu-system-aarch64 installed?")?;
419
420        let machine = Machine {
421            name,
422            ssh_host: "127.0.0.1".to_string(),
423            ssh_port,
424            work_dir,
425            process,
426        };
427
428        if let Some(pid) = machine.process.id() {
429            register_vm(pid, &machine.work_dir);
430        }
431
432        let boot_timeout = opts.boot_timeout();
433        machine.wait_for_ssh(boot_timeout).await?;
434
435        machine
436            .exec("cloud-init status --wait")
437            .await
438            .context("cloud-init did not complete")?;
439
440        Ok(machine)
441    }
442
443    /// Build common SSH arguments for connecting to this VM.
444    fn ssh_args(&self) -> Vec<String> {
445        let key = self.ssh_key_path();
446        vec![
447            "-o".into(),
448            "StrictHostKeyChecking=no".into(),
449            "-o".into(),
450            "UserKnownHostsFile=/dev/null".into(),
451            "-o".into(),
452            "LogLevel=ERROR".into(),
453            "-o".into(),
454            "ConnectTimeout=10".into(),
455            "-o".into(),
456            "BatchMode=yes".into(),
457            "-i".into(),
458            key.to_string_lossy().into_owned(),
459            "-p".into(),
460            self.ssh_port.to_string(),
461            format!("ryra@{}", self.ssh_host),
462        ]
463    }
464
465    /// Run a command inside the VM via SSH.
466    pub async fn exec(&self, cmd: &str) -> Result<ExecOutput> {
467        let mut args = self.ssh_args();
468        args.push(cmd.to_string());
469        let output = Command::new("ssh")
470            .args(&args)
471            .output()
472            .await
473            .with_context(|| format!("failed to SSH exec in {}: {cmd}", self.name))?;
474
475        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
476        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
477
478        if !output.status.success() {
479            anyhow::bail!(
480                "command failed in VM {} (exit {}): {cmd}\nstdout: {stdout}\nstderr: {stderr}",
481                self.name,
482                output.status,
483            );
484        }
485
486        Ok(ExecOutput { stdout, stderr })
487    }
488
489    /// Run a command inside the VM via SSH, streaming stdout/stderr to the terminal.
490    /// Returns the exit status (Ok if success, Err if non-zero).
491    pub async fn exec_streaming(&self, cmd: &str, prefix: &str) -> Result<ExecOutput> {
492        use tokio::io::{AsyncBufReadExt, BufReader};
493        use tokio::process::Command as TokioCommand;
494
495        let mut args = self.ssh_args();
496        args.push(cmd.to_string());
497        let mut child = TokioCommand::new("ssh")
498            .args(&args)
499            .stdout(std::process::Stdio::piped())
500            .stderr(std::process::Stdio::piped())
501            .spawn()
502            .with_context(|| format!("failed to SSH exec in {}: {cmd}", self.name))?;
503
504        let stdout_pipe = child.stdout.take();
505        let stderr_pipe = child.stderr.take();
506
507        let prefix_out = prefix.to_string();
508        let prefix_err = prefix.to_string();
509
510        let stdout_handle = tokio::spawn(async move {
511            let mut lines = String::new();
512            if let Some(pipe) = stdout_pipe {
513                let mut reader = BufReader::new(pipe).lines();
514                while let Ok(Some(line)) = reader.next_line().await {
515                    if prefix_out.is_empty() {
516                        println!("    {line}");
517                    } else {
518                        println!("[{prefix_out}]     {line}");
519                    }
520                    lines.push_str(&line);
521                    lines.push('\n');
522                }
523            }
524            lines
525        });
526
527        let stderr_handle = tokio::spawn(async move {
528            let mut lines = String::new();
529            if let Some(pipe) = stderr_pipe {
530                let mut reader = BufReader::new(pipe).lines();
531                while let Ok(Some(line)) = reader.next_line().await {
532                    if prefix_err.is_empty() {
533                        eprintln!("    {line}");
534                    } else {
535                        eprintln!("[{prefix_err}]     {line}");
536                    }
537                    lines.push_str(&line);
538                    lines.push('\n');
539                }
540            }
541            lines
542        });
543
544        let status = child.wait().await?;
545        let stdout_buf = stdout_handle.await.unwrap_or_default();
546        let stderr_buf = stderr_handle.await.unwrap_or_default();
547
548        if !status.success() {
549            anyhow::bail!(
550                "command failed in VM {} (exit {}): {cmd}\nstdout: {stdout_buf}\nstderr: {stderr_buf}",
551                self.name,
552                status,
553            );
554        }
555
556        Ok(ExecOutput {
557            stdout: stdout_buf,
558            stderr: stderr_buf,
559        })
560    }
561
562    /// Wait for the qemu process to exit on its own (e.g., after a clean
563    /// `sudo poweroff`). Returns once the process is gone or `timeout` elapses.
564    /// Used before snapshotting so the disk is fully released.
565    pub async fn wait_for_exit(&mut self, timeout: std::time::Duration) {
566        let start = std::time::Instant::now();
567        while start.elapsed() < timeout {
568            if matches!(self.process.try_wait(), Ok(Some(_))) {
569                return;
570            }
571            tokio::time::sleep(std::time::Duration::from_millis(250)).await;
572        }
573    }
574
575    /// Shut down the VM and clean up files.
576    pub async fn destroy(mut self) -> Result<()> {
577        if let Some(pid) = self.process.id() {
578            deregister_vm(pid);
579        }
580        let _ = self.exec("sudo poweroff").await;
581        tokio::time::sleep(std::time::Duration::from_secs(3)).await;
582        let _ = self.process.kill().await;
583        let _ = self.process.wait().await;
584        let _ = tokio::fs::remove_dir_all(&self.work_dir).await;
585        Ok(())
586    }
587
588    /// Print SSH connection info for debugging, then detach.
589    /// VM keeps running until the user kills it or the process exits.
590    pub fn keep_alive(self) {
591        if let Some(pid) = self.process.id() {
592            deregister_vm(pid);
593        }
594        println!(
595            "  VM still running. Connect with:\n    \
596             ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
597             -i {}/id_ed25519 -p {} ryra@{}\n  \
598             Serial log: {}/serial.log\n  \
599             Kill with: kill {}",
600            self.work_dir.display(),
601            self.ssh_port,
602            self.ssh_host,
603            self.work_dir.display(),
604            self.process
605                .id()
606                .map(|id| id.to_string())
607                .unwrap_or_else(|| "?".to_string()),
608        );
609        // Intentionally leak — VM process stays alive, cleaned up when parent exits
610        std::mem::forget(self);
611    }
612
613    fn ssh_key_path(&self) -> PathBuf {
614        self.work_dir.join("id_ed25519")
615    }
616
617    async fn wait_for_ssh(&self, timeout: std::time::Duration) -> Result<()> {
618        let start = std::time::Instant::now();
619        let mut ssh_args = self.ssh_args();
620        // Override ConnectTimeout to 3s for probing
621        if let Some(pos) = ssh_args.iter().position(|a| a == "ConnectTimeout=10") {
622            ssh_args[pos] = "ConnectTimeout=3".into();
623        }
624        ssh_args.push("true".into());
625        let mut last_log = std::time::Instant::now();
626
627        loop {
628            // Log progress every 30 seconds
629            if last_log.elapsed().as_secs() >= 30 {
630                println!(
631                    "  [{}] still waiting for SSH... ({:.0}s elapsed)",
632                    self.name,
633                    start.elapsed().as_secs_f64()
634                );
635                last_log = std::time::Instant::now();
636            }
637
638            // Try a real SSH command (not just TCP connect)
639            let result = Command::new("ssh")
640                .args(&ssh_args)
641                .stdout(Stdio::null())
642                .stderr(Stdio::null())
643                .status()
644                .await;
645
646            if let Ok(status) = result
647                && status.success()
648            {
649                return Ok(());
650            }
651
652            if start.elapsed() > timeout {
653                anyhow::bail!(
654                    "timed out waiting for SSH on {}:{} after {}s\n  \
655                     Check serial log: {}/serial.log",
656                    self.ssh_host,
657                    self.ssh_port,
658                    timeout.as_secs(),
659                    self.work_dir.display(),
660                );
661            }
662
663            tokio::time::sleep(std::time::Duration::from_secs(2)).await;
664        }
665    }
666}
667
/// Captured output of a command that was run inside a VM.
// NOTE(review): the lint exemption is presumably there because not every
// field is read within this crate — confirm before removing it.
#[allow(dead_code)]
pub struct ExecOutput {
    /// Everything the command wrote to stdout.
    pub stdout: String,
    /// Everything the command wrote to stderr.
    pub stderr: String,
}

impl ExecOutput {
    /// `stdout` with leading and trailing whitespace removed.
    pub fn stdout_trimmed(&self) -> &str {
        self.stdout.as_str().trim()
    }
}
679
680/// Run a command and bail if it fails.
681async fn run_cmd(program: &str, args: &[&str]) -> Result<()> {
682    let status = Command::new(program)
683        .args(args)
684        .stdout(Stdio::null())
685        .stderr(Stdio::null())
686        .status()
687        .await
688        .with_context(|| format!("failed to run {program}"))?;
689    if !status.success() {
690        anyhow::bail!("{program} failed with exit status {status}");
691    }
692    Ok(())
693}
694
695/// Read the host's /etc/subuid entry for the invoking user so the VM can
696/// mirror it. Matching subuid ranges ensures image store files (owned by host
697/// UIDs in the subuid range) map to the correct container UIDs inside the VM.
698fn host_subid_mapping() -> Result<(u32, u32)> {
699    let user = std::env::var("USER").context("USER env var not set")?;
700    let subuid = std::fs::read_to_string("/etc/subuid").context("failed to read /etc/subuid")?;
701    for line in subuid.lines() {
702        let parts: Vec<&str> = line.split(':').collect();
703        if parts.len() == 3 && parts[0] == user {
704            let start: u32 = parts[1]
705                .parse()
706                .with_context(|| format!("invalid subuid start for {user}: {}", parts[1]))?;
707            let size: u32 = parts[2]
708                .parse()
709                .with_context(|| format!("invalid subuid size for {user}: {}", parts[2]))?;
710            return Ok((start, size));
711        }
712    }
713    anyhow::bail!("no subuid entry found for user {user} in /etc/subuid")
714}
715
716/// Build a minimal cloud-init seed ISO — just SSH key, no package installs.
717/// Used for test VMs backed by a prepared image that already has packages.
718pub(crate) async fn build_seed_iso(
719    work_dir: &Path,
720    output: &Path,
721    hostname: &str,
722    pub_key: &str,
723) -> Result<()> {
724    // VMs run podman rootless as UID 1000 (matching host) and mirror the
725    // host's subuid/subgid range so rootless user namespaces map image store
726    // files correctly. The image store is shared from host via 9p, and files
727    // inside are owned by host UIDs in the subuid range (e.g., postgres UID
728    // 70 in the image → host UID subuid_start+69). Identical VM subuid
729    // mapping makes these appear as the same container UIDs inside the VM.
730    let (subid_start, subid_size) = host_subid_mapping()?;
731    let user_data = format!(
732        r#"#cloud-config
733ssh_pwauth: false
734
735users:
736  - name: ryra
737    uid: 1000
738    shell: /bin/bash
739    lock_passwd: true
740    sudo: ALL=(ALL) NOPASSWD:ALL
741    ssh_authorized_keys:
742      - {pub_key}
743
744write_files:
745  - path: /etc/subuid
746    content: "ryra:{subid_start}:{subid_size}\n"
747    permissions: '0644'
748  - path: /etc/subgid
749    content: "ryra:{subid_start}:{subid_size}\n"
750    permissions: '0644'
751
752runcmd:
753  - loginctl enable-linger ryra
754"#
755    );
756    write_seed_iso(work_dir, output, hostname, &user_data).await
757}
758
759/// Build a full cloud-init seed ISO — installs all packages.
760/// Used once during image preparation.
761pub async fn build_seed_iso_full(
762    work_dir: &Path,
763    output: &Path,
764    hostname: &str,
765    pub_key: &str,
766    packages: &[&str],
767) -> Result<()> {
768    let package_list = packages
769        .iter()
770        .map(|p| format!("  - {p}"))
771        .collect::<Vec<_>>()
772        .join("\n");
773    let (subid_start, subid_size) = host_subid_mapping()?;
774    let user_data = format!(
775        r#"#cloud-config
776ssh_pwauth: false
777
778users:
779  - name: ryra
780    uid: 1000
781    shell: /bin/bash
782    lock_passwd: true
783    sudo: ALL=(ALL) NOPASSWD:ALL
784    ssh_authorized_keys:
785      - {pub_key}
786
787packages:
788{package_list}
789
790write_files:
791  - path: /etc/subuid
792    content: "ryra:{subid_start}:{subid_size}\n"
793    permissions: '0644'
794  - path: /etc/subgid
795    content: "ryra:{subid_start}:{subid_size}\n"
796    permissions: '0644'
797
798runcmd:
799  - loginctl enable-linger ryra
800"#
801    );
802    write_seed_iso(work_dir, output, hostname, &user_data).await
803}
804
805/// Write a cloud-init seed ISO from given user-data content.
806async fn write_seed_iso(
807    work_dir: &Path,
808    output: &Path,
809    hostname: &str,
810    user_data: &str,
811) -> Result<()> {
812    let seed_dir = work_dir.join("seed");
813    tokio::fs::create_dir_all(&seed_dir)
814        .await
815        .context("failed to create seed dir")?;
816
817    let meta_data = format!("instance-id: {hostname}\nlocal-hostname: {hostname}\n");
818    tokio::fs::write(seed_dir.join("meta-data"), &meta_data)
819        .await
820        .context("failed to write meta-data")?;
821
822    tokio::fs::write(seed_dir.join("user-data"), user_data)
823        .await
824        .context("failed to write user-data")?;
825
826    // Build ISO with genisoimage or mkisofs
827    let iso_tools = ["genisoimage", "mkisofs"];
828    let mut created = false;
829    for tool in &iso_tools {
830        let result = Command::new(tool)
831            .args([
832                "-output",
833                &output.to_string_lossy(),
834                "-volid",
835                "cidata",
836                "-joliet",
837                "-rock",
838                &seed_dir.to_string_lossy(),
839            ])
840            .stdout(Stdio::null())
841            .stderr(Stdio::null())
842            .status()
843            .await;
844
845        if let Ok(status) = result
846            && status.success()
847        {
848            created = true;
849            break;
850        }
851    }
852
853    if !created {
854        anyhow::bail!(
855            "failed to create seed ISO — install genisoimage or mkisofs:\n  \
856             sudo apt install genisoimage    # Debian/Ubuntu\n  \
857             sudo dnf install genisoimage    # Fedora\n  \
858             sudo pacman -S cdrtools         # Arch"
859        );
860    }
861
862    // Clean up seed dir
863    let _ = tokio::fs::remove_dir_all(&seed_dir).await;
864
865    Ok(())
866}
867
868/// Directory for saved container image tars shared into VMs via 9p.
869///
870/// Tars are an intermediate step: the host's rootless podman store uses UID
871/// shifting that other podman instances can't read. We save to tar first,
872/// then load into the shared store (see [`image_shared_store_dir`]).
873fn image_tar_cache_dir() -> Result<PathBuf> {
874    Ok(cache_base_dir()?.join("image-tars"))
875}
876
877/// Dedicated overlay store populated from tars, shared into VMs via 9p.
878///
879/// VMs configure `additionalimagestores` pointing at this mount, so all
880/// pre-cached images are available instantly — no per-image `podman load`.
881///
882/// This store is created with `podman --root <path> --storage-driver overlay`
883/// which uses unprivileged overlayfs (kernel ≥5.11). The resulting files are
884/// owned by the current user and readable by root podman in the VM.
885pub fn image_shared_store_dir() -> Result<PathBuf> {
886    Ok(cache_base_dir()?.join("image-store"))
887}
888
889/// Base directory for VM work dirs (disk images, keys, logs).
890/// Uses ~/.cache/ryra-vm/vms/ instead of /tmp so we don't fill
891/// up a RAM-backed tmpfs with multi-GB qcow2 COW disks.
892fn vm_work_base_dir() -> Result<PathBuf> {
893    Ok(cache_base_dir()?.join("vms"))
894}
895
896/// Shared cache root for all ryra-vm artifacts.
897fn cache_base_dir() -> Result<PathBuf> {
898    let base = dirs::cache_dir().context("could not determine cache directory (is $HOME set?)")?;
899    Ok(base.join("ryra-vm"))
900}
901
902/// Ensure a container image is cached in the shared store for VM sharing.
903///
904/// Flow: pull → save to tar (intermediate) → load into shared overlay store.
905/// The shared store at ~/.cache/ryra-vm/image-store/ is mounted into VMs
906/// via 9p and used as an `additionalimagestores` entry, making images
907/// available instantly without per-image `podman load`.
908///
909/// Tars are kept as an intermediate cache so that re-populating the shared
910/// store (e.g., after clearing it) doesn't require re-pulling from registries.
911pub async fn ensure_image_cached(image: &str) -> Result<()> {
912    let store_dir = image_shared_store_dir()?;
913    tokio::fs::create_dir_all(&store_dir).await.ok();
914
915    // Check if the image is already in the shared store
916    if image_exists_in_store(&store_dir, image).await {
917        return Ok(());
918    }
919
920    // Ensure tar exists (pull + save if needed)
921    let tar_dir = image_tar_cache_dir()?;
922    tokio::fs::create_dir_all(&tar_dir).await.ok();
923    let tar_name = sanitize_image_name(image);
924    let tar_path = tar_dir.join(&tar_name);
925
926    if !tar_path.exists() {
927        // Podman sometimes doesn't recognize the docker.io/ prefix for local
928        // lookups (image exists, save) even though pull writes it that way.
929        // Try the full name first, then the short name without docker.io/.
930        let short_name = strip_docker_io(image);
931        let local_name = if Command::new("podman")
932            .args(["image", "exists", image])
933            .stdout(Stdio::null())
934            .stderr(Stdio::null())
935            .status()
936            .await
937            .map(|s| s.success())
938            .unwrap_or(false)
939        {
940            image
941        } else if short_name != image
942            && Command::new("podman")
943                .args(["image", "exists", short_name])
944                .stdout(Stdio::null())
945                .stderr(Stdio::null())
946                .status()
947                .await
948                .map(|s| s.success())
949                .unwrap_or(false)
950        {
951            short_name
952        } else {
953            // Not in local store — pull it
954            println!("    pulling {image}...");
955            let status = Command::new("podman")
956                .args(["pull", image])
957                .stdout(Stdio::null())
958                .stderr(Stdio::null())
959                .status()
960                .await
961                .context("failed to run podman pull")?;
962            if !status.success() {
963                anyhow::bail!("podman pull {image} failed");
964            }
965            image
966        };
967
968        println!("    saving {image}...");
969        let status = Command::new("podman")
970            .args(["save", "-o", &tar_path.display().to_string(), local_name])
971            .stdout(Stdio::null())
972            .stderr(Stdio::null())
973            .status()
974            .await
975            .context("failed to run podman save")?;
976        if !status.success() {
977            let _ = tokio::fs::remove_file(&tar_path).await;
978            anyhow::bail!("podman save {image} failed");
979        }
980    }
981
982    // Load tar into the shared overlay store
983    println!("    loading {image} into shared store...");
984    let status = Command::new("podman")
985        .args([
986            "--root",
987            &store_dir.display().to_string(),
988            "--storage-driver",
989            "overlay",
990            "load",
991            "-i",
992            &tar_path.display().to_string(),
993        ])
994        .stdout(Stdio::null())
995        .stderr(Stdio::null())
996        .status()
997        .await
998        .context("failed to load image into shared store")?;
999    if !status.success() {
1000        anyhow::bail!("podman load into shared store failed for {image}");
1001    }
1002
1003    // Delete the tar — the shared store is the canonical cache now.
1004    // If the shared store is cleared, tars will be recreated on next run.
1005    let _ = tokio::fs::remove_file(&tar_path).await;
1006
1007    Ok(())
1008}
1009
1010/// Check if an image exists in the shared store.
1011///
1012/// Quadlets use fully qualified names (e.g. "docker.io/library/caddy:2-alpine"),
1013/// but older caches may hold short-name entries — try both forms so existing
1014/// shared stores keep hitting.
1015async fn image_exists_in_store(store_dir: &Path, image: &str) -> bool {
1016    let short = strip_docker_io(image);
1017    let expanded_library = format!("docker.io/library/{short}");
1018    let expanded_org = format!("docker.io/{short}");
1019    for name in [image, short, &expanded_library, &expanded_org] {
1020        let ok = Command::new("podman")
1021            .args([
1022                "--root",
1023                &store_dir.display().to_string(),
1024                "--storage-driver",
1025                "overlay",
1026                "image",
1027                "exists",
1028                name,
1029            ])
1030            .stdout(Stdio::null())
1031            .stderr(Stdio::null())
1032            .status()
1033            .await
1034            .map(|s| s.success())
1035            .unwrap_or(false);
1036        if ok {
1037            return true;
1038        }
1039    }
1040    false
1041}
1042
/// Remove a leading `docker.io/library/` or `docker.io/` from an image name.
///
/// The longer prefix is tried first so official images collapse to their bare
/// short name. Names without either prefix are returned unchanged. Podman
/// sometimes doesn't recognize the prefix for local operations even though
/// `podman pull` stores images with it.
fn strip_docker_io(image: &str) -> &str {
    for prefix in ["docker.io/library/", "docker.io/"] {
        if let Some(rest) = image.strip_prefix(prefix) {
            return rest;
        }
    }
    image
}
1052
/// Turn an image reference into a flat tar file name.
///
/// Every `/` and `:` becomes `_`, and `.tar` is appended.
fn sanitize_image_name(image: &str) -> String {
    let mut file_name: String = image
        .chars()
        .map(|c| match c {
            '/' | ':' => '_',
            other => other,
        })
        .collect();
    file_name.push_str(".tar");
    file_name
}
1056
1057/// Make host container images available in the VM via shared store.
1058///
1059/// On snapshot-restored VMs, the config is already baked in. On cold boot,
1060/// this configures everything from scratch. Idempotent — safe to call either way.
1061pub async fn load_images_into_vm(machine: &Machine, _images: &[String]) -> Result<()> {
1062    // Mount the 9p store (already mounted from snapshot, or needs mounting on cold boot).
1063    // Uses sudo because mount is a privileged operation.
1064    machine
1065        .exec("sudo mkdir -p /mnt/images && (mountpoint -q /mnt/images 2>/dev/null || sudo mount -t 9p -o trans=virtio,version=9p2000.L,ro images /mnt/images)")
1066        .await
1067        .context("failed to mount 9p image store in VM")?;
1068
1069    // Configure rootless podman storage/registries at the user level
1070    // (~/.config/containers/). These are used by the ryra user's rootless
1071    // podman and don't require sudo. The VM's /etc/subuid is set to match
1072    // the host's at cloud-init time, so default rootless UID mapping aligns
1073    // with the shared image store — no userns=auto needed.
1074    machine
1075        .exec(
1076            "mkdir -p ~/.config/containers && \
1077             (grep -q '/mnt/images' ~/.config/containers/storage.conf 2>/dev/null || \
1078              printf '[storage]\\ndriver = \"overlay\"\\n[storage.options]\\nadditionalimagestores = [\"/mnt/images\"]\\n' > ~/.config/containers/storage.conf) && \
1079             (grep -q 'docker.io' ~/.config/containers/registries.conf 2>/dev/null || \
1080              printf 'unqualified-search-registries = [\"docker.io\"]\\n' > ~/.config/containers/registries.conf)",
1081        )
1082        .await
1083        .context("failed to configure podman config in VM")?;
1084
1085    Ok(())
1086}
1087
1088/// Build common SCP arguments for copying files to a VM.
1089fn scp_base_args(machine: &Machine) -> Vec<String> {
1090    let key = machine.ssh_key_path();
1091    vec![
1092        "-o".into(),
1093        "StrictHostKeyChecking=no".into(),
1094        "-o".into(),
1095        "UserKnownHostsFile=/dev/null".into(),
1096        "-o".into(),
1097        "LogLevel=ERROR".into(),
1098        "-i".into(),
1099        key.to_string_lossy().into_owned(),
1100        "-P".into(),
1101        machine.ssh_port.to_string(),
1102    ]
1103}
1104
1105/// SCP a local file into the VM at the given remote path.
1106async fn scp_to_vm(machine: &Machine, local_path: &Path, remote_path: &str) -> Result<()> {
1107    let dest = format!("ryra@{}:{remote_path}", machine.ssh_host);
1108    let mut args = scp_base_args(machine);
1109    args.push(local_path.to_string_lossy().into_owned());
1110    args.push(dest);
1111
1112    let status = Command::new("scp")
1113        .args(&args)
1114        .status()
1115        .await
1116        .with_context(|| format!("failed to SCP {} to VM", local_path.display()))?;
1117    if !status.success() {
1118        anyhow::bail!("SCP of {} failed", local_path.display());
1119    }
1120    Ok(())
1121}
1122
1123/// SCP a local directory recursively into the VM at the given remote path.
1124async fn scp_dir_to_vm(machine: &Machine, local_path: &Path, remote_path: &str) -> Result<()> {
1125    let dest = format!("ryra@{}:{remote_path}", machine.ssh_host);
1126    let mut args = scp_base_args(machine);
1127    args.push("-r".into());
1128    args.push(local_path.to_string_lossy().into_owned());
1129    args.push(dest);
1130
1131    let status = Command::new("scp")
1132        .args(&args)
1133        .status()
1134        .await
1135        .with_context(|| format!("failed to SCP {} to VM", local_path.display()))?;
1136    if !status.success() {
1137        anyhow::bail!("SCP of {} failed", local_path.display());
1138    }
1139    Ok(())
1140}
1141
1142/// SCP a remote directory recursively from the VM to the given local path.
1143/// The local parent directory must exist; the remote directory contents are
1144/// written into `local_path`.
1145pub async fn scp_dir_from_vm(
1146    machine: &Machine,
1147    remote_path: &str,
1148    local_path: &Path,
1149) -> Result<()> {
1150    let source = format!("ryra@{}:{remote_path}", machine.ssh_host);
1151    let mut args = scp_base_args(machine);
1152    args.push("-r".into());
1153    args.push(source);
1154    args.push(local_path.to_string_lossy().into_owned());
1155
1156    let status = Command::new("scp")
1157        .args(&args)
1158        .status()
1159        .await
1160        .with_context(|| format!("failed to SCP {remote_path} from VM"))?;
1161    if !status.success() {
1162        anyhow::bail!("SCP of {remote_path} from VM failed");
1163    }
1164    Ok(())
1165}
1166
1167/// Copy the ryra binary into a running VM via SCP.
1168pub async fn copy_ryra_to_vm(machine: &Machine, ryra_bin: &Path) -> Result<()> {
1169    // SCP to user's home (writable), then sudo-move to system PATH.
1170    scp_to_vm(machine, ryra_bin, "/tmp/ryra").await?;
1171    machine
1172        .exec("sudo mv /tmp/ryra /usr/local/bin/ryra && sudo chmod +x /usr/local/bin/ryra")
1173        .await?;
1174    Ok(())
1175}
1176
1177/// Copy test fixtures into a running VM via SCP.
1178///
1179/// The fixtures_dir contains service directories (e.g. whoami/, postgres/).
1180/// These get copied into /opt/ryra-test-registry/ so ryra can find them
1181/// at /opt/ryra-test-registry/whoami/service.toml etc.
1182pub async fn copy_fixtures_to_vm(machine: &Machine, fixtures_dir: &Path) -> Result<()> {
1183    if !fixtures_dir.exists() {
1184        return Ok(());
1185    }
1186
1187    // Create destination dir (needs sudo — /opt is root-owned, chown to ryra user)
1188    machine
1189        .exec(
1190            "sudo mkdir -p /opt/ryra-test-registry && sudo chown ryra:ryra /opt/ryra-test-registry",
1191        )
1192        .await?;
1193
1194    // SCP each service dir individually to avoid nesting issues
1195    // (scp -r dir/ remote:dest/ creates dest/dir/, not dest/contents/)
1196    let mut entries = tokio::fs::read_dir(fixtures_dir)
1197        .await
1198        .context("failed to read fixtures directory")?;
1199
1200    while let Some(entry) = entries.next_entry().await? {
1201        let path = entry.path();
1202        scp_dir_to_vm(machine, &path, "/opt/ryra-test-registry/").await?;
1203    }
1204
1205    Ok(())
1206}
1207
1208/// Copy a local project directory (quadlet files + test.toml) into a running VM.
1209///
1210/// Files are placed at /opt/ryra-test-project/ so the test runner can copy them
1211/// into the systemd quadlet directory.
1212pub async fn copy_project_to_vm(machine: &Machine, project_dir: &Path) -> Result<()> {
1213    if !project_dir.exists() {
1214        anyhow::bail!("project directory not found: {}", project_dir.display());
1215    }
1216
1217    machine
1218        .exec("sudo mkdir -p /opt/ryra-test-project && sudo chown ryra:ryra /opt/ryra-test-project")
1219        .await?;
1220
1221    // Copy individual files (not directories) — quadlet files and test.toml
1222    let quadlet_extensions = ["container", "volume", "network", "pod", "kube", "toml"];
1223    let mut entries = tokio::fs::read_dir(project_dir)
1224        .await
1225        .with_context(|| format!("failed to read project directory {}", project_dir.display()))?;
1226
1227    while let Some(entry) = entries.next_entry().await? {
1228        let path = entry.path();
1229        if !path.is_file() {
1230            continue;
1231        }
1232        if let Some(ext) = path.extension().and_then(|e| e.to_str())
1233            && quadlet_extensions.contains(&ext)
1234        {
1235            scp_to_vm(machine, &path, "/opt/ryra-test-project/").await?;
1236        }
1237    }
1238
1239    Ok(())
1240}