Skip to main content

ryra_test/
lib.rs

1pub mod executor;
2pub mod registry;
3pub mod reports;
4pub mod runner;
5pub mod scenario;
6pub mod test_toml;
7
8use std::collections::BTreeSet;
9use std::path::{Path, PathBuf};
10use std::time::Duration;
11
12use anyhow::{Context, Result};
13use clap::Parser;
14use tokio::sync::Semaphore;
15
16use ryra_vm::image::Distro;
17use ryra_vm::machine::{self, Machine, SpawnOpts};
18use ryra_vm::{image, ports};
19use scenario::{Outcome, ScenarioResult};
20
21/// Install a Ctrl-C handler that kills all active VMs and exits.
22fn install_signal_handler() {
23    // We use the raw libc handler (not tokio::signal) so it works even if
24    // the tokio runtime is blocked or mid-shutdown.
25    unsafe {
26        libc::signal(
27            libc::SIGINT,
28            signal_handler as *const () as libc::sighandler_t,
29        );
30    }
31}
32
33extern "C" fn signal_handler(_sig: libc::c_int) {
34    // Write to stderr manually (signal-safe). Stay mode-agnostic here —
35    // cleanup_all_vms reports the VM count only when there's actually one.
36    let msg = b"\nInterrupted\n";
37    unsafe {
38        libc::write(2, msg.as_ptr() as *const libc::c_void, msg.len());
39    }
40    machine::cleanup_all_vms();
41    std::process::exit(130); // 128 + SIGINT
42}
43
44/// Render `--list` output. Two sections:
45///  1. **Service tests** — grouped under the owning service name
46///     (derived from `registry/<svc>/test.toml`).
47///  2. **Service-agnostic tests** — flat list from `registry/tests/*.toml`.
48///
49/// Each line shows the test name, step count, `[browser]` flag, and
50/// distinct step kinds so `playwright`/`shell`/`http` tell you what
51/// the test does at a glance.
52///
53/// When `verbose` is set, each test also gets a breakdown of every step
54/// (commands, URLs, polls, heredoc bodies) so the caller can see exactly
55/// what the test runs without opening the `.toml`.
56fn render_list(discovered: &[registry::DiscoveredTest], registry_path: &Path, verbose: bool) {
57    if discovered.is_empty() {
58        println!("No tests discovered.");
59        return;
60    }
61
62    let tests_dir = registry_path.join("tests");
63    let is_cross_cutting = |p: &Path| p.starts_with(&tests_dir);
64
65    // Group service tests by owning directory name; keep cross-cutting
66    // tests flat since each file already contains a single test.
67    let mut service_groups: Vec<(String, Vec<&registry::DiscoveredTest>)> = Vec::new();
68    let mut cross_cutting: Vec<&registry::DiscoveredTest> = Vec::new();
69    for test in discovered {
70        let src = test.source();
71        if is_cross_cutting(src) {
72            cross_cutting.push(test);
73            continue;
74        }
75        let svc = src
76            .parent()
77            .and_then(|p| p.file_name())
78            .and_then(|n| n.to_str())
79            .unwrap_or("<unknown>")
80            .to_string();
81        if let Some((_, bucket)) = service_groups.iter_mut().find(|(s, _)| s == &svc) {
82            bucket.push(test);
83        } else {
84            service_groups.push((svc, vec![test]));
85        }
86    }
87    service_groups.sort_by(|a, b| a.0.cmp(&b.0));
88    cross_cutting.sort_by(|a, b| a.name().cmp(b.name()));
89
90    let total_tests: usize = discovered.len();
91    let file_count = service_groups.len() + cross_cutting.len();
92    println!("{total_tests} tests across {file_count} files");
93
94    let line = |t: &registry::DiscoveredTest, indent: &str| {
95        let kinds = t.step_kinds().join(" → ");
96        let browser = if t.needs_browser() { " [browser]" } else { "" };
97        let step_count = t.test_count();
98        println!(
99            "{indent}{:<34} {} step{}{browser}  · {kinds}",
100            t.name(),
101            step_count,
102            if step_count == 1 { "" } else { "s" },
103        );
104        if !verbose {
105            return;
106        }
107        // Verbose: print each step's details. Use a deeper indent so the
108        // hierarchy (group → test → step lines) stays readable.
109        let step_indent = format!("{indent}    ");
110        if let registry::DiscoveredTest::Lifecycle { steps, .. } = t {
111            for (i, step) in steps.iter().enumerate() {
112                let described = step.describe();
113                if let Some((head, rest)) = described.split_first() {
114                    println!("{step_indent}{:>2}. {head}", i + 1);
115                    for l in rest {
116                        println!("{step_indent}    {l}");
117                    }
118                }
119            }
120        } else if let registry::DiscoveredTest::Simple { tests, .. } = t {
121            for (i, entry) in tests.iter().enumerate() {
122                println!(
123                    "{step_indent}{:>2}. shell '{}'  (timeout={}s)",
124                    i + 1,
125                    entry.name,
126                    entry.timeout_secs
127                );
128                for l in entry.run.trim().lines() {
129                    println!("{step_indent}    | {l}");
130                }
131            }
132        }
133    };
134
135    if !service_groups.is_empty() {
136        println!("─── Service tests  (registry/<service>/test.toml) ───");
137        for (svc, tests) in &service_groups {
138            println!("{svc}");
139            for t in tests {
140                line(t, "  ");
141            }
142        }
143    }
144
145    if !cross_cutting.is_empty() {
146        println!("─── Service-agnostic tests  (registry/tests/*.toml) ───");
147        for t in &cross_cutting {
148            line(t, "");
149        }
150    }
151}
152
153#[derive(Parser, Debug)]
154#[command(
155    name = "ryra-e2e",
156    about = "E2E test runner for ryra — spins up QEMU VMs for integration testing"
157)]
158pub struct Args {
159    /// Max concurrent VMs
160    #[arg(long, default_value_t = 1)]
161    pub parallel: usize,
162
163    /// Base image distro
164    #[arg(long, default_value_t = Distro::Debian13)]
165    pub distro: Distro,
166
167    /// Re-download the base cloud image
168    #[arg(long)]
169    pub redownload: bool,
170
171    /// Path to ryra binary
172    #[arg(long)]
173    pub ryra_bin: Option<PathBuf>,
174
175    /// Don't destroy VMs for failed tests (for debugging via SSH)
176    #[arg(long)]
177    pub keep_failed: bool,
178
179    /// Keep VM alive after tests complete (or boot without running tests).
180    /// Prints SSH connection command for interactive use.
181    #[arg(long)]
182    pub keep_alive: bool,
183
184    /// Disable KVM acceleration (use software emulation — slower)
185    #[arg(long)]
186    pub no_kvm: bool,
187
188    /// Run tests directly on the host without a VM
189    #[arg(long)]
190    pub no_vm: bool,
191
192    /// Skip setup steps (add/wait/remove/reset) and only run shell/playwright
193    /// steps. Use to re-run tests quickly when services are already installed.
194    #[arg(long)]
195    pub retest: bool,
196
197    /// VM memory in MB (overrides auto-detection from service requirements)
198    #[arg(long)]
199    pub memory: Option<u32>,
200
201    /// VM CPU count
202    #[arg(long, default_value_t = 2)]
203    pub cpus: u32,
204
205    /// Show serial log output on failure
206    #[arg(long, short)]
207    pub verbose: bool,
208
209    /// Path to registry directory (auto-detected if omitted)
210    #[arg(long)]
211    pub registry: Option<PathBuf>,
212
213    /// Path to a local project directory with test.toml (+ optional quadlet files)
214    #[arg(long)]
215    pub project: Option<PathBuf>,
216
217    /// List available tests
218    #[arg(long)]
219    pub list: bool,
220
221    /// Test names to run (runs all if empty, supports substring match)
222    pub tests: Vec<String>,
223}
224
225fn find_ryra_binary() -> Result<PathBuf> {
226    // The currently running binary is the one being tested — `ryra test` is a
227    // subcommand of `ryra` itself, so whichever binary the user launched is by
228    // definition the one we want to copy into VMs. Using current_exe avoids the
229    // old footgun where we'd silently prefer target/release/ryra even when the
230    // user had just rebuilt debug.
231    let exe = std::env::current_exe()
232        .context("failed to resolve current executable path for ryra binary")?;
233    std::fs::canonicalize(&exe).context("failed to canonicalize current executable path")
234}
235
236/// Walk `crates/` looking for any `.rs` or `Cargo.toml` newer than `binary`.
237/// Returns the newest offending source file, if any. Cheap (~few ms for <1000
238/// files) because we only stat metadata, not read contents.
239fn newest_source_newer_than(binary: &Path) -> Result<Option<(PathBuf, std::time::SystemTime)>> {
240    let bin_mtime = std::fs::metadata(binary)
241        .with_context(|| format!("stat binary {}", binary.display()))?
242        .modified()
243        .context("binary modified-time")?;
244    let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
245    let crates_dir = match std::fs::canonicalize(workspace_root.join("crates")) {
246        Ok(p) => p,
247        // Running outside the workspace (e.g. an installed binary) — no check.
248        Err(_) => return Ok(None),
249    };
250
251    fn is_source(path: &Path) -> bool {
252        if path.extension().and_then(|s| s.to_str()) == Some("rs") {
253            return true;
254        }
255        matches!(
256            path.file_name().and_then(|n| n.to_str()),
257            Some("Cargo.toml")
258        )
259    }
260
261    fn walk(
262        dir: &Path,
263        bin_mtime: std::time::SystemTime,
264        newest: &mut Option<(PathBuf, std::time::SystemTime)>,
265    ) -> Result<()> {
266        for entry in
267            std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))?
268        {
269            let entry = entry?;
270            let path = entry.path();
271            let ft = entry.file_type()?;
272            if ft.is_dir() {
273                // Skip build output dirs — they contain generated files we don't care about.
274                if matches!(
275                    path.file_name().and_then(|n| n.to_str()),
276                    Some("target") | Some(".git") | Some("node_modules")
277                ) {
278                    continue;
279                }
280                walk(&path, bin_mtime, newest)?;
281            } else if ft.is_file() && is_source(&path) {
282                let mtime = entry.metadata()?.modified()?;
283                if mtime > bin_mtime && newest.as_ref().is_none_or(|(_, t)| mtime > *t) {
284                    *newest = Some((path, mtime));
285                }
286            }
287        }
288        Ok(())
289    }
290
291    let mut newest = None;
292    walk(&crates_dir, bin_mtime, &mut newest)?;
293    Ok(newest)
294}
295
296/// Error out if the `ryra` binary we're about to ship into VMs is older than
297/// any workspace source file. This is the stale-binary footgun: `cargo build -p
298/// ryra-test` rebuilds the lib but leaves `target/release/ryra` untouched, so
299/// tests silently run against old behavior.
300fn ensure_binary_fresh(binary: &Path) -> Result<()> {
301    let Some((src, _)) = newest_source_newer_than(binary)? else {
302        return Ok(());
303    };
304    anyhow::bail!(
305        "ryra binary is older than source {}.\n  \
306         Binary:  {}\n  \
307         Rebuild: cargo build --release --bin ryra\n  \
308         (or pass --ryra-bin <path> to skip this check)",
309        src.display(),
310        binary.display(),
311    )
312}
313
314fn print_summary(results: &[ScenarioResult], wall_clock: std::time::Duration) {
315    println!("\n========================================");
316    println!("  Results");
317    println!("========================================\n");
318
319    // Only the *failures* get their full step trace dumped here — that's the
320    // bit you actually need to read inline. Passing tests would just spew
321    // every step's captured stdout; their full logs are saved to
322    // `reports/<test>/run.log` and pointed at by the path summary below.
323    let any_failed = results.iter().any(|r| r.outcome.is_fail());
324    for result in results.iter().filter(|r| r.outcome.is_fail()) {
325        print!("{result}");
326    }
327    if any_failed {
328        println!();
329    }
330
331    let passed = results.iter().filter(|r| r.passed()).count();
332    let failed = results
333        .iter()
334        .filter(|r| matches!(r.outcome, Outcome::Failed(_)))
335        .count();
336    let skipped = results
337        .iter()
338        .filter(|r| matches!(r.outcome, Outcome::Skipped))
339        .count();
340
341    println!("----------------------------------------");
342    println!(
343        "{passed} passed, {failed} failed, {skipped} skipped, {} total ({} wall clock)",
344        results.len(),
345        reports::humanize_secs(wall_clock.as_secs()),
346    );
347    println!("========================================");
348}
349
350fn save_results(results: &[ScenarioResult], wall_clock: std::time::Duration) -> Result<()> {
351    reports::save_run_results(results)?;
352    reports::print_results_paths(results, wall_clock);
353    Ok(())
354}
355
356/// Safety margin (MB) kept free beyond the VMs' own needs — for host processes,
357/// QEMU overhead, the kernel page cache, and the GPU compositor. Running this
358/// tight causes kernel-level thrashing and on Asahi can freeze the display.
359const HOST_RESERVE_MB: u64 = 1024;
360
361/// Decide how many VMs can safely run in parallel given current host memory.
362/// Returns the clamped parallel count (never more than `requested`, never below 1),
363/// and prints a report. Uses `sorted_mems_desc` so we pack the largest VMs first.
364fn plan_parallelism(requested: usize, sorted_mems_desc: &[u32]) -> usize {
365    let mem = match ryra_vm::read_host_memory() {
366        Some(m) => m,
367        None => {
368            let total_mb: u64 = sorted_mems_desc
369                .iter()
370                .take(requested)
371                .map(|m| *m as u64)
372                .sum();
373            println!("\nMax concurrent VM RAM: {total_mb}MB (host memory unknown)");
374            return requested.max(1);
375        }
376    };
377
378    let used_mb = mem.total_mb.saturating_sub(mem.available_mb);
379    println!(
380        "\nHost RAM: {}MB used / {}MB total ({}MB available, {}MB in swap)",
381        used_mb, mem.total_mb, mem.available_mb, mem.swap_used_mb
382    );
383
384    let budget = mem.available_mb.saturating_sub(HOST_RESERVE_MB);
385    let mut fit = 0usize;
386    let mut total = 0u64;
387    for m in sorted_mems_desc.iter().take(requested) {
388        let next = total + *m as u64;
389        if next > budget {
390            break;
391        }
392        total = next;
393        fit += 1;
394    }
395
396    let first_vm_mb = sorted_mems_desc.first().copied().unwrap_or(0) as u64;
397    if fit == 0 && first_vm_mb > 0 {
398        // Even one VM doesn't fit in budget — warn loudly but still let it run at
399        // parallel=1 so the user can choose to override with --memory.
400        eprintln!(
401            "WARNING: largest VM needs {}MB but only {}MB free after {}MB host reserve. \
402             Running anyway at --parallel=1 — expect swap pressure. Close apps or lower \
403             VM size with --memory.",
404            first_vm_mb, budget, HOST_RESERVE_MB
405        );
406        fit = 1;
407    }
408
409    let clamped = fit.min(requested).max(1);
410    if clamped < requested {
411        eprintln!(
412            "Reducing --parallel from {requested} to {clamped} to fit in {budget}MB RAM budget \
413             (total host RAM {}MB, {}MB reserved for host)",
414            mem.total_mb, HOST_RESERVE_MB
415        );
416    }
417    println!("Max concurrent VM RAM: {total}MB (parallel={clamped})");
418    clamped
419}
420
421/// Find the registry path — explicit arg, or auto-detect.
422fn resolve_registry_path(explicit: Option<&PathBuf>) -> Result<PathBuf> {
423    if let Some(p) = explicit {
424        return std::fs::canonicalize(p)
425            .with_context(|| format!("registry path not found: {}", p.display()));
426    }
427
428    let candidates = [
429        PathBuf::from("registry"),
430        PathBuf::from("crates/ryra-core/registry"),
431    ];
432    for c in &candidates {
433        if c.exists() {
434            return std::fs::canonicalize(c)
435                .with_context(|| format!("failed to resolve {}", c.display()));
436        }
437    }
438
439    anyhow::bail!("no registry found. Pass --registry <path> or run from the repo root")
440}
441
442/// Run the E2E test suite with the given arguments.
443pub async fn run(args: Args) -> Result<()> {
444    install_signal_handler();
445
446    // Check for local project first, then fall back to registry
447    let registry_path = resolve_registry_path(args.registry.as_ref());
448
449    let mut discovered = Vec::new();
450
451    // Discover local project tests (--project flag)
452    if let Some(ref project_dir) = args.project {
453        match registry::discover_local_project(project_dir)? {
454            Some(test) => discovered.push(test),
455            None => {
456                anyhow::bail!(
457                    "no test.toml found in project directory: {}",
458                    project_dir.display()
459                );
460            }
461        }
462    }
463
464    // Discover registry tests (only if no explicit --project or if registry is also available)
465    if let Ok(ref reg_path) = registry_path
466        && let Ok(reg_tests) = registry::discover(reg_path)
467    {
468        // If --project was explicitly passed, skip registry tests
469        if args.project.is_none() {
470            discovered.extend(reg_tests);
471        }
472    }
473
474    // Need a registry path for dependency resolution even with local projects
475    let registry_path = registry_path.unwrap_or_else(|_| PathBuf::from("registry"));
476
477    if args.list {
478        // Respect positional filters: `ryra test --list whoami` shows only
479        // whoami tests. Same substring-contains semantics as the run path.
480        let filtered: Vec<registry::DiscoveredTest> = if args.tests.is_empty() {
481            discovered
482        } else {
483            discovered
484                .into_iter()
485                .filter(|t| args.tests.iter().any(|f| t.name().contains(f.as_str())))
486                .collect()
487        };
488        render_list(&filtered, registry_path.as_path(), args.verbose);
489        return Ok(());
490    }
491
492    // --keep-alive with no tests: boot a VM and block until Ctrl-C.
493    // This path needs VM prerequisites, so handle it after the no-vm branch below.
494    let keep_alive_interactive = args.keep_alive && args.tests.is_empty();
495
496    if discovered.is_empty() && !keep_alive_interactive {
497        anyhow::bail!("no tests found in registry at {}", registry_path.display());
498    }
499
500    // Filter tests (independent of VM prep — safe to do first)
501    let to_run: Vec<_> = if args.tests.is_empty() {
502        discovered.iter().collect()
503    } else {
504        discovered
505            .iter()
506            .filter(|t| args.tests.iter().any(|f| t.name().contains(f.as_str())))
507            .collect()
508    };
509
510    if to_run.is_empty() && !keep_alive_interactive {
511        anyhow::bail!("no tests matched the given filters");
512    }
513
514    // --no-vm: run entirely on the host. Skip all VM prerequisites, binary
515    // lookup, and image preparation since none of it is needed in bare mode.
516    if args.no_vm {
517        return run_bare(&args, &to_run, &registry_path).await;
518    }
519
520    let use_kvm = !args.no_kvm;
521    ryra_vm::check_prerequisites(use_kvm)?;
522
523    let memory_override = args.memory;
524    let spawn_opts = std::sync::Arc::new(SpawnOpts {
525        use_kvm,
526        memory_mb: memory_override.unwrap_or(2048),
527        cpus: args.cpus,
528        disk_gb: 20,
529    });
530
531    let ryra_bin = match &args.ryra_bin {
532        // Explicit --ryra-bin: trust the user, don't check freshness (the path
533        // may be from a different tree, CI artefact, etc.).
534        Some(p) => std::fs::canonicalize(p)?,
535        None => {
536            let bin = find_ryra_binary()?;
537            ensure_binary_fresh(&bin)?;
538            bin
539        }
540    };
541
542    // Compute max RAM needed across the tests we're actually running.
543    // The snapshot must be created at this size so all VMs can restore from it.
544    let max_memory: u32 = to_run
545        .iter()
546        .map(|t| memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, t)))
547        .max()
548        .unwrap_or(1024);
549
550    let base_image =
551        image::ensure_image(&args.distro, args.redownload, use_kvm, max_memory).await?;
552
553    if keep_alive_interactive {
554        return run_interactive_vm(&base_image, &spawn_opts, &ryra_bin, &registry_path).await;
555    }
556
557    let base_image = std::sync::Arc::new(base_image);
558    let registry_path = std::sync::Arc::new(registry_path);
559
560    // Prepare browser image only if a filtered test actually needs it
561    let any_needs_browser = to_run.iter().any(|t| t.needs_browser());
562    let browser_image = if any_needs_browser {
563        Some(std::sync::Arc::new(
564            image::ensure_browser_image(
565                &base_image,
566                &args.distro,
567                args.redownload,
568                use_kvm,
569                max_memory,
570            )
571            .await?,
572        ))
573    } else {
574        None
575    };
576
577    // Pre-pull all container images before spawning VMs.
578    let mut all_images: Vec<String> = to_run
579        .iter()
580        .flat_map(|t| registry::images_for_test(&registry_path, t))
581        .collect();
582    all_images.sort();
583    all_images.dedup();
584
585    println!("Pre-caching {} container images...", all_images.len());
586    for img in &all_images {
587        machine::ensure_image_cached(img).await?;
588    }
589
590    // Compute per-test memory first (needed for accurate parallelism calculation)
591    let test_memories: Vec<(&str, u32)> = to_run
592        .iter()
593        .map(|t| {
594            let mem =
595                memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, t));
596            (t.name(), mem)
597        })
598        .collect();
599
600    let mut sorted_mems: Vec<u32> = test_memories.iter().map(|(_, m)| *m).collect();
601    sorted_mems.sort_unstable_by(|a, b| b.cmp(a));
602    let effective_parallel = plan_parallelism(args.parallel, &sorted_mems);
603    for (name, mem) in &test_memories {
604        println!("  {name}: {mem}MB");
605    }
606    println!(
607        "\nRunning {} tests (parallel={})\n",
608        to_run.len(),
609        effective_parallel
610    );
611
612    let wall_clock = std::time::Instant::now();
613    let semaphore = std::sync::Arc::new(Semaphore::new(effective_parallel));
614    let mut handles = vec![];
615    let total_tests = to_run.len();
616    // Shared progress counters — each task increments these when its VM
617    // ends so the tail of the output doubles as a live progress ticker
618    // (works under --parallel, order-independent).
619    let progress_done = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
620    let progress_passed = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
621    // Start-order counter so each VM START line carries an [N/total] marker
622    // too. Under --parallel this is the order tests *begin*, not finish, but
623    // it still tells you how far into the run you are at a glance.
624    let progress_started = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
625
626    for test in to_run {
627        let permit = semaphore.clone().acquire_owned().await?;
628        let test_image: std::sync::Arc<image::Image> = if test.needs_browser() {
629            match browser_image.as_ref() {
630                Some(img) => img.clone(),
631                None => {
632                    anyhow::bail!(
633                        "test '{}' requires a browser image but none was prepared",
634                        test.name()
635                    );
636                }
637            }
638        } else {
639            base_image.clone()
640        };
641        let test_memory =
642            memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, test));
643        let test_disk = registry::vm_disk_for_test(&registry_path, test);
644        let spawn_opts = std::sync::Arc::new(SpawnOpts {
645            use_kvm,
646            memory_mb: test_memory,
647            cpus: args.cpus,
648            disk_gb: test_disk,
649        });
650        let ryra_bin = ryra_bin.clone();
651        let registry_path = registry_path.clone();
652        let keep_failed = args.keep_failed;
653        let keep_alive = args.keep_alive;
654        let verbose = args.verbose;
655        let single_test = total_tests == 1;
656        let name = test.name().to_string();
657        let has_quadlets = test.has_quadlets();
658        let progress_done = progress_done.clone();
659        let progress_passed = progress_passed.clone();
660        let progress_started = progress_started.clone();
661        // Extract quadlet_dir before spawning task (DiscoveredTest isn't Send)
662        let quadlet_dir = match test {
663            registry::DiscoveredTest::Simple { setup, .. } => setup.quadlet_dir.clone(),
664            registry::DiscoveredTest::Lifecycle { .. } => None,
665        };
666
667        handles.push(tokio::spawn(async move {
668            // `permit` holds a slot in the `--parallel` semaphore; must be
669            // alive until the task finishes. Kept as an explicit local so
670            // Drop order is obvious to readers (and to the compiler —
671            // `let _x = ...` used to be load-bearing here; drop at end
672            // via explicit bind + final drop avoids any NLL surprises).
673            let permit_guard = permit;
674            let id = machine::random_id();
675            let ssh_port = ports::allocate_ssh_port();
676            let start = std::time::Instant::now();
677            let started =
678                progress_started.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + 1;
679            println!("[{name}] ---- VM START [{started}/{total_tests}] ryra-test-{id} (ssh port {ssh_port}, {test_memory}MB RAM) ----");
680
681            // All fallible work lives in an inner async block so every exit
682            // path — including early returns for VM-boot or file-copy failures —
683            // flows through the single VM END reporting block below. Without
684            // this, a `return fail_result(...)` would skip the VM END print and
685            // the user would see back-to-back VM STARTs with no indication of
686            // what went wrong on the previous test.
687            let result: ScenarioResult = async {
688                let fail_result = |msg: String| ScenarioResult {
689                    name: name.clone(),
690                    events: vec![],
691                    duration: start.elapsed(),
692                    outcome: scenario::Outcome::Failed(msg),
693                };
694
695                // Re-discover tests inside task (DiscoveredTest isn't Send due to lifetime)
696                let test = if has_quadlets {
697                    let qdir = match quadlet_dir.as_ref() {
698                        Some(d) => d,
699                        None => return fail_result("quadlet_dir must be set for quadlet tests".into()),
700                    };
701                    match registry::discover_local_project(qdir) {
702                        Ok(Some(t)) => t,
703                        Ok(None) => return fail_result("local project not found (internal error)".into()),
704                        Err(e) => return fail_result(format!("local project discovery failed: {e:#}")),
705                    }
706                } else {
707                    let discovered = match registry::discover(&registry_path) {
708                        Ok(d) => d,
709                        Err(e) => return fail_result(format!("registry discovery failed: {e:#}")),
710                    };
711                    match discovered.into_iter().find(|t| t.name() == name) {
712                        Some(t) => t,
713                        None => return fail_result("test not found (internal error)".into()),
714                    }
715                };
716
717                // Spawn VM
718                let phase = std::time::Instant::now();
719                println!("[{name}] booting VM...");
720                let vm = match Machine::spawn(&test_image, &id, ssh_port, &spawn_opts).await {
721                    Ok(vm) => vm,
722                    Err(e) => return fail_result(format!("failed to spawn VM: {e:#}")),
723                };
724                println!("[{name}] VM ready ({:.1}s)", phase.elapsed().as_secs_f64());
725
726                // Copy ryra binary into VM
727                let phase = std::time::Instant::now();
728                if let Err(e) = machine::copy_ryra_to_vm(&vm, &ryra_bin).await {
729                    let _ = vm.destroy().await;
730                    return fail_result(format!("failed to copy ryra to VM: {e:#}"));
731                }
732
733                // Copy registry into VM (needed for dependency resolution)
734                if registry_path.exists()
735                    && let Err(e) = machine::copy_fixtures_to_vm(&vm, &registry_path).await {
736                        let _ = vm.destroy().await;
737                        return fail_result(format!("failed to copy registry to VM: {e:#}"));
738                    }
739
740                // Copy quadlet project files into VM
741                if let Some(ref qdir) = quadlet_dir
742                    && let Err(e) = machine::copy_project_to_vm(&vm, qdir).await {
743                        let _ = vm.destroy().await;
744                        return fail_result(format!("failed to copy project to VM: {e:#}"));
745                    }
746                println!("[{name}] files copied ({:.1}s)", phase.elapsed().as_secs_f64());
747
748                // Load cached container images into VM
749                let images = registry::images_for_test(&registry_path, &test);
750                if !images.is_empty() {
751                    let phase = std::time::Instant::now();
752                    if let Err(e) = machine::load_images_into_vm(&vm, &images).await {
753                        let _ = vm.destroy().await;
754                        return fail_result(format!("failed to load container images: {e:#}"));
755                    }
756                    println!("[{name}] images loaded ({:.1}s, {} images)", phase.elapsed().as_secs_f64(), images.len());
757                }
758
759                let setup_time = start.elapsed();
760                println!("[{name}] running tests (setup took {:.1}s)...", setup_time.as_secs_f64());
761                let executor = crate::executor::VmExecutor::new(&vm);
762                let vm_registry = std::path::Path::new("/opt/ryra-test-registry");
763                let result = match &test {
764                    registry::DiscoveredTest::Lifecycle { steps, .. } => {
765                        runner::run_lifecycle_test(&executor, &name, steps, verbose, !single_test, vm_registry, false, None).await
766                    }
767                    registry::DiscoveredTest::Simple { .. } => {
768                        runner::run_registry_test(&executor, &test, !single_test, None).await
769                    }
770                };
771
772                // On failure, save serial log to logs dir
773                if !result.passed() {
774                    let serial_log = vm.work_dir.join("serial.log");
775                    if let Ok(content) = tokio::fs::read_to_string(&serial_log).await {
776                        let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
777                        let fail_log_dir = workspace_root.join("crates/ryra-test/logs");
778                        let _ = tokio::fs::create_dir_all(&fail_log_dir).await;
779                        let dest = fail_log_dir.join(format!("{name}-serial.log"));
780                        let _ = tokio::fs::write(&dest, &content).await;
781                        eprintln!("[{name}] serial log saved to: {}", dest.display());
782
783                        if verbose {
784                            let lines: Vec<&str> = content.lines().collect();
785                            let start_idx = lines.len().saturating_sub(50);
786                            eprintln!("[{name}] --- serial log (last 50 lines) ---");
787                            for line in &lines[start_idx..] {
788                                eprintln!("  {line}");
789                            }
790                            eprintln!("[{name}] --- end serial log ---");
791                        }
792                    }
793                }
794
795                // Decide whether to keep the VM alive
796                let should_keep = keep_alive || (keep_failed && !result.passed());
797                if should_keep {
798                    println!("[{name}] keeping VM alive:");
799                    vm.keep_alive();
800                } else if let Err(e) = vm.destroy().await {
801                    eprintln!("[{name}] warning: failed to destroy VM: {e}");
802                }
803
804                result
805            }
806            .await;
807
808            // Single end-of-task reporting path — runs for every outcome above,
809            // so the user always sees a VM END line (with the failure reason
810            // for fails) before the next test's VM START prints.
811            use std::sync::atomic::Ordering;
812            let done = progress_done.fetch_add(1, Ordering::SeqCst) + 1;
813            if result.passed() {
814                progress_passed.fetch_add(1, Ordering::SeqCst);
815            }
816            let passed_so_far = progress_passed.load(Ordering::SeqCst);
817            let failed_so_far = done - passed_so_far;
818            let wall = wall_clock.elapsed().as_secs();
819            let (mins, secs) = (wall / 60, wall % 60);
820            let status = match &result.outcome {
821                scenario::Outcome::Passed => "PASS".to_string(),
822                scenario::Outcome::Skipped => "SKIP".to_string(),
823                scenario::Outcome::Failed(msg) => {
824                    let first = msg.lines().next().unwrap_or("");
825                    let trimmed: String = first.chars().take(140).collect();
826                    if first.chars().count() > 140 {
827                        format!("FAIL: {trimmed}…")
828                    } else {
829                        format!("FAIL: {trimmed}")
830                    }
831                }
832            };
833            println!(
834                "[{name}] ---- VM END ({status}, test {:.1}s) ---- \
835                 [{done}/{total_tests} · {passed_so_far} pass · {failed_so_far} fail · \
836                 total {mins}:{secs:02}]",
837                start.elapsed().as_secs_f64()
838            );
839            drop(permit_guard); // release the --parallel slot AFTER reporting
840            result
841        }));
842    }
843
844    let mut results = vec![];
845    for handle in handles {
846        results.push(handle.await?);
847    }
848
849    let total_elapsed = wall_clock.elapsed();
850    print_summary(&results, total_elapsed);
851    save_results(&results, total_elapsed)?;
852
853    if results.iter().any(|r| !r.passed()) {
854        std::process::exit(1);
855    }
856
857    Ok(())
858}
859
860/// Boot a VM with ryra + registry installed, print SSH command, block until Ctrl-C.
861async fn run_interactive_vm(
862    base_image: &image::Image,
863    spawn_opts: &SpawnOpts,
864    ryra_bin: &Path,
865    registry_path: &Path,
866) -> Result<()> {
867    let id = machine::random_id();
868    let ssh_port = ports::allocate_ssh_port();
869
870    println!("Booting interactive VM ryra-test-{id} (ssh port {ssh_port})...");
871    let vm = Machine::spawn(base_image, &id, ssh_port, spawn_opts).await?;
872    println!("VM ready.");
873
874    println!("Copying ryra binary...");
875    machine::copy_ryra_to_vm(&vm, ryra_bin).await?;
876
877    println!("Copying registry...");
878    machine::copy_fixtures_to_vm(&vm, registry_path).await?;
879
880    println!("\nVM is ready. Connect with:\n");
881    println!(
882        "  ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
883         -i {}/id_ed25519 -p {} ryra@{}",
884        vm.work_dir.display(),
885        vm.ssh_port,
886        vm.ssh_host,
887    );
888    println!("\nRegistry is at /opt/ryra-test-registry in the VM.");
889    println!("Press Ctrl-C to stop the VM.\n");
890
891    tokio::signal::ctrl_c().await?;
892
893    println!("\nShutting down VM...");
894    vm.destroy().await?;
895    Ok(())
896}
897
/// Root of the host-test sandbox: `<data home>/services-test`.
///
/// Everything a host run reads or writes that isn't a quadlet symlink lives
/// under this one folder, on real disk: service data (`services/`), the
/// preferences sandbox (`config/`), the ledger, and run reports (`reports/`).
/// Because it sits next to the real `~/.local/share/services/`, the whole test
/// footprint can be removed with a single `rm -rf`.
///
/// The data home is `$XDG_DATA_HOME` when that variable is set and non-empty,
/// otherwise `$HOME/.local/share`. Returns `None` only when the fallback is
/// needed and `$HOME` is unset.
pub fn test_sandbox_root() -> Option<PathBuf> {
    let xdg_data_home = std::env::var_os("XDG_DATA_HOME").filter(|dir| !dir.is_empty());
    let data_home = if let Some(dir) = xdg_data_home {
        PathBuf::from(dir)
    } else {
        let home = std::env::var_os("HOME")?;
        PathBuf::from(home).join(".local/share")
    };
    Some(data_home.join("services-test"))
}
912
913/// Path to the host-managed-services ledger: the services this harness has
914/// installed on the host but not yet torn down. Persisted across runs so a
915/// later run can tell *its own* leftovers (from an aborted run — safe to
916/// reclaim) apart from services the user installed for real (must never be
917/// touched). Lives in the sandbox root (real disk — it must survive reboots,
918/// so never `/tmp`). Returns `None` only if `$HOME` is unset.
919fn host_ledger_path() -> Option<PathBuf> {
920    Some(test_sandbox_root()?.join("ledger"))
921}
922
923/// Ledger entries still installed on the host: leftovers from a previous
924/// aborted run. The ledger only ever records harness installs, so purging
925/// these is always safe: user-installed services are never in it.
926pub fn host_leftovers() -> Vec<String> {
927    let ledger = ledger_load();
928    let installed = scan_installed();
929    ledger.intersection(&installed).cloned().collect()
930}
931
932/// Load the ledger (newline-separated service names). Missing file → empty.
933pub fn ledger_load() -> BTreeSet<String> {
934    let Some(path) = host_ledger_path() else {
935        return BTreeSet::new();
936    };
937    match std::fs::read_to_string(&path) {
938        Ok(s) => s
939            .lines()
940            .map(str::trim)
941            .filter(|l| !l.is_empty())
942            .map(String::from)
943            .collect(),
944        Err(_) => BTreeSet::new(),
945    }
946}
947
948/// Persist the ledger. Best-effort: a write failure only degrades the
949/// next run to the *conservative* side (it would treat our leftovers as
950/// user-owned and skip them rather than delete anything), so we warn but
951/// don't abort the test run.
952fn ledger_save(set: &BTreeSet<String>) {
953    let Some(path) = host_ledger_path() else {
954        return;
955    };
956    if let Some(parent) = path.parent()
957        && let Err(e) = std::fs::create_dir_all(parent)
958    {
959        eprintln!("warning: could not create ledger dir: {e}");
960        return;
961    }
962    let body = set.iter().cloned().collect::<Vec<_>>().join("\n");
963    if let Err(e) = std::fs::write(&path, body) {
964        eprintln!("warning: could not write host-managed-services ledger: {e}");
965    }
966}
967
968/// Purge a test's own services from the host, dependents before
969/// dependencies (reverse install order). Failures are non-fatal: a
970/// not-installed service is a no-op. Callers guarantee these services are
971/// harness-owned (never user-installed), so purging is always safe.
972pub async fn purge_services(
973    executor: &crate::executor::LocalExecutor,
974    svcs: &[String],
975    when: &str,
976) {
977    use crate::executor::Executor;
978    for svc in svcs.iter().rev() {
979        println!("  cleaning up {svc} (purge) {when}");
980        let _ = executor
981            .exec(&format!("ryra remove --purge {svc} -y"))
982            .await;
983    }
984}
985
986/// Snapshot the ryra-managed services currently installed on the host.
987/// A scan failure degrades to "none" so the caller never deletes blindly.
988fn scan_installed() -> BTreeSet<String> {
989    match ryra_core::scan_managed_services() {
990        Ok(v) => v.into_iter().collect(),
991        Err(e) => {
992            eprintln!("warning: could not scan installed services ({e}); assuming none");
993            BTreeSet::new()
994        }
995    }
996}
997
/// Collect every `<label>.internal` hostname appearing in `s` into `out`.
///
/// A hit is the literal suffix `.internal` preceded by a non-empty run of
/// ASCII letters, digits or `-` (the label). The label match is single-level:
/// in `a.b.internal` only `b.internal` is recorded. Hostnames are inserted
/// lower-cased.
///
/// The suffix must also *end* there: a match immediately followed by a letter,
/// digit, `-` or `_` is skipped. Whole Playwright spec files are fed through
/// this scanner, and without that check ordinary code such as
/// `cfg.internalState` or `this.internals` would be reported as the hosts
/// `cfg.internal` / `this.internal`. A following `.`, `:`, `/`, quote, or the
/// end of the input all still count as a hostname boundary.
fn scan_internal_hosts(s: &str, out: &mut BTreeSet<String>) {
    const SUFFIX: &str = ".internal";
    let is_label_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'-';

    let bytes = s.as_bytes();
    for (dot, _) in s.match_indices(SUFFIX) {
        let end = dot + SUFFIX.len();

        // Reject `.internalFoo`, `.internal_bar`, `.internal-x`: the text after
        // the dot is a longer word, not the `.internal` suffix.
        if bytes
            .get(end)
            .is_some_and(|&b| is_label_byte(b) || b == b'_')
        {
            continue;
        }

        // Walk backwards over the label. Only ASCII bytes are accepted, so
        // `dot - label_len` is always a valid `str` slice boundary.
        let label_len = bytes[..dot]
            .iter()
            .rev()
            .take_while(|&&b| is_label_byte(b))
            .count();
        if label_len > 0 {
            out.insert(s[dot - label_len..end].to_ascii_lowercase());
        }
    }
}
1017
1018/// The `*.internal` hostnames the selected tests will actually contact, so the
1019/// runner can prime sudo (for `/etc/hosts` writes) *only* when a needed host is
1020/// missing — never on a run whose hosts already resolve.
1021///
1022/// Walks parsed lifecycle steps (`add` args/env, shell bodies, http
1023/// url/body/headers, playwright env) and reads each referenced playwright spec
1024/// file — its `*.internal` URL default catches auto-promoted hosts that never
1025/// appear in the toml. Simple tests (basic 127.0.0.1 installs) are scanned too,
1026/// cheaply, for completeness.
1027fn referenced_internal_hosts(
1028    tests: &[&registry::DiscoveredTest],
1029    registry_path: &Path,
1030) -> BTreeSet<String> {
1031    use crate::test_toml::StepDef;
1032    let browser_dir = registry_path.join("tests").join("browser");
1033    let mut out = BTreeSet::new();
1034    for t in tests {
1035        match t {
1036            registry::DiscoveredTest::Lifecycle { steps, .. } => {
1037                for step in steps {
1038                    match step {
1039                        StepDef::Add { args, env, .. } => {
1040                            if let Some(a) = args {
1041                                scan_internal_hosts(a, &mut out);
1042                            }
1043                            env.values().for_each(|v| scan_internal_hosts(v, &mut out));
1044                        }
1045                        StepDef::Shell { run, .. } => scan_internal_hosts(run, &mut out),
1046                        StepDef::Http {
1047                            url, body, headers, ..
1048                        } => {
1049                            scan_internal_hosts(url, &mut out);
1050                            if let Some(b) = body {
1051                                scan_internal_hosts(b, &mut out);
1052                            }
1053                            headers
1054                                .values()
1055                                .for_each(|v| scan_internal_hosts(v, &mut out));
1056                        }
1057                        StepDef::Playwright { spec, env, .. } => {
1058                            env.values().for_each(|v| scan_internal_hosts(v, &mut out));
1059                            if let Ok(txt) = std::fs::read_to_string(browser_dir.join(spec)) {
1060                                scan_internal_hosts(&txt, &mut out);
1061                            }
1062                        }
1063                        _ => {}
1064                    }
1065                }
1066            }
1067            registry::DiscoveredTest::Simple { tests: entries, .. } => {
1068                for e in entries {
1069                    scan_internal_hosts(&e.run, &mut out);
1070                    e.env
1071                        .values()
1072                        .for_each(|v| scan_internal_hosts(v, &mut out));
1073                }
1074            }
1075        }
1076    }
1077    out
1078}
1079
/// The `*.internal` hostnames the selected tests contact that don't already
/// resolve via `/etc/hosts` — the ones ryra will have to add (a privileged
/// write). Empty when every contacted host already resolves.
///
/// `/etc/hosts` is read once and parsed as the file format defines it:
/// everything from a `#` to the end of the line is a comment, the first field
/// of an entry is the address, and the remaining fields are the names it
/// maps. Only those names count, compared case-insensitively — a hostname that
/// merely appears inside a comment does not resolve and is reported as
/// missing. An unreadable or absent `/etc/hosts` is treated as empty, so every
/// needed host is reported.
///
/// The result preserves the ascending order of `needed`.
fn missing_internal_hosts(needed: &BTreeSet<String>) -> Vec<String> {
    let hosts = std::fs::read_to_string("/etc/hosts").unwrap_or_default();

    // Every name (canonical or alias) that some non-comment entry maps.
    let known: Vec<&str> = hosts
        .lines()
        .flat_map(|line| {
            let entry = line.split('#').next().unwrap_or("");
            // Skip the address column; only the names after it can match.
            entry.split_whitespace().skip(1)
        })
        .collect();

    needed
        .iter()
        .filter(|host| !known.iter().any(|name| name.eq_ignore_ascii_case(host.as_str())))
        .cloned()
        .collect()
}
1093
1094/// Acquire sudo once, up front, for a run that has privileged steps — so the
1095/// `sudo -n` those steps issue (inside captured, non-TTY shells that can't
1096/// themselves prompt) succeed silently for the whole run.
1097///
1098/// "Privileged steps" is a general notion, not a hosts special-case: a run
1099/// qualifies if it must add `*.internal` hostnames to `/etc/hosts` (detected
1100/// automatically) *or* any selected test declares `requires_sudo` (the escape
1101/// hatch for tests that shell out to sudo for any other reason). `reasons` is
1102/// the human-readable list of why; empty means nothing privileged → no-op.
1103///
1104/// Returns a keep-alive task that refreshes the credential every 60s for the
1105/// run's duration (sudo's default `timestamp_timeout` is far shorter than a
1106/// full suite). Behaviour:
1107/// - No reasons → `None`; sudo is never touched.
1108/// - Passwordless sudo → `None`; per-step `sudo -n` already works.
1109/// - Password required + a TTY → one prompt here, listing the reasons.
1110/// - Password required + no TTY (CI capturing output) → `None`, degrade
1111///   gracefully. CI uses `--vm`, which provisions its own passwordless sudo.
1112async fn acquire_run_sudo(reasons: &[String]) -> Option<tokio::task::JoinHandle<()>> {
1113    use std::io::IsTerminal;
1114    use std::time::Duration;
1115
1116    if reasons.is_empty() {
1117        return None;
1118    }
1119
1120    let passwordless = tokio::process::Command::new("sudo")
1121        .args(["-n", "true"])
1122        .status()
1123        .await
1124        .map(|s| s.success())
1125        .unwrap_or(false);
1126    if passwordless {
1127        return None;
1128    }
1129    if !std::io::stderr().is_terminal() {
1130        return None;
1131    }
1132
1133    eprintln!("\n  This run needs sudo for:");
1134    for r in reasons {
1135        eprintln!("    - {r}");
1136    }
1137    eprintln!("  Caching sudo once so it doesn't prompt mid-test:");
1138    let primed = tokio::process::Command::new("sudo")
1139        .arg("-v")
1140        .status()
1141        .await
1142        .map(|s| s.success())
1143        .unwrap_or(false);
1144    if !primed {
1145        eprintln!("  (skipped — privileged steps may fail; they'll say which.)\n");
1146        return None;
1147    }
1148
1149    Some(tokio::spawn(async move {
1150        loop {
1151            tokio::time::sleep(Duration::from_secs(60)).await;
1152            // `-n`: a keep-alive must never block on a prompt. If the cache
1153            // ever lapses, the next privileged step re-warms it itself.
1154            let _ = tokio::process::Command::new("sudo")
1155                .args(["-n", "-v"])
1156                .status()
1157                .await;
1158        }
1159    }))
1160}
1161
1162/// Run tests directly on the host without a VM.
1163///
1164/// Bare mode shares the *real* host's ryra state, so isolation is built
1165/// from three guarantees:
1166///   1. Preferences are redirected to a throwaway dir (`RYRA_CONFIG_DIR`),
1167///      so tests never read or clobber the user's SMTP/auth/backup creds.
1168///   2. Services the user already installed are detected up front and left
1169///      strictly untouched; any test that would install over one is skipped.
1170///   3. Every test purges its own services afterwards so they don't pile up
1171///      and exhaust RAM — and a ledger records harness-owned installs so a
1172///      later run can reclaim leftovers from an aborted run.
1173async fn run_bare(
1174    args: &Args,
1175    to_run: &[&registry::DiscoveredTest],
1176    registry_path: &Path,
1177) -> Result<()> {
1178    use crate::executor::Executor;
1179    let wall_clock = std::time::Instant::now();
1180
1181    // Acquire sudo once, up front, if (and only if) this run has privileged
1182    // steps: `*.internal` hostnames the tests contact that aren't in /etc/hosts
1183    // yet (ryra adds them), or a test that declares `requires_sudo`. Held warm
1184    // for the run so captured, non-TTY steps' `sudo -n` succeed; aborted before
1185    // we return. A run with nothing privileged never touches sudo.
1186    let mut sudo_reasons: Vec<String> = Vec::new();
1187    let missing_hosts = missing_internal_hosts(&referenced_internal_hosts(to_run, registry_path));
1188    if !missing_hosts.is_empty() {
1189        sudo_reasons.push(format!(
1190            "adding {} to /etc/hosts (OIDC/HTTPS service URLs)",
1191            missing_hosts.join(", ")
1192        ));
1193    }
1194    let sudo_tests: Vec<&str> = to_run
1195        .iter()
1196        .filter(|t| t.requires_sudo())
1197        .map(|t| t.name())
1198        .collect();
1199    if !sudo_tests.is_empty() {
1200        sudo_reasons.push(format!(
1201            "test(s) that declare requires_sudo: {}",
1202            sudo_tests.join(", ")
1203        ));
1204    }
1205    let sudo_keepalive = acquire_run_sudo(&sudo_reasons).await;
1206
1207    // 1. Sandbox the whole run under ~/.local/share/services-test/ (real disk,
1208    //    a sibling of the real services dir). Service data, preferences, the
1209    //    ledger, and reports all live here — one folder, one wipe. Only the
1210    //    quadlet *symlinks* land outside it, in the systemd-mandated dir. Tests
1211    //    resolve data paths through ${RYRA_DATA_DIR:-…}, so they find the
1212    //    sandbox here and fall back to the real dir under --vm / normal use.
1213    let sandbox = test_sandbox_root().context("cannot resolve test sandbox root ($HOME unset)")?;
1214
1215    // Base executor for cleanup operations (no per-test sandbox needed).
1216    let base_executor = crate::executor::LocalExecutor::with_registry(registry_path);
1217
1218    // 2. Anything installed that we didn't install is the user's — off-limits.
1219    let mut ledger = ledger_load();
1220    let installed = scan_installed();
1221    let user_owned: BTreeSet<String> = installed.difference(&ledger).cloned().collect();
1222    if !user_owned.is_empty() {
1223        let list = user_owned.iter().cloned().collect::<Vec<_>>().join(", ");
1224        println!(
1225            "Leaving {} already-installed service(s) untouched: {list}",
1226            user_owned.len()
1227        );
1228        println!("  Tests installing these are skipped. If they're leftovers from an aborted run,");
1229        println!("  purge them yourself with `ryra remove --purge <name> -y`.");
1230    }
1231
1232    // 3. Reclaim our own leftovers from a previous aborted run (frees RAM).
1233    let leftovers: Vec<String> = ledger.intersection(&installed).cloned().collect();
1234    for svc in &leftovers {
1235        println!("  reclaiming leftover {svc} (purge) from a previous run");
1236        let _ = base_executor
1237            .exec(&format!("ryra remove --purge {svc} -y"))
1238            .await;
1239        ledger.remove(svc);
1240    }
1241    if !leftovers.is_empty() {
1242        ledger_save(&ledger);
1243    }
1244
1245    let mut results = Vec::new();
1246    let total = to_run.len();
1247    println!("\nRunning {total} tests on host (bare mode)\n");
1248
1249    for (idx, test) in to_run.iter().enumerate() {
1250        let n = idx + 1;
1251        let name = test.name().to_string();
1252        let svcs: Vec<String> = test.services().iter().map(|s| s.to_string()).collect();
1253
1254        // Skip any test that would install over a user-owned service.
1255        if let Some(conflict) = svcs.iter().find(|s| user_owned.contains(*s)) {
1256            println!(
1257                "---- SKIP [{n}/{total}] {name}: '{conflict}' already installed (left untouched) ----"
1258            );
1259            results.push(ScenarioResult {
1260                name,
1261                events: Vec::new(),
1262                duration: Duration::ZERO,
1263                outcome: Outcome::Skipped,
1264            });
1265            continue;
1266        }
1267
1268        println!("---- START [{n}/{total}] {name} (bare) ----");
1269
1270        // Record intent before installing, so an abort mid-test still leaves a
1271        // breadcrumb the next run can reclaim.
1272        for svc in &svcs {
1273            ledger.insert(svc.clone());
1274        }
1275        ledger_save(&ledger);
1276
1277        // Per-test sandbox: each test gets its own config and data dirs so
1278        // no state leaks between tests (same pattern as per-test results).
1279        let test_dir = sandbox.join("tests").join(&name);
1280        let config_dir = test_dir.join("config");
1281        let data_dir = test_dir.join("services");
1282        let _ = std::fs::remove_dir_all(&config_dir);
1283        std::fs::create_dir_all(&config_dir)
1284            .with_context(|| format!("failed to create {}", config_dir.display()))?;
1285        std::fs::create_dir_all(&data_dir)
1286            .with_context(|| format!("failed to create {}", data_dir.display()))?;
1287        let executor = crate::executor::LocalExecutor::with_registry(registry_path)
1288            .with_config_dir(&config_dir)
1289            .with_data_dir(&data_dir);
1290
1291        purge_services(&executor, &svcs, "before test").await;
1292        let _ = executor
1293            .exec("rm -rf \"${XDG_CACHE_HOME:-$HOME/.cache}/services/default\"")
1294            .await;
1295
1296        let start = std::time::Instant::now();
1297        let result = match test {
1298            registry::DiscoveredTest::Lifecycle { steps, .. } => {
1299                runner::run_lifecycle_test(
1300                    &executor,
1301                    &name,
1302                    steps,
1303                    args.verbose,
1304                    false,
1305                    registry_path,
1306                    args.retest,
1307                    None,
1308                )
1309                .await
1310            }
1311            registry::DiscoveredTest::Simple { .. } => {
1312                runner::run_registry_test(&executor, test, false, None).await
1313            }
1314        };
1315
1316        let status = if result.passed() { "PASS" } else { "FAIL" };
1317        println!(
1318            "---- END [{n}/{total}] {name} ({status}, {:.1}s) ----",
1319            start.elapsed().as_secs_f64()
1320        );
1321
1322        // Tear down everything this test put on the host so nothing
1323        // accumulates and eats RAM.
1324        purge_services(&executor, &svcs, "after test").await;
1325        let leaked: Vec<String> = scan_installed()
1326            .into_iter()
1327            .filter(|s| !user_owned.contains(s) && !svcs.contains(s))
1328            .collect();
1329        if !leaked.is_empty() {
1330            purge_services(&executor, &leaked, "after test (side-effect)").await;
1331        }
1332        for svc in svcs.iter().chain(leaked.iter()) {
1333            ledger.remove(svc);
1334        }
1335        ledger_save(&ledger);
1336
1337        results.push(result);
1338    }
1339
1340    if let Some(h) = sudo_keepalive {
1341        h.abort();
1342    }
1343
1344    let total_elapsed = wall_clock.elapsed();
1345    print_summary(&results, total_elapsed);
1346    save_results(&results, total_elapsed)?;
1347
1348    if results
1349        .iter()
1350        .any(|r| matches!(r.outcome, Outcome::Failed(_)))
1351    {
1352        std::process::exit(1);
1353    }
1354
1355    Ok(())
1356}