Skip to main content

ryra_test/
lib.rs

1pub mod executor;
2pub mod registry;
3mod reports;
4mod runner;
5mod scenario;
6pub mod test_toml;
7
8use std::collections::BTreeSet;
9use std::path::{Path, PathBuf};
10use std::time::Duration;
11
12use anyhow::{Context, Result};
13use clap::Parser;
14use tokio::sync::Semaphore;
15
16use ryra_vm::image::Distro;
17use ryra_vm::machine::{self, Machine, SpawnOpts};
18use ryra_vm::{image, ports};
19use scenario::{Outcome, ScenarioResult};
20
21/// Install a Ctrl-C handler that kills all active VMs and exits.
22fn install_signal_handler() {
23    // We use the raw libc handler (not tokio::signal) so it works even if
24    // the tokio runtime is blocked or mid-shutdown.
25    unsafe {
26        libc::signal(
27            libc::SIGINT,
28            signal_handler as *const () as libc::sighandler_t,
29        );
30    }
31}
32
33extern "C" fn signal_handler(_sig: libc::c_int) {
34    // Write to stderr manually (signal-safe). Stay mode-agnostic here —
35    // cleanup_all_vms reports the VM count only when there's actually one.
36    let msg = b"\nInterrupted\n";
37    unsafe {
38        libc::write(2, msg.as_ptr() as *const libc::c_void, msg.len());
39    }
40    machine::cleanup_all_vms();
41    std::process::exit(130); // 128 + SIGINT
42}
43
44/// Render `--list` output. Two sections:
45///  1. **Service tests** — grouped under the owning service name
46///     (derived from `registry/<svc>/test.toml`).
47///  2. **Service-agnostic tests** — flat list from `registry/tests/*.toml`.
48///
49/// Each line shows the test name, step count, `[browser]` flag, and
50/// distinct step kinds so `playwright`/`shell`/`http` tell you what
51/// the test does at a glance.
52///
53/// When `verbose` is set, each test also gets a breakdown of every step
54/// (commands, URLs, polls, heredoc bodies) so the caller can see exactly
55/// what the test runs without opening the `.toml`.
56fn render_list(discovered: &[registry::DiscoveredTest], registry_path: &Path, verbose: bool) {
57    if discovered.is_empty() {
58        println!("No tests discovered.");
59        return;
60    }
61
62    let tests_dir = registry_path.join("tests");
63    let is_cross_cutting = |p: &Path| p.starts_with(&tests_dir);
64
65    // Group service tests by owning directory name; keep cross-cutting
66    // tests flat since each file already contains a single test.
67    let mut service_groups: Vec<(String, Vec<&registry::DiscoveredTest>)> = Vec::new();
68    let mut cross_cutting: Vec<&registry::DiscoveredTest> = Vec::new();
69    for test in discovered {
70        let src = test.source();
71        if is_cross_cutting(src) {
72            cross_cutting.push(test);
73            continue;
74        }
75        let svc = src
76            .parent()
77            .and_then(|p| p.file_name())
78            .and_then(|n| n.to_str())
79            .unwrap_or("<unknown>")
80            .to_string();
81        if let Some((_, bucket)) = service_groups.iter_mut().find(|(s, _)| s == &svc) {
82            bucket.push(test);
83        } else {
84            service_groups.push((svc, vec![test]));
85        }
86    }
87    service_groups.sort_by(|a, b| a.0.cmp(&b.0));
88    cross_cutting.sort_by(|a, b| a.name().cmp(b.name()));
89
90    let total_tests: usize = discovered.len();
91    let file_count = service_groups.len() + cross_cutting.len();
92    println!("{total_tests} tests across {file_count} files");
93
94    let line = |t: &registry::DiscoveredTest, indent: &str| {
95        let kinds = t.step_kinds().join(" → ");
96        let browser = if t.needs_browser() { " [browser]" } else { "" };
97        let step_count = t.test_count();
98        println!(
99            "{indent}{:<34} {} step{}{browser}  · {kinds}",
100            t.name(),
101            step_count,
102            if step_count == 1 { "" } else { "s" },
103        );
104        if !verbose {
105            return;
106        }
107        // Verbose: print each step's details. Use a deeper indent so the
108        // hierarchy (group → test → step lines) stays readable.
109        let step_indent = format!("{indent}    ");
110        if let registry::DiscoveredTest::Lifecycle { steps, .. } = t {
111            for (i, step) in steps.iter().enumerate() {
112                let described = step.describe();
113                if let Some((head, rest)) = described.split_first() {
114                    println!("{step_indent}{:>2}. {head}", i + 1);
115                    for l in rest {
116                        println!("{step_indent}    {l}");
117                    }
118                }
119            }
120        } else if let registry::DiscoveredTest::Simple { tests, .. } = t {
121            for (i, entry) in tests.iter().enumerate() {
122                println!(
123                    "{step_indent}{:>2}. shell '{}'  (timeout={}s)",
124                    i + 1,
125                    entry.name,
126                    entry.timeout_secs
127                );
128                for l in entry.run.trim().lines() {
129                    println!("{step_indent}    | {l}");
130                }
131            }
132        }
133    };
134
135    if !service_groups.is_empty() {
136        println!("─── Service tests  (registry/<service>/test.toml) ───");
137        for (svc, tests) in &service_groups {
138            println!("{svc}");
139            for t in tests {
140                line(t, "  ");
141            }
142        }
143    }
144
145    if !cross_cutting.is_empty() {
146        println!("─── Service-agnostic tests  (registry/tests/*.toml) ───");
147        for t in &cross_cutting {
148            line(t, "");
149        }
150    }
151}
152
153#[derive(Parser, Debug)]
154#[command(
155    name = "ryra-e2e",
156    about = "E2E test runner for ryra — spins up QEMU VMs for integration testing"
157)]
158pub struct Args {
159    /// Max concurrent VMs
160    #[arg(long, default_value_t = 1)]
161    pub parallel: usize,
162
163    /// Base image distro
164    #[arg(long, default_value_t = Distro::Debian13)]
165    pub distro: Distro,
166
167    /// Re-download the base cloud image
168    #[arg(long)]
169    pub redownload: bool,
170
171    /// Path to ryra binary
172    #[arg(long)]
173    pub ryra_bin: Option<PathBuf>,
174
175    /// Don't destroy VMs for failed tests (for debugging via SSH)
176    #[arg(long)]
177    pub keep_failed: bool,
178
179    /// Keep VM alive after tests complete (or boot without running tests).
180    /// Prints SSH connection command for interactive use.
181    #[arg(long)]
182    pub keep_alive: bool,
183
184    /// Disable KVM acceleration (use software emulation — slower)
185    #[arg(long)]
186    pub no_kvm: bool,
187
188    /// Run tests directly on the host without a VM
189    #[arg(long)]
190    pub no_vm: bool,
191
192    /// Skip setup steps (add/wait/remove/reset) and only run shell/playwright
193    /// steps. Use to re-run tests quickly when services are already installed.
194    #[arg(long)]
195    pub retest: bool,
196
197    /// VM memory in MB (overrides auto-detection from service requirements)
198    #[arg(long)]
199    pub memory: Option<u32>,
200
201    /// VM CPU count
202    #[arg(long, default_value_t = 2)]
203    pub cpus: u32,
204
205    /// Show serial log output on failure
206    #[arg(long, short)]
207    pub verbose: bool,
208
209    /// Path to registry directory (auto-detected if omitted)
210    #[arg(long)]
211    pub registry: Option<PathBuf>,
212
213    /// Path to a local project directory with test.toml (+ optional quadlet files)
214    #[arg(long)]
215    pub project: Option<PathBuf>,
216
217    /// List available tests
218    #[arg(long)]
219    pub list: bool,
220
221    /// Test names to run (runs all if empty, supports substring match)
222    pub tests: Vec<String>,
223}
224
225fn find_ryra_binary() -> Result<PathBuf> {
226    // The currently running binary is the one being tested — `ryra test` is a
227    // subcommand of `ryra` itself, so whichever binary the user launched is by
228    // definition the one we want to copy into VMs. Using current_exe avoids the
229    // old footgun where we'd silently prefer target/release/ryra even when the
230    // user had just rebuilt debug.
231    let exe = std::env::current_exe()
232        .context("failed to resolve current executable path for ryra binary")?;
233    std::fs::canonicalize(&exe).context("failed to canonicalize current executable path")
234}
235
236/// Walk `crates/` looking for any `.rs` or `Cargo.toml` newer than `binary`.
237/// Returns the newest offending source file, if any. Cheap (~few ms for <1000
238/// files) because we only stat metadata, not read contents.
239fn newest_source_newer_than(binary: &Path) -> Result<Option<(PathBuf, std::time::SystemTime)>> {
240    let bin_mtime = std::fs::metadata(binary)
241        .with_context(|| format!("stat binary {}", binary.display()))?
242        .modified()
243        .context("binary modified-time")?;
244    let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
245    let crates_dir = match std::fs::canonicalize(workspace_root.join("crates")) {
246        Ok(p) => p,
247        // Running outside the workspace (e.g. an installed binary) — no check.
248        Err(_) => return Ok(None),
249    };
250
251    fn is_source(path: &Path) -> bool {
252        if path.extension().and_then(|s| s.to_str()) == Some("rs") {
253            return true;
254        }
255        matches!(
256            path.file_name().and_then(|n| n.to_str()),
257            Some("Cargo.toml")
258        )
259    }
260
261    fn walk(
262        dir: &Path,
263        bin_mtime: std::time::SystemTime,
264        newest: &mut Option<(PathBuf, std::time::SystemTime)>,
265    ) -> Result<()> {
266        for entry in
267            std::fs::read_dir(dir).with_context(|| format!("read_dir {}", dir.display()))?
268        {
269            let entry = entry?;
270            let path = entry.path();
271            let ft = entry.file_type()?;
272            if ft.is_dir() {
273                // Skip build output dirs — they contain generated files we don't care about.
274                if matches!(
275                    path.file_name().and_then(|n| n.to_str()),
276                    Some("target") | Some(".git") | Some("node_modules")
277                ) {
278                    continue;
279                }
280                walk(&path, bin_mtime, newest)?;
281            } else if ft.is_file() && is_source(&path) {
282                let mtime = entry.metadata()?.modified()?;
283                if mtime > bin_mtime && newest.as_ref().is_none_or(|(_, t)| mtime > *t) {
284                    *newest = Some((path, mtime));
285                }
286            }
287        }
288        Ok(())
289    }
290
291    let mut newest = None;
292    walk(&crates_dir, bin_mtime, &mut newest)?;
293    Ok(newest)
294}
295
296/// Error out if the `ryra` binary we're about to ship into VMs is older than
297/// any workspace source file. This is the stale-binary footgun: `cargo build -p
298/// ryra-test` rebuilds the lib but leaves `target/release/ryra` untouched, so
299/// tests silently run against old behavior.
300fn ensure_binary_fresh(binary: &Path) -> Result<()> {
301    let Some((src, _)) = newest_source_newer_than(binary)? else {
302        return Ok(());
303    };
304    anyhow::bail!(
305        "ryra binary is older than source {}.\n  \
306         Binary:  {}\n  \
307         Rebuild: cargo build --release --bin ryra\n  \
308         (or pass --ryra-bin <path> to skip this check)",
309        src.display(),
310        binary.display(),
311    )
312}
313
314fn print_summary(results: &[ScenarioResult], wall_clock: std::time::Duration) {
315    println!("\n========================================");
316    println!("  Results");
317    println!("========================================\n");
318
319    // Only the *failures* get their full step trace dumped here — that's the
320    // bit you actually need to read inline. Passing tests would just spew
321    // every step's captured stdout; their full logs are saved to
322    // `reports/<test>/run.log` and pointed at by the path summary below.
323    let any_failed = results.iter().any(|r| r.outcome.is_fail());
324    for result in results.iter().filter(|r| r.outcome.is_fail()) {
325        print!("{result}");
326    }
327    if any_failed {
328        println!();
329    }
330
331    let passed = results.iter().filter(|r| r.passed()).count();
332    let failed = results
333        .iter()
334        .filter(|r| matches!(r.outcome, Outcome::Failed(_)))
335        .count();
336    let skipped = results
337        .iter()
338        .filter(|r| matches!(r.outcome, Outcome::Skipped))
339        .count();
340
341    println!("----------------------------------------");
342    println!(
343        "{passed} passed, {failed} failed, {skipped} skipped, {} total ({} wall clock)",
344        results.len(),
345        reports::humanize_secs(wall_clock.as_secs()),
346    );
347    println!("========================================");
348}
349
350fn save_results(results: &[ScenarioResult], wall_clock: std::time::Duration) -> Result<()> {
351    reports::save_run_results(results)?;
352    reports::print_results_paths(results, wall_clock);
353    Ok(())
354}
355
356/// Safety margin (MB) kept free beyond the VMs' own needs — for host processes,
357/// QEMU overhead, the kernel page cache, and the GPU compositor. Running this
358/// tight causes kernel-level thrashing and on Asahi can freeze the display.
359const HOST_RESERVE_MB: u64 = 1024;
360
361/// Decide how many VMs can safely run in parallel given current host memory.
362/// Returns the clamped parallel count (never more than `requested`, never below 1),
363/// and prints a report. Uses `sorted_mems_desc` so we pack the largest VMs first.
364fn plan_parallelism(requested: usize, sorted_mems_desc: &[u32]) -> usize {
365    let mem = match ryra_vm::read_host_memory() {
366        Some(m) => m,
367        None => {
368            let total_mb: u64 = sorted_mems_desc
369                .iter()
370                .take(requested)
371                .map(|m| *m as u64)
372                .sum();
373            println!("\nMax concurrent VM RAM: {total_mb}MB (host memory unknown)");
374            return requested.max(1);
375        }
376    };
377
378    let used_mb = mem.total_mb.saturating_sub(mem.available_mb);
379    println!(
380        "\nHost RAM: {}MB used / {}MB total ({}MB available, {}MB in swap)",
381        used_mb, mem.total_mb, mem.available_mb, mem.swap_used_mb
382    );
383
384    let budget = mem.available_mb.saturating_sub(HOST_RESERVE_MB);
385    let mut fit = 0usize;
386    let mut total = 0u64;
387    for m in sorted_mems_desc.iter().take(requested) {
388        let next = total + *m as u64;
389        if next > budget {
390            break;
391        }
392        total = next;
393        fit += 1;
394    }
395
396    let first_vm_mb = sorted_mems_desc.first().copied().unwrap_or(0) as u64;
397    if fit == 0 && first_vm_mb > 0 {
398        // Even one VM doesn't fit in budget — warn loudly but still let it run at
399        // parallel=1 so the user can choose to override with --memory.
400        eprintln!(
401            "WARNING: largest VM needs {}MB but only {}MB free after {}MB host reserve. \
402             Running anyway at --parallel=1 — expect swap pressure. Close apps or lower \
403             VM size with --memory.",
404            first_vm_mb, budget, HOST_RESERVE_MB
405        );
406        fit = 1;
407    }
408
409    let clamped = fit.min(requested).max(1);
410    if clamped < requested {
411        eprintln!(
412            "Reducing --parallel from {requested} to {clamped} to fit in {budget}MB RAM budget \
413             (total host RAM {}MB, {}MB reserved for host)",
414            mem.total_mb, HOST_RESERVE_MB
415        );
416    }
417    println!("Max concurrent VM RAM: {total}MB (parallel={clamped})");
418    clamped
419}
420
421/// Find the registry path — explicit arg, or auto-detect.
422fn resolve_registry_path(explicit: Option<&PathBuf>) -> Result<PathBuf> {
423    if let Some(p) = explicit {
424        return std::fs::canonicalize(p)
425            .with_context(|| format!("registry path not found: {}", p.display()));
426    }
427
428    let candidates = [
429        PathBuf::from("registry"),
430        PathBuf::from("crates/ryra-core/registry"),
431    ];
432    for c in &candidates {
433        if c.exists() {
434            return std::fs::canonicalize(c)
435                .with_context(|| format!("failed to resolve {}", c.display()));
436        }
437    }
438
439    anyhow::bail!("no registry found. Pass --registry <path> or run from the repo root")
440}
441
442/// Run the E2E test suite with the given arguments.
443pub async fn run(args: Args) -> Result<()> {
444    install_signal_handler();
445
446    // Check for local project first, then fall back to registry
447    let registry_path = resolve_registry_path(args.registry.as_ref());
448
449    let mut discovered = Vec::new();
450
451    // Discover local project tests (--project flag)
452    if let Some(ref project_dir) = args.project {
453        match registry::discover_local_project(project_dir)? {
454            Some(test) => discovered.push(test),
455            None => {
456                anyhow::bail!(
457                    "no test.toml found in project directory: {}",
458                    project_dir.display()
459                );
460            }
461        }
462    }
463
464    // Discover registry tests (only if no explicit --project or if registry is also available)
465    if let Ok(ref reg_path) = registry_path
466        && let Ok(reg_tests) = registry::discover(reg_path)
467    {
468        // If --project was explicitly passed, skip registry tests
469        if args.project.is_none() {
470            discovered.extend(reg_tests);
471        }
472    }
473
474    // Need a registry path for dependency resolution even with local projects
475    let registry_path = registry_path.unwrap_or_else(|_| PathBuf::from("registry"));
476
477    if args.list {
478        // Respect positional filters: `ryra test --list whoami` shows only
479        // whoami tests. Same substring-contains semantics as the run path.
480        let filtered: Vec<registry::DiscoveredTest> = if args.tests.is_empty() {
481            discovered
482        } else {
483            discovered
484                .into_iter()
485                .filter(|t| args.tests.iter().any(|f| t.name().contains(f.as_str())))
486                .collect()
487        };
488        render_list(&filtered, registry_path.as_path(), args.verbose);
489        return Ok(());
490    }
491
492    // --keep-alive with no tests: boot a VM and block until Ctrl-C.
493    // This path needs VM prerequisites, so handle it after the no-vm branch below.
494    let keep_alive_interactive = args.keep_alive && args.tests.is_empty();
495
496    if discovered.is_empty() && !keep_alive_interactive {
497        anyhow::bail!("no tests found in registry at {}", registry_path.display());
498    }
499
500    // Filter tests (independent of VM prep — safe to do first)
501    let to_run: Vec<_> = if args.tests.is_empty() {
502        discovered.iter().collect()
503    } else {
504        discovered
505            .iter()
506            .filter(|t| args.tests.iter().any(|f| t.name().contains(f.as_str())))
507            .collect()
508    };
509
510    if to_run.is_empty() && !keep_alive_interactive {
511        anyhow::bail!("no tests matched the given filters");
512    }
513
514    // Fresh report directory for this run. Previous run's output is discarded.
515    reports::wipe_reports_dir()?;
516
517    // --no-vm: run entirely on the host. Skip all VM prerequisites, binary
518    // lookup, and image preparation since none of it is needed in bare mode.
519    if args.no_vm {
520        return run_bare(&args, &to_run, &registry_path).await;
521    }
522
523    let use_kvm = !args.no_kvm;
524    ryra_vm::check_prerequisites(use_kvm)?;
525
526    let memory_override = args.memory;
527    let spawn_opts = std::sync::Arc::new(SpawnOpts {
528        use_kvm,
529        memory_mb: memory_override.unwrap_or(2048),
530        cpus: args.cpus,
531        disk_gb: 20,
532    });
533
534    let ryra_bin = match &args.ryra_bin {
535        // Explicit --ryra-bin: trust the user, don't check freshness (the path
536        // may be from a different tree, CI artefact, etc.).
537        Some(p) => std::fs::canonicalize(p)?,
538        None => {
539            let bin = find_ryra_binary()?;
540            ensure_binary_fresh(&bin)?;
541            bin
542        }
543    };
544
545    // Compute max RAM needed across the tests we're actually running.
546    // The snapshot must be created at this size so all VMs can restore from it.
547    let max_memory: u32 = to_run
548        .iter()
549        .map(|t| memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, t)))
550        .max()
551        .unwrap_or(1024);
552
553    let base_image =
554        image::ensure_image(&args.distro, args.redownload, use_kvm, max_memory).await?;
555
556    if keep_alive_interactive {
557        return run_interactive_vm(&base_image, &spawn_opts, &ryra_bin, &registry_path).await;
558    }
559
560    let base_image = std::sync::Arc::new(base_image);
561    let registry_path = std::sync::Arc::new(registry_path);
562
563    // Prepare browser image only if a filtered test actually needs it
564    let any_needs_browser = to_run.iter().any(|t| t.needs_browser());
565    let browser_image = if any_needs_browser {
566        Some(std::sync::Arc::new(
567            image::ensure_browser_image(
568                &base_image,
569                &args.distro,
570                args.redownload,
571                use_kvm,
572                max_memory,
573            )
574            .await?,
575        ))
576    } else {
577        None
578    };
579
580    // Pre-pull all container images before spawning VMs.
581    let mut all_images: Vec<String> = to_run
582        .iter()
583        .flat_map(|t| registry::images_for_test(&registry_path, t))
584        .collect();
585    all_images.sort();
586    all_images.dedup();
587
588    println!("Pre-caching {} container images...", all_images.len());
589    for img in &all_images {
590        machine::ensure_image_cached(img).await?;
591    }
592
593    // Compute per-test memory first (needed for accurate parallelism calculation)
594    let test_memories: Vec<(&str, u32)> = to_run
595        .iter()
596        .map(|t| {
597            let mem =
598                memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, t));
599            (t.name(), mem)
600        })
601        .collect();
602
603    let mut sorted_mems: Vec<u32> = test_memories.iter().map(|(_, m)| *m).collect();
604    sorted_mems.sort_unstable_by(|a, b| b.cmp(a));
605    let effective_parallel = plan_parallelism(args.parallel, &sorted_mems);
606    for (name, mem) in &test_memories {
607        println!("  {name}: {mem}MB");
608    }
609    println!(
610        "\nRunning {} tests (parallel={})\n",
611        to_run.len(),
612        effective_parallel
613    );
614
615    let wall_clock = std::time::Instant::now();
616    let semaphore = std::sync::Arc::new(Semaphore::new(effective_parallel));
617    let mut handles = vec![];
618    let total_tests = to_run.len();
619    // Shared progress counters — each task increments these when its VM
620    // ends so the tail of the output doubles as a live progress ticker
621    // (works under --parallel, order-independent).
622    let progress_done = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
623    let progress_passed = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
624    // Start-order counter so each VM START line carries an [N/total] marker
625    // too. Under --parallel this is the order tests *begin*, not finish, but
626    // it still tells you how far into the run you are at a glance.
627    let progress_started = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
628
629    for test in to_run {
630        let permit = semaphore.clone().acquire_owned().await?;
631        let test_image: std::sync::Arc<image::Image> = if test.needs_browser() {
632            match browser_image.as_ref() {
633                Some(img) => img.clone(),
634                None => {
635                    anyhow::bail!(
636                        "test '{}' requires a browser image but none was prepared",
637                        test.name()
638                    );
639                }
640            }
641        } else {
642            base_image.clone()
643        };
644        let test_memory =
645            memory_override.unwrap_or_else(|| registry::vm_memory_for_test(&registry_path, test));
646        let test_disk = registry::vm_disk_for_test(&registry_path, test);
647        let spawn_opts = std::sync::Arc::new(SpawnOpts {
648            use_kvm,
649            memory_mb: test_memory,
650            cpus: args.cpus,
651            disk_gb: test_disk,
652        });
653        let ryra_bin = ryra_bin.clone();
654        let registry_path = registry_path.clone();
655        let keep_failed = args.keep_failed;
656        let keep_alive = args.keep_alive;
657        let verbose = args.verbose;
658        let single_test = total_tests == 1;
659        let name = test.name().to_string();
660        let has_quadlets = test.has_quadlets();
661        let progress_done = progress_done.clone();
662        let progress_passed = progress_passed.clone();
663        let progress_started = progress_started.clone();
664        // Extract quadlet_dir before spawning task (DiscoveredTest isn't Send)
665        let quadlet_dir = match test {
666            registry::DiscoveredTest::Simple { setup, .. } => setup.quadlet_dir.clone(),
667            registry::DiscoveredTest::Lifecycle { .. } => None,
668        };
669
670        handles.push(tokio::spawn(async move {
671            // `permit` holds a slot in the `--parallel` semaphore; must be
672            // alive until the task finishes. Kept as an explicit local so
673            // Drop order is obvious to readers (and to the compiler —
674            // `let _x = ...` used to be load-bearing here; drop at end
675            // via explicit bind + final drop avoids any NLL surprises).
676            let permit_guard = permit;
677            let id = machine::random_id();
678            let ssh_port = ports::allocate_ssh_port();
679            let start = std::time::Instant::now();
680            let started =
681                progress_started.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + 1;
682            println!("[{name}] ---- VM START [{started}/{total_tests}] ryra-test-{id} (ssh port {ssh_port}, {test_memory}MB RAM) ----");
683
684            // All fallible work lives in an inner async block so every exit
685            // path — including early returns for VM-boot or file-copy failures —
686            // flows through the single VM END reporting block below. Without
687            // this, a `return fail_result(...)` would skip the VM END print and
688            // the user would see back-to-back VM STARTs with no indication of
689            // what went wrong on the previous test.
690            let result: ScenarioResult = async {
691                let fail_result = |msg: String| ScenarioResult {
692                    name: name.clone(),
693                    events: vec![],
694                    duration: start.elapsed(),
695                    outcome: scenario::Outcome::Failed(msg),
696                };
697
698                // Re-discover tests inside task (DiscoveredTest isn't Send due to lifetime)
699                let test = if has_quadlets {
700                    let qdir = match quadlet_dir.as_ref() {
701                        Some(d) => d,
702                        None => return fail_result("quadlet_dir must be set for quadlet tests".into()),
703                    };
704                    match registry::discover_local_project(qdir) {
705                        Ok(Some(t)) => t,
706                        Ok(None) => return fail_result("local project not found (internal error)".into()),
707                        Err(e) => return fail_result(format!("local project discovery failed: {e:#}")),
708                    }
709                } else {
710                    let discovered = match registry::discover(&registry_path) {
711                        Ok(d) => d,
712                        Err(e) => return fail_result(format!("registry discovery failed: {e:#}")),
713                    };
714                    match discovered.into_iter().find(|t| t.name() == name) {
715                        Some(t) => t,
716                        None => return fail_result("test not found (internal error)".into()),
717                    }
718                };
719
720                // Spawn VM
721                let phase = std::time::Instant::now();
722                println!("[{name}] booting VM...");
723                let vm = match Machine::spawn(&test_image, &id, ssh_port, &spawn_opts).await {
724                    Ok(vm) => vm,
725                    Err(e) => return fail_result(format!("failed to spawn VM: {e:#}")),
726                };
727                println!("[{name}] VM ready ({:.1}s)", phase.elapsed().as_secs_f64());
728
729                // Copy ryra binary into VM
730                let phase = std::time::Instant::now();
731                if let Err(e) = machine::copy_ryra_to_vm(&vm, &ryra_bin).await {
732                    let _ = vm.destroy().await;
733                    return fail_result(format!("failed to copy ryra to VM: {e:#}"));
734                }
735
736                // Copy registry into VM (needed for dependency resolution)
737                if registry_path.exists()
738                    && let Err(e) = machine::copy_fixtures_to_vm(&vm, &registry_path).await {
739                        let _ = vm.destroy().await;
740                        return fail_result(format!("failed to copy registry to VM: {e:#}"));
741                    }
742
743                // Copy quadlet project files into VM
744                if let Some(ref qdir) = quadlet_dir
745                    && let Err(e) = machine::copy_project_to_vm(&vm, qdir).await {
746                        let _ = vm.destroy().await;
747                        return fail_result(format!("failed to copy project to VM: {e:#}"));
748                    }
749                println!("[{name}] files copied ({:.1}s)", phase.elapsed().as_secs_f64());
750
751                // Load cached container images into VM
752                let images = registry::images_for_test(&registry_path, &test);
753                if !images.is_empty() {
754                    let phase = std::time::Instant::now();
755                    if let Err(e) = machine::load_images_into_vm(&vm, &images).await {
756                        let _ = vm.destroy().await;
757                        return fail_result(format!("failed to load container images: {e:#}"));
758                    }
759                    println!("[{name}] images loaded ({:.1}s, {} images)", phase.elapsed().as_secs_f64(), images.len());
760                }
761
762                let setup_time = start.elapsed();
763                println!("[{name}] running tests (setup took {:.1}s)...", setup_time.as_secs_f64());
764                let executor = crate::executor::VmExecutor::new(&vm);
765                let vm_registry = std::path::Path::new("/opt/ryra-test-registry");
766                let result = match &test {
767                    registry::DiscoveredTest::Lifecycle { steps, .. } => {
768                        runner::run_lifecycle_test(&executor, &name, steps, verbose, !single_test, vm_registry, false).await
769                    }
770                    registry::DiscoveredTest::Simple { .. } => {
771                        runner::run_registry_test(&executor, &test, !single_test).await
772                    }
773                };
774
775                // On failure, save serial log to logs dir
776                if !result.passed() {
777                    let serial_log = vm.work_dir.join("serial.log");
778                    if let Ok(content) = tokio::fs::read_to_string(&serial_log).await {
779                        let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
780                        let fail_log_dir = workspace_root.join("crates/ryra-test/logs");
781                        let _ = tokio::fs::create_dir_all(&fail_log_dir).await;
782                        let dest = fail_log_dir.join(format!("{name}-serial.log"));
783                        let _ = tokio::fs::write(&dest, &content).await;
784                        eprintln!("[{name}] serial log saved to: {}", dest.display());
785
786                        if verbose {
787                            let lines: Vec<&str> = content.lines().collect();
788                            let start_idx = lines.len().saturating_sub(50);
789                            eprintln!("[{name}] --- serial log (last 50 lines) ---");
790                            for line in &lines[start_idx..] {
791                                eprintln!("  {line}");
792                            }
793                            eprintln!("[{name}] --- end serial log ---");
794                        }
795                    }
796                }
797
798                // Decide whether to keep the VM alive
799                let should_keep = keep_alive || (keep_failed && !result.passed());
800                if should_keep {
801                    println!("[{name}] keeping VM alive:");
802                    vm.keep_alive();
803                } else if let Err(e) = vm.destroy().await {
804                    eprintln!("[{name}] warning: failed to destroy VM: {e}");
805                }
806
807                result
808            }
809            .await;
810
811            // Single end-of-task reporting path — runs for every outcome above,
812            // so the user always sees a VM END line (with the failure reason
813            // for fails) before the next test's VM START prints.
814            use std::sync::atomic::Ordering;
815            let done = progress_done.fetch_add(1, Ordering::SeqCst) + 1;
816            if result.passed() {
817                progress_passed.fetch_add(1, Ordering::SeqCst);
818            }
819            let passed_so_far = progress_passed.load(Ordering::SeqCst);
820            let failed_so_far = done - passed_so_far;
821            let wall = wall_clock.elapsed().as_secs();
822            let (mins, secs) = (wall / 60, wall % 60);
823            let status = match &result.outcome {
824                scenario::Outcome::Passed => "PASS".to_string(),
825                scenario::Outcome::Skipped => "SKIP".to_string(),
826                scenario::Outcome::Failed(msg) => {
827                    let first = msg.lines().next().unwrap_or("");
828                    let trimmed: String = first.chars().take(140).collect();
829                    if first.chars().count() > 140 {
830                        format!("FAIL: {trimmed}…")
831                    } else {
832                        format!("FAIL: {trimmed}")
833                    }
834                }
835            };
836            println!(
837                "[{name}] ---- VM END ({status}, test {:.1}s) ---- \
838                 [{done}/{total_tests} · {passed_so_far} pass · {failed_so_far} fail · \
839                 total {mins}:{secs:02}]",
840                start.elapsed().as_secs_f64()
841            );
842            drop(permit_guard); // release the --parallel slot AFTER reporting
843            result
844        }));
845    }
846
847    let mut results = vec![];
848    for handle in handles {
849        results.push(handle.await?);
850    }
851
852    let total_elapsed = wall_clock.elapsed();
853    print_summary(&results, total_elapsed);
854    save_results(&results, total_elapsed)?;
855
856    if results.iter().any(|r| !r.passed()) {
857        std::process::exit(1);
858    }
859
860    Ok(())
861}
862
863/// Boot a VM with ryra + registry installed, print SSH command, block until Ctrl-C.
864async fn run_interactive_vm(
865    base_image: &image::Image,
866    spawn_opts: &SpawnOpts,
867    ryra_bin: &Path,
868    registry_path: &Path,
869) -> Result<()> {
870    let id = machine::random_id();
871    let ssh_port = ports::allocate_ssh_port();
872
873    println!("Booting interactive VM ryra-test-{id} (ssh port {ssh_port})...");
874    let vm = Machine::spawn(base_image, &id, ssh_port, spawn_opts).await?;
875    println!("VM ready.");
876
877    println!("Copying ryra binary...");
878    machine::copy_ryra_to_vm(&vm, ryra_bin).await?;
879
880    println!("Copying registry...");
881    machine::copy_fixtures_to_vm(&vm, registry_path).await?;
882
883    println!("\nVM is ready. Connect with:\n");
884    println!(
885        "  ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
886         -i {}/id_ed25519 -p {} ryra@{}",
887        vm.work_dir.display(),
888        vm.ssh_port,
889        vm.ssh_host,
890    );
891    println!("\nRegistry is at /opt/ryra-test-registry in the VM.");
892    println!("Press Ctrl-C to stop the VM.\n");
893
894    tokio::signal::ctrl_c().await?;
895
896    println!("\nShutting down VM...");
897    vm.destroy().await?;
898    Ok(())
899}
900
/// Root of the host-test sandbox. Everything a host run reads or writes that
/// isn't a quadlet symlink lives under here, on real disk: service data
/// (`services/`), the preferences sandbox (`config/`), the ledger, and run
/// reports (`reports/`). It's `~/.local/share/services-test/` (honouring
/// `XDG_DATA_HOME`), a sibling of the real `~/.local/share/services/`, so the
/// whole test footprint is one folder you can `rm -rf`.
///
/// `XDG_DATA_HOME` is used only when it holds an absolute path. An unset,
/// empty, or relative value falls back to `$HOME/.local/share`: the XDG Base
/// Directory specification says relative values must be ignored, and a
/// relative sandbox root would move with the current working directory.
///
/// Returns `None` if the fallback is needed and `$HOME` is unset.
pub(crate) fn test_sandbox_root() -> Option<PathBuf> {
    let xdg_data_home = std::env::var_os("XDG_DATA_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this also covers the "set but
        // empty" case.
        .filter(|dir| dir.is_absolute());
    let base = match xdg_data_home {
        Some(dir) => dir,
        None => PathBuf::from(std::env::var_os("HOME")?).join(".local/share"),
    };
    Some(base.join("services-test"))
}
915
916/// Path to the host-managed-services ledger: the services this harness has
917/// installed on the host but not yet torn down. Persisted across runs so a
918/// later run can tell *its own* leftovers (from an aborted run — safe to
919/// reclaim) apart from services the user installed for real (must never be
920/// touched). Lives in the sandbox root (real disk — it must survive reboots,
921/// so never `/tmp`). Returns `None` only if `$HOME` is unset.
922fn host_ledger_path() -> Option<PathBuf> {
923    Some(test_sandbox_root()?.join("ledger"))
924}
925
926/// Load the ledger (newline-separated service names). Missing file → empty.
927fn ledger_load() -> BTreeSet<String> {
928    let Some(path) = host_ledger_path() else {
929        return BTreeSet::new();
930    };
931    match std::fs::read_to_string(&path) {
932        Ok(s) => s
933            .lines()
934            .map(str::trim)
935            .filter(|l| !l.is_empty())
936            .map(String::from)
937            .collect(),
938        Err(_) => BTreeSet::new(),
939    }
940}
941
942/// Persist the ledger. Best-effort: a write failure only degrades the
943/// next run to the *conservative* side (it would treat our leftovers as
944/// user-owned and skip them rather than delete anything), so we warn but
945/// don't abort the test run.
946fn ledger_save(set: &BTreeSet<String>) {
947    let Some(path) = host_ledger_path() else {
948        return;
949    };
950    if let Some(parent) = path.parent()
951        && let Err(e) = std::fs::create_dir_all(parent)
952    {
953        eprintln!("warning: could not create ledger dir: {e}");
954        return;
955    }
956    let body = set.iter().cloned().collect::<Vec<_>>().join("\n");
957    if let Err(e) = std::fs::write(&path, body) {
958        eprintln!("warning: could not write host-managed-services ledger: {e}");
959    }
960}
961
962/// Purge a test's own services from the host, dependents before
963/// dependencies (reverse install order). Failures are non-fatal: a
964/// not-installed service is a no-op. Callers guarantee these services are
965/// harness-owned (never user-installed), so purging is always safe.
966async fn purge_services(executor: &crate::executor::LocalExecutor, svcs: &[String], when: &str) {
967    use crate::executor::Executor;
968    for svc in svcs.iter().rev() {
969        println!("  cleaning up {svc} (purge) {when}");
970        let _ = executor
971            .exec(&format!("ryra remove --purge {svc} -y"))
972            .await;
973    }
974}
975
976/// Snapshot the ryra-managed services currently installed on the host.
977/// A scan failure degrades to "none" so the caller never deletes blindly.
978fn scan_installed() -> BTreeSet<String> {
979    match ryra_core::scan_managed_services() {
980        Ok(v) => v.into_iter().collect(),
981        Err(e) => {
982            eprintln!("warning: could not scan installed services ({e}); assuming none");
983            BTreeSet::new()
984        }
985    }
986}
987
/// Collect every `<label>.internal` hostname appearing in `s` into `out`,
/// lower-cased.
///
/// A hostname is recognised as a non-empty run of ASCII letters, digits and
/// `-` immediately followed by the literal `.internal`, where the suffix is
/// *not* itself continued by another letter, digit, `-` or `_`. That trailing
/// boundary check keeps ordinary code and prose such as `this.internalState`,
/// `x.internals` or `obj.internal_cache` (common in scanned shell and
/// Playwright sources) from being reported as hosts.
///
/// Only the single label directly before the suffix is captured, and the
/// suffix itself is matched case-sensitively.
fn scan_internal_hosts(s: &str, out: &mut BTreeSet<String>) {
    const SUFFIX: &str = ".internal";

    /// Bytes that may appear inside a hostname label.
    fn is_label_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'-'
    }

    let bytes = s.as_bytes();
    for (idx, _) in s.match_indices(SUFFIX) {
        let end = idx + SUFFIX.len();

        // `.internal` followed by more identifier characters is a longer
        // word (e.g. `.internalState`), not the `.internal` pseudo-TLD.
        if matches!(bytes.get(end), Some(&next) if is_label_byte(next) || next == b'_') {
            continue;
        }

        // Walk backwards over the label. Every accepted byte is ASCII, so
        // `idx - label_len` always lands on a UTF-8 character boundary.
        let label_len = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| is_label_byte(b))
            .count();
        if label_len > 0 {
            out.insert(s[idx - label_len..end].to_ascii_lowercase());
        }
    }
}
1007
1008/// The `*.internal` hostnames the selected tests will actually contact, so the
1009/// runner can prime sudo (for `/etc/hosts` writes) *only* when a needed host is
1010/// missing — never on a run whose hosts already resolve.
1011///
1012/// Walks parsed lifecycle steps (`add` args/env, shell bodies, http
1013/// url/body/headers, playwright env) and reads each referenced playwright spec
1014/// file — its `*.internal` URL default catches auto-promoted hosts that never
1015/// appear in the toml. Simple tests (basic 127.0.0.1 installs) are scanned too,
1016/// cheaply, for completeness.
1017fn referenced_internal_hosts(
1018    tests: &[&registry::DiscoveredTest],
1019    registry_path: &Path,
1020) -> BTreeSet<String> {
1021    use crate::test_toml::StepDef;
1022    let browser_dir = registry_path.join("tests").join("browser");
1023    let mut out = BTreeSet::new();
1024    for t in tests {
1025        match t {
1026            registry::DiscoveredTest::Lifecycle { steps, .. } => {
1027                for step in steps {
1028                    match step {
1029                        StepDef::Add { args, env, .. } => {
1030                            if let Some(a) = args {
1031                                scan_internal_hosts(a, &mut out);
1032                            }
1033                            env.values().for_each(|v| scan_internal_hosts(v, &mut out));
1034                        }
1035                        StepDef::Shell { run, .. } => scan_internal_hosts(run, &mut out),
1036                        StepDef::Http {
1037                            url, body, headers, ..
1038                        } => {
1039                            scan_internal_hosts(url, &mut out);
1040                            if let Some(b) = body {
1041                                scan_internal_hosts(b, &mut out);
1042                            }
1043                            headers
1044                                .values()
1045                                .for_each(|v| scan_internal_hosts(v, &mut out));
1046                        }
1047                        StepDef::Playwright { spec, env, .. } => {
1048                            env.values().for_each(|v| scan_internal_hosts(v, &mut out));
1049                            if let Ok(txt) = std::fs::read_to_string(browser_dir.join(spec)) {
1050                                scan_internal_hosts(&txt, &mut out);
1051                            }
1052                        }
1053                        _ => {}
1054                    }
1055                }
1056            }
1057            registry::DiscoveredTest::Simple { tests: entries, .. } => {
1058                for e in entries {
1059                    scan_internal_hosts(&e.run, &mut out);
1060                    e.env
1061                        .values()
1062                        .for_each(|v| scan_internal_hosts(v, &mut out));
1063                }
1064            }
1065        }
1066    }
1067    out
1068}
1069
/// The `*.internal` hostnames the selected tests contact that don't already
/// resolve via `/etc/hosts` — the ones ryra will have to add (a privileged
/// write). Empty when every contacted host already resolves.
///
/// An unreadable or missing `/etc/hosts` is treated as empty, so every needed
/// host is reported as missing. The result keeps the iteration order of
/// `needed`.
fn missing_internal_hosts(needed: &BTreeSet<String>) -> Vec<String> {
    let hosts = std::fs::read_to_string("/etc/hosts").unwrap_or_default();
    needed
        .iter()
        .filter(|host| !hosts_text_lists(&hosts, host))
        .cloned()
        .collect()
}

/// Whether `hosts` (the text of a hosts file) has an active entry naming
/// `name`.
///
/// Everything from the first `#` on a line is a comment and is ignored, so a
/// hostname mentioned only in a trailing comment (or on a commented-out line)
/// does not count as present. Names are compared ASCII-case-insensitively,
/// since hostnames are case-insensitive.
fn hosts_text_lists(hosts: &str, name: &str) -> bool {
    hosts.lines().any(|line| {
        let entry = line.split('#').next().unwrap_or("");
        entry
            .split_whitespace()
            .any(|field| field.eq_ignore_ascii_case(name))
    })
}
1083
1084/// Acquire sudo once, up front, for a run that has privileged steps — so the
1085/// `sudo -n` those steps issue (inside captured, non-TTY shells that can't
1086/// themselves prompt) succeed silently for the whole run.
1087///
1088/// "Privileged steps" is a general notion, not a hosts special-case: a run
1089/// qualifies if it must add `*.internal` hostnames to `/etc/hosts` (detected
1090/// automatically) *or* any selected test declares `requires_sudo` (the escape
1091/// hatch for tests that shell out to sudo for any other reason). `reasons` is
1092/// the human-readable list of why; empty means nothing privileged → no-op.
1093///
1094/// Returns a keep-alive task that refreshes the credential every 60s for the
1095/// run's duration (sudo's default `timestamp_timeout` is far shorter than a
1096/// full suite). Behaviour:
1097/// - No reasons → `None`; sudo is never touched.
1098/// - Passwordless sudo → `None`; per-step `sudo -n` already works.
1099/// - Password required + a TTY → one prompt here, listing the reasons.
1100/// - Password required + no TTY (CI capturing output) → `None`, degrade
1101///   gracefully. CI uses `--vm`, which provisions its own passwordless sudo.
1102async fn acquire_run_sudo(reasons: &[String]) -> Option<tokio::task::JoinHandle<()>> {
1103    use std::io::IsTerminal;
1104    use std::time::Duration;
1105
1106    if reasons.is_empty() {
1107        return None;
1108    }
1109
1110    let passwordless = tokio::process::Command::new("sudo")
1111        .args(["-n", "true"])
1112        .status()
1113        .await
1114        .map(|s| s.success())
1115        .unwrap_or(false);
1116    if passwordless {
1117        return None;
1118    }
1119    if !std::io::stderr().is_terminal() {
1120        return None;
1121    }
1122
1123    eprintln!("\n  This run needs sudo for:");
1124    for r in reasons {
1125        eprintln!("    - {r}");
1126    }
1127    eprintln!("  Caching sudo once so it doesn't prompt mid-test:");
1128    let primed = tokio::process::Command::new("sudo")
1129        .arg("-v")
1130        .status()
1131        .await
1132        .map(|s| s.success())
1133        .unwrap_or(false);
1134    if !primed {
1135        eprintln!("  (skipped — privileged steps may fail; they'll say which.)\n");
1136        return None;
1137    }
1138
1139    Some(tokio::spawn(async move {
1140        loop {
1141            tokio::time::sleep(Duration::from_secs(60)).await;
1142            // `-n`: a keep-alive must never block on a prompt. If the cache
1143            // ever lapses, the next privileged step re-warms it itself.
1144            let _ = tokio::process::Command::new("sudo")
1145                .args(["-n", "-v"])
1146                .status()
1147                .await;
1148        }
1149    }))
1150}
1151
1152/// Run tests directly on the host without a VM.
1153///
1154/// Bare mode shares the *real* host's ryra state, so isolation is built
1155/// from three guarantees:
1156///   1. Preferences are redirected to a throwaway dir (`RYRA_CONFIG_DIR`),
1157///      so tests never read or clobber the user's SMTP/auth/backup creds.
1158///   2. Services the user already installed are detected up front and left
1159///      strictly untouched; any test that would install over one is skipped.
1160///   3. Every test purges its own services afterwards so they don't pile up
1161///      and exhaust RAM — and a ledger records harness-owned installs so a
1162///      later run can reclaim leftovers from an aborted run.
1163async fn run_bare(
1164    args: &Args,
1165    to_run: &[&registry::DiscoveredTest],
1166    registry_path: &Path,
1167) -> Result<()> {
1168    use crate::executor::Executor;
1169    let wall_clock = std::time::Instant::now();
1170
1171    // Acquire sudo once, up front, if (and only if) this run has privileged
1172    // steps: `*.internal` hostnames the tests contact that aren't in /etc/hosts
1173    // yet (ryra adds them), or a test that declares `requires_sudo`. Held warm
1174    // for the run so captured, non-TTY steps' `sudo -n` succeed; aborted before
1175    // we return. A run with nothing privileged never touches sudo.
1176    let mut sudo_reasons: Vec<String> = Vec::new();
1177    let missing_hosts = missing_internal_hosts(&referenced_internal_hosts(to_run, registry_path));
1178    if !missing_hosts.is_empty() {
1179        sudo_reasons.push(format!(
1180            "adding {} to /etc/hosts (OIDC/HTTPS service URLs)",
1181            missing_hosts.join(", ")
1182        ));
1183    }
1184    let sudo_tests: Vec<&str> = to_run
1185        .iter()
1186        .filter(|t| t.requires_sudo())
1187        .map(|t| t.name())
1188        .collect();
1189    if !sudo_tests.is_empty() {
1190        sudo_reasons.push(format!(
1191            "test(s) that declare requires_sudo: {}",
1192            sudo_tests.join(", ")
1193        ));
1194    }
1195    let sudo_keepalive = acquire_run_sudo(&sudo_reasons).await;
1196
1197    // 1. Sandbox the whole run under ~/.local/share/services-test/ (real disk,
1198    //    a sibling of the real services dir). Service data, preferences, the
1199    //    ledger, and reports all live here — one folder, one wipe. Only the
1200    //    quadlet *symlinks* land outside it, in the systemd-mandated dir. Tests
1201    //    resolve data paths through ${RYRA_DATA_DIR:-…}, so they find the
1202    //    sandbox here and fall back to the real dir under --vm / normal use.
1203    let sandbox = test_sandbox_root().context("cannot resolve test sandbox root ($HOME unset)")?;
1204    let data_dir = sandbox.join("services");
1205    let config_dir = sandbox.join("config");
1206    // Fresh preferences each run so a previous run's SMTP/auth/backup config
1207    // can't leak in. Service data is managed per-service (reclaimed/torn down);
1208    // the ledger persists; reports are wiped at run start.
1209    let _ = std::fs::remove_dir_all(&config_dir);
1210    std::fs::create_dir_all(&config_dir)
1211        .with_context(|| format!("failed to create {}", config_dir.display()))?;
1212    std::fs::create_dir_all(&data_dir)
1213        .with_context(|| format!("failed to create {}", data_dir.display()))?;
1214    let executor = crate::executor::LocalExecutor::with_registry(registry_path)
1215        .with_config_dir(&config_dir)
1216        .with_data_dir(&data_dir);
1217
1218    // 2. Anything installed that we didn't install is the user's — off-limits.
1219    let mut ledger = ledger_load();
1220    let installed = scan_installed();
1221    let user_owned: BTreeSet<String> = installed.difference(&ledger).cloned().collect();
1222    if !user_owned.is_empty() {
1223        let list = user_owned.iter().cloned().collect::<Vec<_>>().join(", ");
1224        println!(
1225            "Leaving {} already-installed service(s) untouched: {list}",
1226            user_owned.len()
1227        );
1228        println!("  Tests installing these are skipped. If they're leftovers from an aborted run,");
1229        println!("  purge them yourself with `ryra remove --purge <name> -y`.");
1230    }
1231
1232    // 3. Reclaim our own leftovers from a previous aborted run (frees RAM).
1233    let leftovers: Vec<String> = ledger.intersection(&installed).cloned().collect();
1234    for svc in &leftovers {
1235        println!("  reclaiming leftover {svc} (purge) from a previous run");
1236        let _ = executor
1237            .exec(&format!("ryra remove --purge {svc} -y"))
1238            .await;
1239        ledger.remove(svc);
1240    }
1241    if !leftovers.is_empty() {
1242        ledger_save(&ledger);
1243    }
1244
1245    let mut results = Vec::new();
1246    let total = to_run.len();
1247    println!("\nRunning {total} tests on host (bare mode)\n");
1248
1249    for (idx, test) in to_run.iter().enumerate() {
1250        let n = idx + 1;
1251        let name = test.name().to_string();
1252        let svcs: Vec<String> = test.services().iter().map(|s| s.to_string()).collect();
1253
1254        // Skip any test that would install over a user-owned service.
1255        if let Some(conflict) = svcs.iter().find(|s| user_owned.contains(*s)) {
1256            println!(
1257                "---- SKIP [{n}/{total}] {name}: '{conflict}' already installed (left untouched) ----"
1258            );
1259            results.push(ScenarioResult {
1260                name,
1261                events: Vec::new(),
1262                duration: Duration::ZERO,
1263                outcome: Outcome::Skipped,
1264            });
1265            continue;
1266        }
1267
1268        println!("---- START [{n}/{total}] {name} (bare) ----");
1269
1270        // Record intent before installing, so an abort mid-test still leaves a
1271        // breadcrumb the next run can reclaim.
1272        for svc in &svcs {
1273            ledger.insert(svc.clone());
1274        }
1275        ledger_save(&ledger);
1276
1277        // Reset the preferences sandbox so a previous test's `--smtp`/`--auth`/
1278        // backup config can't leak in. This matters: a `--smtp=inbucket` test
1279        // writes SMTP into preferences.toml, and without a reset the *next*
1280        // `--smtp` add sees SMTP already configured and skips installing
1281        // inbucket — so its mail step finds nothing. Per-test, not per-run.
1282        let _ = std::fs::remove_dir_all(&config_dir);
1283        let _ = std::fs::create_dir_all(&config_dir);
1284
1285        // Pre-clean this test's own services (a stale install from a crashed
1286        // earlier run of the same test shouldn't cascade in). The default-
1287        // registry cache is also cleared — tests like diff-whoami mutate it,
1288        // and it's a cache, not user data.
1289        purge_services(&executor, &svcs, "before test").await;
1290        let _ = executor
1291            .exec("rm -rf \"${XDG_CACHE_HOME:-$HOME/.cache}/services/default\"")
1292            .await;
1293
1294        let start = std::time::Instant::now();
1295        let result = match test {
1296            registry::DiscoveredTest::Lifecycle { steps, .. } => {
1297                runner::run_lifecycle_test(
1298                    &executor,
1299                    &name,
1300                    steps,
1301                    args.verbose,
1302                    // Bare runs are serial and each test is bracketed by a
1303                    // `---- START name ----` banner, so the per-line prefix is
1304                    // redundant — drop it.
1305                    false,
1306                    registry_path,
1307                    args.retest,
1308                )
1309                .await
1310            }
1311            registry::DiscoveredTest::Simple { .. } => {
1312                runner::run_registry_test(&executor, test, false).await
1313            }
1314        };
1315
1316        let status = if result.passed() { "PASS" } else { "FAIL" };
1317        println!(
1318            "---- END [{n}/{total}] {name} ({status}, {:.1}s) ----",
1319            start.elapsed().as_secs_f64()
1320        );
1321
1322        // Tear down everything this test put on the host so nothing
1323        // accumulates and eats RAM — pass or fail. That's the declared
1324        // services (purged first, in reverse order so dependents go before
1325        // dependencies) plus anything installed as a side-effect with no
1326        // explicit add step (e.g. inbucket via `--smtp=inbucket`). The
1327        // leftover sweep is safe: `user_owned` was fixed at startup and is
1328        // never in the set, so we only ever remove this test's own footprint.
1329        purge_services(&executor, &svcs, "after test").await;
1330        let leaked: Vec<String> = scan_installed()
1331            .into_iter()
1332            .filter(|s| !user_owned.contains(s) && !svcs.contains(s))
1333            .collect();
1334        if !leaked.is_empty() {
1335            purge_services(&executor, &leaked, "after test (side-effect)").await;
1336        }
1337        for svc in svcs.iter().chain(leaked.iter()) {
1338            ledger.remove(svc);
1339        }
1340        ledger_save(&ledger);
1341
1342        results.push(result);
1343    }
1344
1345    if let Some(h) = sudo_keepalive {
1346        h.abort();
1347    }
1348
1349    let total_elapsed = wall_clock.elapsed();
1350    print_summary(&results, total_elapsed);
1351    save_results(&results, total_elapsed)?;
1352
1353    if results
1354        .iter()
1355        .any(|r| matches!(r.outcome, Outcome::Failed(_)))
1356    {
1357        std::process::exit(1);
1358    }
1359
1360    Ok(())
1361}