Skip to main content

ryra_test/
runner.rs

1use std::time::{Duration, Instant};
2
3use anyhow::Result;
4
5use crate::executor::Executor;
6use crate::registry::{DiscoveredTest, TestEntry};
7use crate::scenario::{Event, EventKind, OnEvent, Outcome, ScenarioResult};
8use crate::test_toml::StepDef;
9
/// Root directory of installed service data, written as a shell parameter
/// expansion rather than a resolved path.
///
/// The shell running the command picks `$RYRA_DATA_DIR` when it is set (the
/// host test sandbox, or a user's relocated services dir) and otherwise falls
/// back to `~/.local/share/services`. Every service-data path the runner and
/// registry tests build goes through this constant, so the same command
/// string works under the sandbox, inside a VM where the variable is unset,
/// and on a normal install.
const DATA_ROOT_SH: &str = "${RYRA_DATA_DIR:-$HOME/.local/share/services}";
16
17fn emit(events: &mut Vec<Event>, ev: Event, on_event: &Option<OnEvent>) {
18    if let Some(cb) = on_event {
19        cb(&ev);
20    }
21    events.push(ev);
22}
23
24fn print_event_result(prefix: &str, event: &Event) {
25    let elapsed = format!("{:.1}s", event.duration.as_secs_f64());
26    match &event.outcome {
27        Outcome::Passed => println!("{prefix}    ok ({elapsed})"),
28        Outcome::Failed(msg) => println!("{prefix}    FAIL ({elapsed}) — {msg}"),
29        Outcome::Skipped => println!("{prefix}    skip"),
30    }
31}
32
33/// Execute a simple (non-lifecycle) test suite inside a VM.
34///
35/// 1. Deploys registry services with `ryra add` (no explicit init —
36///    `ryra add` bootstraps config on its first run)
37/// 2. Waits for declared ports
38/// 3. If quadlets are present, copies them to systemd dir, reloads, starts them
39/// 4. Sources `.env` files
40/// 5. Runs each test command via SSH, checks exit code
41pub async fn run_registry_test(
42    vm: &dyn Executor,
43    test: &DiscoveredTest,
44    prefixed: bool,
45    on_event: Option<OnEvent>,
46) -> ScenarioResult {
47    let start = Instant::now();
48    let name = test.name();
49    // Line prefix: `[name] ` when output from parallel tests interleaves on one
50    // terminal, empty when a `---- START name ----` banner already scopes the
51    // block (serial bare runs). See run_lifecycle_test for the same convention.
52    let p = if prefixed {
53        format!("[{name}] ")
54    } else {
55        String::new()
56    };
57    let mut events = Vec::new();
58    let mut failed = false;
59
60    let (services, quadlets) = match test {
61        DiscoveredTest::Simple { setup, .. } => (&setup.services, &setup.quadlets),
62        DiscoveredTest::Lifecycle { .. } => {
63            // Lifecycle tests should use run_lifecycle_test instead
64            return ScenarioResult {
65                name: name.to_string(),
66                events: vec![],
67                duration: start.elapsed(),
68                outcome: Outcome::Failed("run_registry_test called for lifecycle test".to_string()),
69            };
70        }
71    };
72
73    // Collect env overrides from all tests — these may include values for
74    // required env vars that `ryra add` needs to succeed non-interactively.
75    let mut add_env_prefix = String::new();
76    {
77        let mut combined: std::collections::BTreeMap<String, String> =
78            std::collections::BTreeMap::new();
79        for entry in test.tests() {
80            for (key, val) in &entry.env {
81                combined.entry(key.clone()).or_insert_with(|| val.clone());
82            }
83        }
84        if !combined.is_empty() {
85            let exports: Vec<String> = combined.iter().map(|(k, v)| format!("{k}={v}")).collect();
86            add_env_prefix = exports.join(" ") + " ";
87        }
88    }
89
90    // Deploy each registry service
91    if !failed {
92        for service in services {
93            println!("{p}  ryra add {service}...");
94            let step_event = run_event(
95                vm,
96                EventKind::Step,
97                &format!("{add_env_prefix}ryra add {service}"),
98                300,
99            )
100            .await;
101            print_event_result(&p, &step_event);
102
103            if step_event.outcome.is_fail() {
104                failed = true;
105                emit(&mut events, step_event, &on_event);
106                break;
107            }
108            emit(&mut events, step_event, &on_event);
109
110            // Wait for service to be active
111            println!("{p}  waiting for {service} to start...");
112            let wait_event = wait_for_service(vm, &p, service).await;
113            print_event_result(&p, &wait_event);
114            if wait_event.outcome.is_fail() {
115                failed = true;
116                emit(&mut events, wait_event, &on_event);
117                break;
118            }
119            emit(&mut events, wait_event, &on_event);
120        }
121    }
122
123    // Wait for declared ports to be reachable (services may need startup time
124    // beyond what systemd "active" indicates).
125    if !failed {
126        for service in services {
127            let port_cmd =
128                format!("grep PORT {DATA_ROOT_SH}/{service}/.env 2>/dev/null | cut -d= -f2");
129            if let Ok(out) = vm.exec(&port_cmd).await {
130                for port in out.stdout.trim().lines() {
131                    let port = port.trim();
132                    if port.is_empty() {
133                        continue;
134                    }
135                    println!("{p}  waiting for port {port}...");
136                    let port_event = wait_for_port(vm, &p, port).await;
137                    print_event_result(&p, &port_event);
138                    if port_event.outcome.is_fail() {
139                        failed = true;
140                        emit(&mut events, port_event, &on_event);
141                        break;
142                    }
143                    emit(&mut events, port_event, &on_event);
144                }
145            }
146            if failed {
147                break;
148            }
149        }
150    }
151
152    // Deploy quadlet files if present
153    if !failed && !quadlets.is_empty() {
154        println!("{p}  deploying quadlet files...");
155        let deploy_cmd = "\
156            mkdir -p $HOME/.config/containers/systemd && \
157            cp /opt/ryra-test-project/*.container $HOME/.config/containers/systemd/ 2>/dev/null; \
158            cp /opt/ryra-test-project/*.volume $HOME/.config/containers/systemd/ 2>/dev/null; \
159            cp /opt/ryra-test-project/*.network $HOME/.config/containers/systemd/ 2>/dev/null; \
160            cp /opt/ryra-test-project/*.pod $HOME/.config/containers/systemd/ 2>/dev/null; \
161            systemctl --user daemon-reload";
162        let deploy_event = run_event(vm, EventKind::Step, deploy_cmd, 30).await;
163        print_event_result(&p, &deploy_event);
164        if deploy_event.outcome.is_fail() {
165            failed = true;
166        }
167        emit(&mut events, deploy_event, &on_event);
168
169        // Derive service names from .container file stems, start each
170        if !failed {
171            let quadlet_services: Vec<&str> = quadlets
172                .iter()
173                .filter(|q| q.ends_with(".container"))
174                .filter_map(|q| q.strip_suffix(".container"))
175                .collect();
176
177            for svc in &quadlet_services {
178                println!("{p}  starting {svc}...");
179                let start_cmd = format!("systemctl --user start {svc}.service");
180                let start_event = run_event(vm, EventKind::Step, &start_cmd, 120).await;
181                print_event_result(&p, &start_event);
182                if start_event.outcome.is_fail() {
183                    failed = true;
184                    emit(&mut events, start_event, &on_event);
185                    break;
186                }
187                emit(&mut events, start_event, &on_event);
188
189                println!("{p}  waiting for {svc}...");
190                let wait_event = wait_for_service(vm, &p, svc).await;
191                print_event_result(&p, &wait_event);
192                if wait_event.outcome.is_fail() {
193                    failed = true;
194                    emit(&mut events, wait_event, &on_event);
195                    break;
196                }
197                emit(&mut events, wait_event, &on_event);
198            }
199        }
200    }
201
202    // Build the env sourcing prefix for test commands
203    let env_prefix = if !failed {
204        match build_env_prefix(vm, test).await {
205            Ok(prefix) => prefix,
206            Err(e) => {
207                failed = true;
208                emit(
209                    &mut events,
210                    Event::bare(
211                        "source service env vars".to_string(),
212                        EventKind::Step,
213                        Outcome::Failed(format!("{e:#}")),
214                        Duration::ZERO,
215                    ),
216                    &on_event,
217                );
218                String::new()
219            }
220        }
221    } else {
222        String::new()
223    };
224
225    // Run each test command
226    for test_entry in test.tests() {
227        if failed {
228            emit(
229                &mut events,
230                Event::bare(
231                    format!("test: {}", test_entry.name),
232                    EventKind::Assertion,
233                    Outcome::Skipped,
234                    Duration::ZERO,
235                ),
236                &on_event,
237            );
238            println!("{p}  skip  {}", test_entry.name);
239            continue;
240        }
241
242        println!("{p}  test  {}...", test_entry.name);
243        let event = run_test_entry(vm, test_entry, &env_prefix).await;
244        print_event_result(&p, &event);
245        if event.outcome.is_fail() {
246            failed = true;
247        }
248        emit(&mut events, event, &on_event);
249    }
250
251    // Dump diagnostics on failure
252    if failed {
253        dump_diagnostics(vm, &p, &test.services()).await;
254    }
255
256    let outcome = if failed {
257        let first_failure = events
258            .iter()
259            .find_map(|e| match &e.outcome {
260                Outcome::Failed(msg) => Some(msg.clone()),
261                _ => None,
262            })
263            .unwrap_or_else(|| "unknown failure".to_string());
264        Outcome::Failed(first_failure)
265    } else {
266        Outcome::Passed
267    };
268
269    ScenarioResult {
270        name: test.name().to_string(),
271        events,
272        duration: start.elapsed(),
273        outcome,
274    }
275}
276
277/// Run a single test entry — wraps the `run` command with env sourcing and timeout.
278async fn run_test_entry(vm: &dyn Executor, entry: &TestEntry, env_prefix: &str) -> Event {
279    let t = Instant::now();
280
281    // Build the full command: env overrides + env sourcing + the test command
282    let mut parts = Vec::new();
283
284    // Inline env overrides from the test definition
285    for (key, val) in &entry.env {
286        parts.push(format!("export {key}={val}"));
287    }
288
289    // Source the env files
290    if !env_prefix.is_empty() {
291        parts.push(env_prefix.to_string());
292    }
293
294    // The actual test command
295    parts.push(entry.run.clone());
296
297    let full_cmd = parts.join(" && ");
298
299    // Run with timeout
300    let timeout = Duration::from_secs(entry.timeout_secs);
301    let result = tokio::time::timeout(timeout, vm.exec(&full_cmd)).await;
302
303    let outcome = match result {
304        Ok(Ok(_)) => Outcome::Passed,
305        Ok(Err(e)) => Outcome::Failed(format!("{e:#}")),
306        Err(_) => Outcome::Failed(format!("timed out after {}s", entry.timeout_secs)),
307    };
308
309    Event::bare(
310        format!("test: {}", entry.name),
311        EventKind::Assertion,
312        outcome,
313        t.elapsed(),
314    )
315}
316
/// Shell snippet that loads an .env file into the current shell's exported
/// environment **safely** — one `read` per line and a single split on the
/// first `=`, so values containing whitespace or shell metacharacters (e.g.
/// supabase's `DB_AFTER_CONNECT_QUERY=SET search_path TO …` or
/// `ERL_AFLAGS=-proto_dist inet_tcp`) don't get parsed as inline commands
/// like `. file` would.
///
/// The .env format ryra writes intentionally avoids quoting so podman's
/// --env-file can consume it verbatim (podman does NOT strip quotes), which
/// means bash `source` / `.` is unsafe against any value with whitespace.
///
/// `path` is spliced into the snippet as the redirect target exactly as
/// given, so the caller decides whether it is quoted or left to expand.
///
/// Details of the generated loop:
/// - The whole line is read with an empty `IFS` and split with `${var%%=*}` /
///   `${var#*=}`. Splitting via `IFS='=' read k v` instead would drop a lone
///   trailing `=` from the value (base64 padding, for instance).
/// - `|| [ -n "$__l" ]` keeps a final line that has no trailing newline.
/// - Blank lines and lines starting with `#` are skipped, as are lines with
///   no `=` at all (they are not assignments).
fn load_env_shell(path: &str) -> String {
    format!(
        "while IFS= read -r __l || [ -n \"$__l\" ]; do \
         case \"$__l\" in \
         \"\"|\\#*) continue ;; \
         *=*) export \"${{__l%%=*}}=${{__l#*=}}\" ;; \
         esac; \
         done < {path}"
    )
}
334
335/// Build a shell snippet that sources all relevant .env files.
336///
337/// Single-service: loads `<service>/.env` into the current shell unprefixed.
338/// Multi-service: reads each .env and exports with SERVICE__ prefix.
339async fn build_env_prefix(_vm: &dyn Executor, test: &DiscoveredTest) -> Result<String> {
340    match test {
341        DiscoveredTest::Simple { setup, .. } => {
342            if setup.services.len() == 1 {
343                Ok(load_env_shell(&format!(
344                    "{DATA_ROOT_SH}/{}/.env",
345                    setup.services[0]
346                )))
347            } else if setup.services.len() > 1 {
348                // For multi-service, we generate a script that reads each .env
349                // and re-exports vars with the service name prefix
350                let mut lines = Vec::new();
351                for service in &setup.services {
352                    let prefix = service.to_uppercase();
353                    lines.push(format!(
354                        "while IFS='=' read -r key val; do \
355                         case \"$key\" in \"\"|\\#*) continue ;; esac; \
356                         export {prefix}__$key=\"$val\"; \
357                         done < {DATA_ROOT_SH}/{service}/.env"
358                    ));
359                }
360                Ok(lines.join(" && "))
361            } else {
362                Ok(String::new())
363            }
364        }
365        DiscoveredTest::Lifecycle { .. } => {
366            // Lifecycle tests handle env sourcing within their step commands
367            Ok(String::new())
368        }
369    }
370}
371
372/// Wait for a service's systemd unit to become active (default 60s timeout).
373async fn wait_for_service(vm: &dyn Executor, prefix: &str, service: &str) -> Event {
374    wait_for_service_with_timeout(vm, prefix, service, 60).await
375}
376
377/// Wait for a service's systemd unit to become active with a custom timeout.
378/// `prefix` is the test's line prefix (`"[name] "` in parallel runs, empty when
379/// a banner already scopes the output), used to align the wait's heartbeat with
380/// surrounding lines.
381async fn wait_for_service_with_timeout(
382    vm: &dyn Executor,
383    prefix: &str,
384    service: &str,
385    timeout_secs: u64,
386) -> Event {
387    let t = Instant::now();
388    let timeout = Duration::from_secs(timeout_secs);
389
390    let unit = format!("{service}.service");
391    let result = vm
392        .wait_for_service(&unit, timeout, &format!("{prefix}    "))
393        .await;
394
395    let outcome = match result {
396        Ok(()) => Outcome::Passed,
397        Err(e) => Outcome::Failed(format!("service didn't start: {e:#}")),
398    };
399
400    Event::bare(
401        format!("wait for {service}"),
402        EventKind::Step,
403        outcome,
404        t.elapsed(),
405    )
406}
407
/// Make `s` safe to place between single quotes in a shell command.
///
/// Each `'` becomes `'\''` (close the quote, emit an escaped quote, reopen).
/// The caller supplies the surrounding single quotes.
fn shell_escape(s: &str) -> String {
    let pieces: Vec<&str> = s.split('\'').collect();
    pieces.join(r"'\''")
}
412
413/// Run a Playwright spec and save the HTML report.
414async fn run_browser_step(
415    vm: &dyn Executor,
416    test_name: &str,
417    step_name: &str,
418    spec: &str,
419    env: &std::collections::BTreeMap<String, String>,
420    timeout_secs: u64,
421    registry_path: &std::path::Path,
422) -> Event {
423    let t = Instant::now();
424
425    // Build env exports from the step's env map. Values are shell-quoted.
426    let mut env_exports = String::new();
427    for (key, val) in env {
428        let quoted = shell_escape(val);
429        env_exports.push_str(&format!("export {key}='{quoted}' && "));
430    }
431
432    let browser_dir = format!("{}/tests/browser", registry_path.display());
433    let browser_dir_esc = shell_escape(&browser_dir);
434    let spec_esc = shell_escape(spec);
435    // Where Playwright writes its HTML report, in the execution environment.
436    // Local = the host reports dir directly; VM = a staging path fetched out
437    // below. Either way the report ends up under services-test/reports/.
438    let out_dir = vm.playwright_out_dir(test_name);
439    let out_dir_esc = shell_escape(&out_dir);
440
441    // Shell command:
442    // 1. Source all service .env files so port vars (PORT_HTTP etc.) are
443    //    available to the spec. Static env from the toml step overrides these.
444    // 2. Pre-create the canonical report directory and tell playwright to
445    //    emit the HTML report directly there (no intermediate copy step).
446    // 3. cd into the browser test dir.
447    // 4. Ensure node_modules exists — symlink /opt/playwright/node_modules
448    //    in the VM image, or `bun install` on a bare host.
449    // 5. Export env vars from the step (overrides sourced vars).
450    // 6. Run playwright with the html reporter pointed at the canonical path.
451    //    Also use the list reporter so the user sees live progress.
452    // 7. Exit with playwright's own exit code.
453    // Per-file safe .env load — same rationale as load_env_shell(): raw
454    // `.` would choke on values with whitespace (supabase `DB_AFTER_*`, etc.)
455    let env_loop = format!(
456        "for __f in {DATA_ROOT_SH}/*/.env; do \
457           [ -f \"$__f\" ] && {loader}; \
458         done",
459        loader = load_env_shell("\"$__f\"")
460    );
461    // Expose inbucket to the spec as INBUCKET_URL when it's installed. Browser
462    // specs that read a mailed code (verification/OTT) consume this via the
463    // inbucket.ts helper — Playwright has no email primitive, so the runner
464    // owns service-layout discovery (which .env, which port) in one place
465    // rather than every spec hard-coding ~/.local/share/services/inbucket/.env.
466    // `;`-separated (not `&&`) so a missing inbucket leaves INBUCKET_URL unset
467    // without aborting the command. A step's own `env` (exported below)
468    // overrides this.
469    let inbucket_export = format!(
470        "INBUCKET_PORT=$(grep SERVICE_PORT_HTTP \
471         {DATA_ROOT_SH}/inbucket/.env 2>/dev/null | cut -d= -f2); \
472         [ -n \"$INBUCKET_PORT\" ] && \
473         export INBUCKET_URL=\"http://127.0.0.1:$INBUCKET_PORT\"; "
474    );
475    let cmd = format!(
476        "{env_loop} && \
477         {inbucket_export}\
478         DEST={out_dir_esc} && \
479         mkdir -p \"$DEST\" && \
480         cd '{browser_dir_esc}' && \
481         if [ ! -d node_modules ]; then \
482           if [ -d /opt/playwright/node_modules ]; then \
483             ln -sf /opt/playwright/node_modules .; \
484           else \
485             bun install playwright @playwright/test 2>&1; \
486           fi; \
487         fi && \
488         export PATH=\"$HOME/.bun/bin:$PATH\" && \
489         export PLAYWRIGHT_HTML_REPORT=\"$DEST\" && \
490         export PLAYWRIGHT_HTML_OPEN=never && \
491         {env_exports}\
492         bunx playwright test '{spec_esc}' --reporter=list,html"
493    );
494
495    let timeout = Duration::from_secs(timeout_secs);
496    let result = tokio::time::timeout(timeout, vm.exec_streaming(&cmd, test_name)).await;
497
498    let outcome = match result {
499        Ok(Ok(_)) => Outcome::Passed,
500        Ok(Err(e)) => Outcome::Failed(format!("{e:#}")),
501        Err(_) => Outcome::Failed(format!("timed out after {timeout_secs}s")),
502    };
503
504    // Pull the report into the host's canonical reports dir
505    // (services-test/reports/<test>/playwright), regardless of VM or bare mode.
506    // On a VM this copies from the staging path; on bare it's a no-op because
507    // Playwright already wrote straight there. We always try — even on failure,
508    // because traces on failure are the most valuable ones to inspect.
509    if let Ok(reports) = crate::reports::reports_dir() {
510        let local_dir = reports.join(test_name).join("playwright");
511        if let Err(e) = vm.fetch_dir(&out_dir, &local_dir).await {
512            eprintln!("warning: failed to fetch playwright report: {e:#}");
513        }
514    }
515
516    Event::bare(
517        format!("browser: {step_name}"),
518        EventKind::Assertion,
519        outcome,
520        t.elapsed(),
521    )
522}
523
524/// Execute a lifecycle test — interleaved actions and assertions.
525///
526/// Unlike `run_registry_test`, this processes a sequence of typed steps
527/// (add, remove, wait, run, assert) rather than "add all then test".
528#[allow(clippy::too_many_arguments)]
529pub async fn run_lifecycle_test(
530    vm: &dyn Executor,
531    name: &str,
532    steps: &[StepDef],
533    verbose: bool,
534    prefixed: bool,
535    registry_path: &std::path::Path,
536    retest: bool,
537    on_event: Option<OnEvent>,
538) -> ScenarioResult {
539    let start = Instant::now();
540    let mut events = Vec::new();
541    let mut failed = false;
542    // Line prefix: `[name] ` when output from parallel tests interleaves on one
543    // terminal, empty when a `---- START name ----` banner already scopes the
544    // block (serial bare runs).
545    let p = if prefixed {
546        format!("[{name}] ")
547    } else {
548        String::new()
549    };
550    let stream_prefix = if prefixed { name } else { "" };
551
552    for step in steps {
553        // In retest mode, skip setup steps and only run test/assertion steps.
554        if retest && step.is_setup() {
555            let desc = step.step_name();
556            println!("{p}  skip  {desc} (retest)");
557            continue;
558        }
559
560        if failed {
561            let desc = step.step_name();
562            emit(
563                &mut events,
564                Event::bare(
565                    desc.clone(),
566                    EventKind::Step,
567                    Outcome::Skipped,
568                    Duration::ZERO,
569                ),
570                &on_event,
571            );
572            println!("{p}  skip  {desc}");
573            continue;
574        }
575
576        match step {
577            StepDef::Add {
578                service,
579                args,
580                env,
581                timeout,
582                project_path,
583            } => {
584                println!("{p}  ryra add {service}...");
585                let mut cmd = String::new();
586                for (key, val) in env {
587                    let escaped = shell_escape(val);
588                    cmd.push_str(&format!("{key}='{escaped}' "));
589                }
590                // Local projects add by path so `ryra add ./project` reads
591                // the project's own service.toml; registry services add by
592                // name. Either way the install registers under `service`.
593                let add_target = match project_path {
594                    Some(p) => shell_escape(&p.display().to_string()),
595                    None => service.clone(),
596                };
597                cmd.push_str(&format!("ryra add {add_target}"));
598                if let Some(a) = args.as_deref()
599                    && !a.is_empty()
600                {
601                    cmd.push_str(&format!(" {a}"));
602                }
603                let event = run_event(vm, EventKind::Step, &cmd, *timeout).await;
604                print_event_result(&p, &event);
605                if event.outcome.is_fail() {
606                    failed = true;
607                }
608                emit(&mut events, event, &on_event);
609            }
610            StepDef::Remove { service } => {
611                println!("{p}  ryra remove --purge {service}...");
612                let event = run_event(
613                    vm,
614                    EventKind::Step,
615                    &format!("ryra remove --purge {service} -y"),
616                    120,
617                )
618                .await;
619                print_event_result(&p, &event);
620                if event.outcome.is_fail() {
621                    failed = true;
622                }
623                emit(&mut events, event, &on_event);
624            }
625            StepDef::Wait { service, timeout } => {
626                println!("{p}  waiting for {service}...");
627                let event = wait_for_service_with_timeout(vm, &p, service, *timeout).await;
628                print_event_result(&p, &event);
629                if event.outcome.is_fail() {
630                    failed = true;
631                }
632                emit(&mut events, event, &on_event);
633            }
634            StepDef::Shell {
635                name: step_name,
636                run,
637                timeout,
638                poll,
639            } => {
640                println!("{p}  run: {step_name}...");
641                let event = run_step_with_poll(
642                    vm,
643                    step_name,
644                    run,
645                    *timeout,
646                    poll.as_ref(),
647                    verbose,
648                    stream_prefix,
649                )
650                .await;
651                print_event_result(&p, &event);
652                if event.outcome.is_fail() {
653                    failed = true;
654                }
655                emit(&mut events, event, &on_event);
656            }
657            StepDef::Http {
658                name: http_name,
659                url,
660                method,
661                body,
662                content_type,
663                headers,
664                status,
665                service,
666                poll,
667                timeout,
668            } => {
669                let step_name = http_name.as_deref().unwrap_or(url);
670                println!("{p}  http: {step_name}...");
671                // Source service .env files for variable expansion ($SERVICE_PORT_HTTP etc.),
672                // follow redirects (-L), skip TLS verification (-k) for self-signed certs.
673                // URL uses double quotes so shell variables expand.
674                let url_esc = url.replace('"', r#"\""#);
675                let env_source = match service {
676                    Some(svc) => load_env_shell(&format!("{DATA_ROOT_SH}/{svc}/.env")),
677                    None => format!(
678                        "for __f in {DATA_ROOT_SH}/*/.env; do [ -f \"$__f\" ] && {}; done",
679                        load_env_shell("\"$__f\"")
680                    ),
681                };
682                // Assemble curl. For non-GET methods we prepend a heredoc so
683                // the body flows verbatim into a $BODY variable — this
684                // dodges all the shell-quoting edge cases of embedding
685                // arbitrary JSON/form bodies directly in the command string.
686                let verb = method.as_curl_arg();
687                let ct_esc = content_type.replace('"', r#"\""#);
688                // Extra headers — rendered as a sequence of `-H "K: V"` args.
689                // Values go through double-quotes so $VAR expansion against
690                // the sourced .env files works (useful for apikey, tokens).
691                let header_args = headers
692                    .iter()
693                    .map(|(k, v)| {
694                        let k = k.replace('"', r#"\""#);
695                        let v = v.replace('"', r#"\""#);
696                        format!(r#" -H "{k}: {v}""#)
697                    })
698                    .collect::<String>();
699                let curl = match body {
700                    Some(b) => format!(
701                        "BODY=$(cat <<'HTTP_BODY_EOF'\n{b}\nHTTP_BODY_EOF\n) && \
702                         HTTP_CODE=$(curl -skL -o /dev/null -w '%{{http_code}}' \
703                            -X {verb} \
704                            -H \"Content-Type: {ct_esc}\"{header_args} \
705                            --data-raw \"$BODY\" \
706                            \"{url_esc}\")"
707                    ),
708                    None => format!(
709                        "HTTP_CODE=$(curl -skL -o /dev/null -w '%{{http_code}}' -X {verb}{header_args} \"{url_esc}\")"
710                    ),
711                };
712                let cmd = format!(
713                    "set -a && {env_source} && set +a && {curl} && \
714                     [ \"$HTTP_CODE\" = \"{status}\" ]"
715                );
716                let event = run_step_with_poll(
717                    vm,
718                    step_name,
719                    &cmd,
720                    *timeout,
721                    poll.as_ref(),
722                    verbose,
723                    stream_prefix,
724                )
725                .await;
726                print_event_result(&p, &event);
727                if event.outcome.is_fail() {
728                    failed = true;
729                }
730                emit(&mut events, event, &on_event);
731            }
732            StepDef::Playwright {
733                name: browser_name,
734                spec,
735                env,
736                timeout,
737            } => {
738                let step_name = browser_name.as_deref().unwrap_or(spec);
739                println!("{p}  browser: {step_name}...");
740                let event = run_browser_step(
741                    vm,
742                    name,      // test name (for report paths)
743                    step_name, // step name (for event description)
744                    spec,
745                    env,
746                    *timeout,
747                    registry_path,
748                )
749                .await;
750                print_event_result(&p, &event);
751                if event.outcome.is_fail() {
752                    failed = true;
753                }
754                emit(&mut events, event, &on_event);
755            }
756            StepDef::Mail {
757                name: mail_name,
758                mailbox,
759                contains,
760                poll,
761                timeout,
762            } => {
763                let step_name = mail_name.as_deref().unwrap_or(mailbox);
764                println!("{p}  mail: {step_name}...");
765                // Single-shot probe: discover inbucket's port, fetch the
766                // mailbox JSON, check non-empty + (optional) substring.
767                // run_step_with_poll retries this until the mail lands.
768                let mailbox_esc = shell_escape(mailbox);
769                let contains_check = match contains {
770                    Some(c) => {
771                        format!(" && echo \"$BODY\" | grep -q -- '{}'", shell_escape(c),)
772                    }
773                    None => String::new(),
774                };
775                let cmd = format!(
776                    "INBUCKET_PORT=$(grep SERVICE_PORT_HTTP {DATA_ROOT_SH}/inbucket/.env 2>/dev/null | cut -d= -f2); \
777                     [ -n \"$INBUCKET_PORT\" ] || {{ echo 'inbucket not installed -- no ~/.local/share/services/inbucket/.env'; exit 2; }}; \
778                     BODY=$(curl -sf \"http://127.0.0.1:$INBUCKET_PORT/api/v1/mailbox/{mailbox_esc}\" 2>/dev/null); \
779                     [ -n \"$BODY\" ] && [ \"$BODY\" != '[]' ]{contains_check}"
780                );
781                let event = run_step_with_poll(
782                    vm,
783                    step_name,
784                    &cmd,
785                    *timeout,
786                    Some(poll),
787                    verbose,
788                    stream_prefix,
789                )
790                .await;
791                print_event_result(&p, &event);
792                if event.outcome.is_fail() {
793                    failed = true;
794                }
795                emit(&mut events, event, &on_event);
796            }
797        }
798    }
799
800    let outcome = if failed {
801        let first_failure = events
802            .iter()
803            .find_map(|e| match &e.outcome {
804                Outcome::Failed(msg) => Some(msg.clone()),
805                _ => None,
806            })
807            .unwrap_or_else(|| "unknown failure".to_string());
808        Outcome::Failed(first_failure)
809    } else {
810        Outcome::Passed
811    };
812
813    ScenarioResult {
814        name: name.to_string(),
815        events,
816        duration: start.elapsed(),
817        outcome,
818    }
819}
820
821/// Execute a run step, optionally retrying on failure via poll config.
822async fn run_step_with_poll(
823    vm: &dyn Executor,
824    step_name: &str,
825    cmd: &str,
826    timeout_secs: u64,
827    poll: Option<&crate::test_toml::PollConfig>,
828    verbose: bool,
829    stream_prefix: &str,
830) -> Event {
831    let t = Instant::now();
832
833    match poll {
834        None => {
835            // Single execution — same as before
836            if verbose {
837                run_event_streaming(vm, stream_prefix, EventKind::Step, cmd, timeout_secs).await
838            } else {
839                run_event(vm, EventKind::Step, cmd, timeout_secs).await
840            }
841        }
842        Some(poll_cfg) => {
843            // Retry loop. Print a progress tick every few attempts so long
844            // polls (e.g. 80 × 3s = 4min) don't look like the runner hung.
845            let mut last_err = String::new();
846            let tick_every = (poll_cfg.attempts / 10).max(1);
847            for attempt in 1..=poll_cfg.attempts {
848                let timeout = Duration::from_secs(timeout_secs);
849                let result = tokio::time::timeout(timeout, vm.exec(cmd)).await;
850
851                match result {
852                    Ok(Ok(out)) => {
853                        return Event {
854                            description: format!("run: {step_name}"),
855                            kind: EventKind::Step,
856                            outcome: Outcome::Passed,
857                            duration: t.elapsed(),
858                            stdout: out.stdout,
859                            stderr: out.stderr,
860                        };
861                    }
862                    Ok(Err(e)) => {
863                        last_err = format!("{e:#}");
864                    }
865                    Err(_) => {
866                        last_err = format!("timed out after {timeout_secs}s");
867                    }
868                }
869
870                if attempt < poll_cfg.attempts {
871                    if attempt == 1 || attempt % tick_every == 0 {
872                        let line = last_err.lines().next().unwrap_or("").trim();
873                        let snippet: String = line.chars().take(80).collect();
874                        if stream_prefix.is_empty() {
875                            println!("    retrying ({attempt}/{}): {snippet}", poll_cfg.attempts);
876                        } else {
877                            println!(
878                                "[{stream_prefix}]     retrying ({attempt}/{}): {snippet}",
879                                poll_cfg.attempts
880                            );
881                        }
882                    }
883                    tokio::time::sleep(Duration::from_secs(poll_cfg.interval)).await;
884                }
885            }
886
887            Event::bare(
888                format!("run: {step_name}"),
889                EventKind::Step,
890                Outcome::Failed(format!(
891                    "failed after {} attempts (interval={}s): {last_err}",
892                    poll_cfg.attempts, poll_cfg.interval
893                )),
894                t.elapsed(),
895            )
896        }
897    }
898}
899
900/// Wait for a port to accept TCP connections (not just be bound by rootlessport).
901///
902/// Uses bash `/dev/tcp` to test actual TCP connectivity through to the
903/// container, not just whether rootlessport is listening on the host side.
904async fn wait_for_port(vm: &dyn Executor, prefix: &str, port: &str) -> Event {
905    let t = Instant::now();
906    let timeout = Duration::from_secs(60);
907    let mut progress =
908        ryra_vm::progress::WaitProgress::new(format!("port {port}"), "tcp connect", timeout)
909            .with_prefix(format!("{prefix}    "));
910    // First few seconds: rootlessport is listening but the container app
911    // may not be ready yet. A successful bash /dev/tcp probe means the
912    // connection made it all the way to the container.
913    loop {
914        let cmd = format!("bash -c 'echo > /dev/tcp/127.0.0.1/{port}' 2>/dev/null");
915        if vm.exec(&cmd).await.is_ok() {
916            return Event::bare(
917                format!("port {port} ready"),
918                EventKind::Step,
919                Outcome::Passed,
920                t.elapsed(),
921            );
922        }
923
924        if progress.timed_out() {
925            return Event::bare(
926                format!("port {port} ready"),
927                EventKind::Step,
928                Outcome::Failed(format!(
929                    "port {port} not responding after {}s",
930                    timeout.as_secs()
931                )),
932                t.elapsed(),
933            );
934        }
935
936        progress.tick();
937        tokio::time::sleep(Duration::from_secs(3)).await;
938    }
939}
940
941/// Dump diagnostic info for each service when a test fails. `prefix` is the
942/// test's line prefix (see run_registry_test) so diagnostics align with the
943/// rest of the test's output.
944async fn dump_diagnostics(vm: &dyn Executor, prefix: &str, services: &[&str]) {
945    println!("{prefix}--- diagnostics ---");
946    for svc in services {
947        // Systemd service status
948        let cmd = format!("systemctl --user status {svc}.service 2>&1 | head -20 || true");
949        if let Ok(out) = vm.exec(&cmd).await {
950            let trimmed = out.stdout.trim();
951            if !trimmed.is_empty() {
952                println!("{prefix}  [{svc}] systemd status:");
953                for line in trimmed.lines() {
954                    println!("{prefix}    {line}");
955                }
956            }
957        }
958
959        // Container status
960        let cmd = "podman ps -a --format '{{.Names}} {{.Status}} {{.Ports}}' 2>&1 || true";
961        if let Ok(out) = vm.exec(cmd).await {
962            let trimmed = out.stdout.trim();
963            if !trimmed.is_empty() {
964                println!("{prefix}  [{svc}] containers: {trimmed}");
965            } else {
966                println!("{prefix}  [{svc}] containers: (none)");
967            }
968        }
969
970        // Journal logs
971        let cmd = format!("journalctl --user -u {svc}.service --no-pager -n 30 2>&1 || true");
972        if let Ok(out) = vm.exec(&cmd).await {
973            let trimmed = out.stdout.trim();
974            if !trimmed.is_empty() {
975                println!("{prefix}  [{svc}] logs:");
976                for line in trimmed.lines().take(30) {
977                    println!("{prefix}    {line}");
978                }
979            }
980        }
981
982        // Env file
983        let cmd = format!("cat {DATA_ROOT_SH}/{svc}/.env 2>&1 | grep PORT || true");
984        if let Ok(out) = vm.exec(&cmd).await {
985            let trimmed = out.stdout.trim();
986            if !trimmed.is_empty() {
987                println!("{prefix}  [{svc}] ports: {trimmed}");
988            }
989        }
990
991        // Check quadlet, container internals, and network
992        let cmd = format!(
993            "echo '=== quadlet ==='; grep -i exec $HOME/.config/containers/systemd/{svc}.container 2>/dev/null || true; \
994             echo '=== container process ==='; podman exec {svc} ps aux 2>&1 | head -10 || true; \
995             echo '=== container listeners ==='; podman exec {svc} cat /proc/net/tcp6 2>&1 | head -10 || true; \
996             echo '=== host listeners ==='; ss -tlnp 2>/dev/null | head -20; \
997             echo '=== curl ==='; curl -sv http://127.0.0.1:18789/ 2>&1 | head -10 || true"
998        );
999        if let Ok(out) = vm.exec(&cmd).await {
1000            let trimmed = out.stdout.trim();
1001            println!("{prefix}  [{svc}] network:");
1002            for line in trimmed.lines() {
1003                println!("{prefix}    {line}");
1004            }
1005        }
1006    }
1007    println!("{prefix}--- end diagnostics ---");
1008}
1009
1010/// Run a command in the VM with real-time output streaming and return an Event.
1011async fn run_event_streaming(
1012    vm: &dyn Executor,
1013    test_name: &str,
1014    kind: EventKind,
1015    cmd: &str,
1016    timeout_secs: u64,
1017) -> Event {
1018    let t = Instant::now();
1019    let timeout = Duration::from_secs(timeout_secs);
1020    let result = tokio::time::timeout(timeout, vm.exec_streaming(cmd, test_name)).await;
1021
1022    let (outcome, stdout, stderr) = match result {
1023        Ok(Ok(out)) => (Outcome::Passed, out.stdout, out.stderr),
1024        Ok(Err(e)) => (
1025            Outcome::Failed(format!("{e:#}")),
1026            String::new(),
1027            String::new(),
1028        ),
1029        Err(_) => (
1030            Outcome::Failed(format!("timed out after {timeout_secs}s")),
1031            String::new(),
1032            String::new(),
1033        ),
1034    };
1035
1036    Event {
1037        description: cmd.to_string(),
1038        kind,
1039        outcome,
1040        duration: t.elapsed(),
1041        stdout,
1042        stderr,
1043    }
1044}
1045
1046/// Run a command in the VM and return an Event.
1047async fn run_event(vm: &dyn Executor, kind: EventKind, cmd: &str, timeout_secs: u64) -> Event {
1048    let t = Instant::now();
1049    let timeout = Duration::from_secs(timeout_secs);
1050    let result = tokio::time::timeout(timeout, vm.exec(cmd)).await;
1051
1052    let (outcome, stdout, stderr) = match result {
1053        Ok(Ok(out)) => (Outcome::Passed, out.stdout, out.stderr),
1054        Ok(Err(e)) => (
1055            Outcome::Failed(format!("{e:#}")),
1056            String::new(),
1057            String::new(),
1058        ),
1059        Err(_) => (
1060            Outcome::Failed(format!("timed out after {timeout_secs}s")),
1061            String::new(),
1062            String::new(),
1063        ),
1064    };
1065
1066    Event {
1067        description: cmd.to_string(),
1068        kind,
1069        outcome,
1070        duration: t.elapsed(),
1071        stdout,
1072        stderr,
1073    }
1074}