// ftui_harness/shadow_run.rs

1#![forbid(unsafe_code)]
2
3//! Shadow-run comparison harness for validating execution-path equivalence.
4//!
//! Runs the same [`Model`] and event sequence through two independent
//! [`LabSession`] instances (baseline vs candidate) and compares their frame
//! checksums and frame counts. Per-lane event, tick, and anomaly counts are
//! logged as evidence but do not influence the verdict. This is the primary
//! mechanism for proving that a runtime migration (e.g., threading →
//! Asupersync executor) preserves rendering determinism.
10//!
11//! # Design
12//!
13//! A [`ShadowRun`] takes two [`LabConfig`]s (baseline and candidate), a
14//! model factory, and a scenario closure. It executes the scenario twice—
15//! once per lane—under deterministic seeds, then compares the frame records.
16//! All comparison evidence is emitted to JSONL via [`TestJsonlLogger`].
17//!
18//! # Example
19//!
20//! ```ignore
21//! use ftui_harness::shadow_run::{ShadowRun, ShadowRunConfig, ShadowVerdict};
22//!
23//! let config = ShadowRunConfig::new("migration_test", "tick_counter", 42)
24//!     .viewport(80, 24);
25//!
26//! let result = ShadowRun::compare(config, || MyModel::new(), |session| {
27//!     session.init();
28//!     session.tick();
29//!     session.capture_frame();
30//! });
31//!
32//! assert_eq!(result.verdict, ShadowVerdict::Match);
33//! ```
34
35use std::sync::atomic::{AtomicU64, Ordering};
36
37use crate::determinism::{JsonValue, TestJsonlLogger};
38use crate::lab_integration::{Lab, LabConfig, LabOutput, LabSession};
39use ftui_runtime::program::Model;
40use tracing::info_span;
41
/// Process-wide tally of shadow-run comparisons.
static SHADOW_RUNS_TOTAL: AtomicU64 = AtomicU64::new(0);

/// Returns how many shadow runs have been counted in this process so far.
///
/// The value is read with relaxed ordering: it is a plain statistic and is
/// not used to synchronise any other memory.
#[must_use]
pub fn shadow_runs_total() -> u64 {
    AtomicU64::load(&SHADOW_RUNS_TOTAL, Ordering::Relaxed)
}
50
51// ============================================================================
52// Configuration
53// ============================================================================
54
/// Parameters shared by both lanes of a shadow-run comparison.
///
/// Construct with [`ShadowRunConfig::new`] and refine with the builder
/// methods; every field is public so a value can also be inspected directly.
#[derive(Clone, Debug)]
pub struct ShadowRunConfig {
    /// Prefix from which the JSONL logger name and per-lane run IDs are built.
    pub prefix: String,
    /// Name of the scenario, recorded in tracing spans and JSONL events.
    pub scenario_name: String,
    /// Deterministic seed; the same value is handed to both lanes.
    pub seed: u64,
    /// Width of the viewport used for frame captures.
    pub viewport_width: u16,
    /// Height of the viewport used for frame captures.
    pub viewport_height: u16,
    /// Deterministic clock time step, in milliseconds.
    pub time_step_ms: u64,
    /// Name of the baseline lane (`"baseline"` unless overridden).
    pub baseline_label: String,
    /// Name of the candidate lane (`"candidate"` unless overridden).
    pub candidate_label: String,
}
75
76impl ShadowRunConfig {
77    /// Create a new shadow-run configuration with defaults.
78    ///
79    /// Defaults: 80×24 viewport, 16ms time step.
80    pub fn new(prefix: &str, scenario_name: &str, seed: u64) -> Self {
81        Self {
82            prefix: prefix.to_string(),
83            scenario_name: scenario_name.to_string(),
84            seed,
85            viewport_width: 80,
86            viewport_height: 24,
87            time_step_ms: 16,
88            baseline_label: "baseline".to_string(),
89            candidate_label: "candidate".to_string(),
90        }
91    }
92
93    /// Set the viewport dimensions.
94    #[must_use]
95    pub fn viewport(mut self, width: u16, height: u16) -> Self {
96        self.viewport_width = width;
97        self.viewport_height = height;
98        self
99    }
100
101    /// Set the deterministic time step in milliseconds.
102    #[must_use]
103    pub fn time_step_ms(mut self, ms: u64) -> Self {
104        self.time_step_ms = ms;
105        self
106    }
107
108    /// Set custom lane labels.
109    #[must_use]
110    pub fn lane_labels(mut self, baseline: &str, candidate: &str) -> Self {
111        self.baseline_label = baseline.to_string();
112        self.candidate_label = candidate.to_string();
113        self
114    }
115
116    /// Build a [`LabConfig`] for a given lane.
117    fn lab_config(&self, lane: &str) -> LabConfig {
118        LabConfig::new(
119            &format!("{}_{}", self.prefix, lane),
120            &self.scenario_name,
121            self.seed,
122        )
123        .viewport(self.viewport_width, self.viewport_height)
124        .time_step_ms(self.time_step_ms)
125    }
126}
127
128// ============================================================================
129// Verdict and result
130// ============================================================================
131
/// Final result of comparing the baseline lane against the candidate lane.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ShadowVerdict {
    /// Baseline and candidate agreed on every frame checksum.
    Match,
    /// Baseline and candidate disagreed at one or more frame positions.
    Diverged,
}
140
/// Comparison record for a single frame position.
#[derive(Clone, Debug)]
pub struct FrameComparison {
    /// Zero-based position of the frame within the run.
    pub index: usize,
    /// Checksum of the frame produced by the baseline lane.
    pub baseline_checksum: u64,
    /// Checksum of the frame produced by the candidate lane.
    pub candidate_checksum: u64,
    /// `true` when the two lanes matched at this position.
    pub matched: bool,
}
153
154/// Full result of a shadow-run comparison.
155#[derive(Debug, Clone)]
156pub struct ShadowRunResult {
157    /// Overall verdict.
158    pub verdict: ShadowVerdict,
159    /// Scenario name.
160    pub scenario_name: String,
161    /// Seed used for both lanes.
162    pub seed: u64,
163    /// Per-frame comparison details.
164    pub frame_comparisons: Vec<FrameComparison>,
165    /// Index of the first divergent frame (if any).
166    pub first_divergence: Option<usize>,
167    /// Number of frames compared.
168    pub frames_compared: usize,
169    /// Baseline lane output.
170    pub baseline: LabOutput,
171    /// Candidate lane output.
172    pub candidate: LabOutput,
173    /// Baseline lane label.
174    pub baseline_label: String,
175    /// Candidate lane label.
176    pub candidate_label: String,
177    /// Total shadow runs executed in-process (including this one).
178    pub run_total: u64,
179}
180
181impl ShadowRunResult {
182    /// Number of frames that diverged.
183    #[must_use]
184    pub fn diverged_count(&self) -> usize {
185        self.frame_comparisons.iter().filter(|c| !c.matched).count()
186    }
187
188    /// Fraction of frames that matched (0.0–1.0).
189    #[must_use]
190    pub fn match_ratio(&self) -> f64 {
191        if self.frames_compared == 0 {
192            return 1.0;
193        }
194        let matched = self.frame_comparisons.iter().filter(|c| c.matched).count();
195        matched as f64 / self.frames_compared as f64
196    }
197}
198
199// ============================================================================
200// Shadow-run executor
201// ============================================================================
202
/// Entry point for shadow-run comparisons.
///
/// Drives one model and event sequence through two separate LabSession
/// instances and compares the frames each of them produced.
pub struct ShadowRun;
208
209impl ShadowRun {
210    /// Run a shadow comparison between baseline and candidate lanes.
211    ///
212    /// Both lanes execute the same `scenario_fn` with the same seed and
213    /// configuration. Frame checksums are compared after both runs complete.
214    ///
215    /// The `model_factory` is called twice (once per lane) to produce
216    /// independent model instances.
217    ///
218    /// # Evidence
219    ///
220    /// Emits structured JSONL to stderr with events:
221    /// - `shadow.start`: comparison parameters
222    /// - `shadow.lane.done`: per-lane summary
223    /// - `shadow.frame.diverged`: each divergent frame
224    /// - `shadow.verdict`: final pass/fail with statistics
225    pub fn compare<M, MF, SF>(
226        config: ShadowRunConfig,
227        model_factory: MF,
228        scenario_fn: SF,
229    ) -> ShadowRunResult
230    where
231        M: Model,
232        MF: Fn() -> M,
233        SF: Fn(&mut LabSession<M>),
234    {
235        let _span = info_span!(
236            "shadow_run",
237            scenario_name = config.scenario_name.as_str(),
238            seed = config.seed,
239            baseline = config.baseline_label.as_str(),
240            candidate = config.candidate_label.as_str(),
241        )
242        .entered();
243
244        let mut logger = TestJsonlLogger::new_with(
245            &format!("{}_shadow", config.prefix),
246            config.seed,
247            true,
248            config.time_step_ms,
249        );
250        logger.add_context_str("scenario_name", &config.scenario_name);
251        logger.add_context_str("baseline_label", &config.baseline_label);
252        logger.add_context_str("candidate_label", &config.candidate_label);
253
254        // Log start
255        logger.log(
256            "shadow.start",
257            &[
258                ("scenario_name", JsonValue::str(&config.scenario_name)),
259                ("seed", JsonValue::u64(config.seed)),
260                (
261                    "viewport",
262                    JsonValue::raw(format!(
263                        "[{},{}]",
264                        config.viewport_width, config.viewport_height
265                    )),
266                ),
267            ],
268        );
269
270        // Run baseline lane
271        let baseline_config = config.lab_config(&config.baseline_label);
272        let baseline_run = Lab::run_scenario(baseline_config, model_factory(), |s| scenario_fn(s));
273
274        logger.log(
275            "shadow.lane.done",
276            &[
277                ("lane", JsonValue::str(&config.baseline_label)),
278                (
279                    "frame_count",
280                    JsonValue::u64(baseline_run.output.frame_count as u64),
281                ),
282                (
283                    "event_count",
284                    JsonValue::u64(baseline_run.output.event_count as u64),
285                ),
286                ("tick_count", JsonValue::u64(baseline_run.output.tick_count)),
287                (
288                    "anomaly_count",
289                    JsonValue::u64(baseline_run.output.anomaly_count),
290                ),
291            ],
292        );
293
294        // Run candidate lane
295        let candidate_config = config.lab_config(&config.candidate_label);
296        let candidate_run =
297            Lab::run_scenario(candidate_config, model_factory(), |s| scenario_fn(s));
298
299        logger.log(
300            "shadow.lane.done",
301            &[
302                ("lane", JsonValue::str(&config.candidate_label)),
303                (
304                    "frame_count",
305                    JsonValue::u64(candidate_run.output.frame_count as u64),
306                ),
307                (
308                    "event_count",
309                    JsonValue::u64(candidate_run.output.event_count as u64),
310                ),
311                (
312                    "tick_count",
313                    JsonValue::u64(candidate_run.output.tick_count),
314                ),
315                (
316                    "anomaly_count",
317                    JsonValue::u64(candidate_run.output.anomaly_count),
318                ),
319            ],
320        );
321
322        // Compare frame checksums
323        let baseline_frames = &baseline_run.output.frame_records;
324        let candidate_frames = &candidate_run.output.frame_records;
325        let frames_compared = baseline_frames.len().min(candidate_frames.len());
326        let mut frame_comparisons = Vec::with_capacity(frames_compared);
327        let mut first_divergence: Option<usize> = None;
328
329        for i in 0..frames_compared {
330            let matched = baseline_frames[i].checksum == candidate_frames[i].checksum;
331            frame_comparisons.push(FrameComparison {
332                index: i,
333                baseline_checksum: baseline_frames[i].checksum,
334                candidate_checksum: candidate_frames[i].checksum,
335                matched,
336            });
337            if !matched && first_divergence.is_none() {
338                first_divergence = Some(i);
339                logger.log(
340                    "shadow.frame.diverged",
341                    &[
342                        ("frame_idx", JsonValue::u64(i as u64)),
343                        (
344                            "baseline_checksum",
345                            JsonValue::str(format!("{:016x}", baseline_frames[i].checksum)),
346                        ),
347                        (
348                            "candidate_checksum",
349                            JsonValue::str(format!("{:016x}", candidate_frames[i].checksum)),
350                        ),
351                    ],
352                );
353            }
354        }
355
356        // Handle frame count mismatch (also counts as divergence)
357        if baseline_frames.len() != candidate_frames.len() && first_divergence.is_none() {
358            first_divergence = Some(frames_compared);
359        }
360
361        let verdict = if first_divergence.is_some() {
362            ShadowVerdict::Diverged
363        } else {
364            ShadowVerdict::Match
365        };
366
367        let diverged_count = frame_comparisons.iter().filter(|c| !c.matched).count();
368
369        // Log verdict
370        logger.log(
371            "shadow.verdict",
372            &[
373                (
374                    "verdict",
375                    JsonValue::str(match verdict {
376                        ShadowVerdict::Match => "match",
377                        ShadowVerdict::Diverged => "diverged",
378                    }),
379                ),
380                ("frames_compared", JsonValue::u64(frames_compared as u64)),
381                ("diverged_count", JsonValue::u64(diverged_count as u64)),
382                (
383                    "baseline_frames",
384                    JsonValue::u64(baseline_frames.len() as u64),
385                ),
386                (
387                    "candidate_frames",
388                    JsonValue::u64(candidate_frames.len() as u64),
389                ),
390            ],
391        );
392
393        let run_total = SHADOW_RUNS_TOTAL
394            .fetch_add(1, Ordering::Relaxed)
395            .saturating_add(1);
396
397        ShadowRunResult {
398            verdict,
399            scenario_name: config.scenario_name,
400            seed: config.seed,
401            frame_comparisons,
402            first_divergence,
403            frames_compared,
404            baseline: baseline_run.output,
405            candidate: candidate_run.output,
406            baseline_label: config.baseline_label,
407            candidate_label: config.candidate_label,
408            run_total,
409        }
410    }
411
412    /// Assert that baseline and candidate produce identical frames.
413    ///
414    /// Convenience wrapper around [`compare`](Self::compare) that panics
415    /// on divergence with a diagnostic message.
416    ///
417    /// # Panics
418    ///
419    /// Panics if any frame checksum diverges between lanes.
420    pub fn assert_match<M, MF, SF>(
421        config: ShadowRunConfig,
422        model_factory: MF,
423        scenario_fn: SF,
424    ) -> ShadowRunResult
425    where
426        M: Model,
427        MF: Fn() -> M,
428        SF: Fn(&mut LabSession<M>),
429    {
430        let result = Self::compare(config, model_factory, scenario_fn);
431        if result.verdict == ShadowVerdict::Diverged {
432            let diverged = result.diverged_count();
433            let first = result
434                .first_divergence
435                .map(|i| format!("frame {i}"))
436                .unwrap_or_else(|| "frame count mismatch".to_string());
437            panic!(
438                "shadow-run divergence: {} of {} frames diverged, first at {}",
439                diverged, result.frames_compared, first
440            );
441        }
442        result
443    }
444}
445
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;

    use ftui_core::event::Event;
    use ftui_render::frame::Frame;
    use ftui_runtime::program::{Cmd, Model};
    // `Widget` supplies the `render` method used in `Counter::view`.
    use ftui_widgets::Widget;

    // Minimal counter model for testing: ticks increment the value, any
    // other event quits, and the view renders the current value as text.
    struct Counter {
        value: u64,
    }

    #[derive(Debug, Clone)]
    enum CounterMsg {
        Increment,
        Quit,
    }

    impl From<Event> for CounterMsg {
        fn from(e: Event) -> Self {
            match e {
                Event::Tick => CounterMsg::Increment,
                _ => CounterMsg::Quit,
            }
        }
    }

    impl Model for Counter {
        type Message = CounterMsg;

        fn update(&mut self, msg: CounterMsg) -> Cmd<CounterMsg> {
            match msg {
                CounterMsg::Increment => {
                    self.value += 1;
                    Cmd::none()
                }
                CounterMsg::Quit => Cmd::quit(),
            }
        }

        fn view(&self, frame: &mut Frame) {
            use ftui_core::geometry::Rect;
            use ftui_widgets::paragraph::Paragraph;
            let text = format!("Count: {}", self.value);
            let area = Rect::new(0, 0, frame.width(), 1);
            Paragraph::new(text).render(area, frame);
        }
    }

    /// A `LabOutput` with no frames, events, ticks, or anomalies.
    fn empty_output() -> LabOutput {
        LabOutput {
            frame_count: 0,
            frame_records: vec![],
            event_count: 0,
            event_log: vec![],
            tick_count: 0,
            anomaly_count: 0,
        }
    }

    /// Hand-built result wrapping the given comparisons, for exercising the
    /// `ShadowRunResult` helpers without running a scenario.
    fn synthetic_result(frame_comparisons: Vec<FrameComparison>) -> ShadowRunResult {
        let first_divergence = frame_comparisons.iter().position(|c| !c.matched);
        ShadowRunResult {
            verdict: if first_divergence.is_some() {
                ShadowVerdict::Diverged
            } else {
                ShadowVerdict::Match
            },
            scenario_name: "empty".to_string(),
            seed: 0,
            frames_compared: frame_comparisons.len(),
            frame_comparisons,
            first_divergence,
            baseline: empty_output(),
            candidate: empty_output(),
            baseline_label: "baseline".to_string(),
            candidate_label: "candidate".to_string(),
            run_total: 1,
        }
    }

    #[test]
    fn shadow_run_identical_models_match() {
        let config = ShadowRunConfig::new("test_shadow", "counter_match", 42);
        let result = ShadowRun::compare(
            config,
            || Counter { value: 0 },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
                session.tick();
                session.capture_frame();
            },
        );
        assert_eq!(result.verdict, ShadowVerdict::Match);
        assert_eq!(result.frames_compared, 2);
        assert_eq!(result.diverged_count(), 0);
        assert!((result.match_ratio() - 1.0).abs() < f64::EPSILON);
        assert!(result.first_divergence.is_none());
    }

    #[test]
    fn shadow_run_detects_checksum_divergence() {
        // Each lane gets a different starting value, so the rendered text
        // (and hence the frame checksum) is expected to differ per lane.
        let lanes_built = Cell::new(0u64);
        let config = ShadowRunConfig::new("test_diverge", "counter_diverge", 42);
        let result = ShadowRun::compare(
            config,
            || {
                let start = lanes_built.get();
                lanes_built.set(start + 1);
                Counter { value: start }
            },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
            },
        );
        assert_eq!(result.verdict, ShadowVerdict::Diverged);
        assert_eq!(result.frames_compared, 1);
        assert_eq!(result.first_divergence, Some(0));
        assert_eq!(result.diverged_count(), 1);
        assert!(result.match_ratio().abs() < f64::EPSILON);
    }

    #[test]
    fn shadow_run_frame_count_mismatch_is_divergence() {
        // Only the first lane executed captures a second frame.
        let lanes_run = Cell::new(0u32);
        let config = ShadowRunConfig::new("test_count", "counter_count", 42);
        let result = ShadowRun::compare(
            config,
            || Counter { value: 0 },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
                if lanes_run.replace(lanes_run.get() + 1) == 0 {
                    session.tick();
                    session.capture_frame();
                }
            },
        );
        assert_eq!(result.verdict, ShadowVerdict::Diverged);
        assert_eq!(result.frames_compared, 1);
        assert_eq!(result.diverged_count(), 0);
        assert_eq!(result.first_divergence, Some(1));
    }

    #[test]
    fn shadow_run_assert_match_succeeds_for_identical() {
        let config = ShadowRunConfig::new("test_assert", "counter_assert", 42);
        let result = ShadowRun::assert_match(
            config,
            || Counter { value: 0 },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
            },
        );
        assert_eq!(result.verdict, ShadowVerdict::Match);
    }

    #[test]
    #[should_panic(expected = "shadow-run divergence")]
    fn shadow_run_assert_match_panics_on_divergence() {
        let lanes_built = Cell::new(0u64);
        let config = ShadowRunConfig::new("test_assert_panic", "counter_assert_panic", 42);
        let _ = ShadowRun::assert_match(
            config,
            || {
                let start = lanes_built.get();
                lanes_built.set(start + 1);
                Counter { value: start }
            },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
            },
        );
    }

    #[test]
    fn shadow_run_config_custom_labels() {
        let config = ShadowRunConfig::new("test_labels", "label_test", 7)
            .lane_labels("threading", "asupersync");
        assert_eq!(config.baseline_label, "threading");
        assert_eq!(config.candidate_label, "asupersync");
    }

    #[test]
    fn shadow_run_config_viewport() {
        let config = ShadowRunConfig::new("test_vp", "vp_test", 0)
            .viewport(120, 40)
            .time_step_ms(8);
        assert_eq!(config.viewport_width, 120);
        assert_eq!(config.viewport_height, 40);
        assert_eq!(config.time_step_ms, 8);
    }

    #[test]
    fn shadow_runs_total_increments() {
        let before = shadow_runs_total();
        let config = ShadowRunConfig::new("test_total", "total_test", 1);
        let _ = ShadowRun::compare(
            config,
            || Counter { value: 0 },
            |session| {
                session.init();
                session.capture_frame();
            },
        );
        // Other tests may run concurrently, so only require growth.
        assert!(shadow_runs_total() > before);
    }

    #[test]
    fn lab_assert_outputs_match_succeeds_for_identical() {
        let config = ShadowRunConfig::new("test_outputs", "outputs_test", 99);
        let result = ShadowRun::compare(
            config,
            || Counter { value: 0 },
            |session| {
                session.init();
                session.tick();
                session.capture_frame();
            },
        );
        // Both outputs came from identical runs so they should match.
        crate::lab_integration::assert_outputs_match(&result.baseline, &result.candidate);
    }

    #[test]
    fn match_ratio_empty_frames() {
        let result = synthetic_result(vec![]);
        assert_eq!(result.diverged_count(), 0);
        assert!((result.match_ratio() - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn match_ratio_partial_match() {
        let result = synthetic_result(vec![
            FrameComparison {
                index: 0,
                baseline_checksum: 1,
                candidate_checksum: 1,
                matched: true,
            },
            FrameComparison {
                index: 1,
                baseline_checksum: 2,
                candidate_checksum: 3,
                matched: false,
            },
        ]);
        assert_eq!(result.diverged_count(), 1);
        assert!((result.match_ratio() - 0.5).abs() < f64::EPSILON);
    }
}
614}