1use std::collections::BTreeMap;
30
31use serde::{Deserialize, Serialize};
32
33use super::tape::{EventTape, TapeRecord, TapeRecordKind};
34
/// How strictly [`compare`] judges a replay tape against the recorded one.
///
/// Serialized by serde in `snake_case`: `byte_identical`, `semantic`,
/// `outcome`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FidelityMode {
    /// Record-by-record comparison that also checks the timing fields
    /// (`virtual_time_ms`, `monotonic_ms`, subprocess duration) and file
    /// lengths.
    ByteIdentical,
    /// Record-by-record comparison that skips the timing and file-length
    /// checks.
    Semantic,
    /// Compares only the end state: the final file writes/deletes, the last
    /// subprocess exit code, and the number of LLM calls.
    Outcome,
}
43
44impl FidelityMode {
45 pub fn parse(label: &str) -> Result<Self, String> {
46 match label {
47 "byte" | "byte-identical" | "byte_identical" => Ok(Self::ByteIdentical),
48 "semantic" => Ok(Self::Semantic),
49 "outcome" => Ok(Self::Outcome),
50 other => Err(format!(
51 "unknown fidelity mode `{other}` — expected `byte-identical`, `semantic`, or `outcome`"
52 )),
53 }
54 }
55}
56
/// Result of comparing a recorded tape against a replay tape.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FidelityReport {
    /// Mode the comparison was run in.
    pub mode: FidelityMode,
    /// Number of records on the recorded tape.
    pub recorded_records: usize,
    /// Number of records on the replay tape.
    pub replay_records: usize,
    /// Every divergence found; empty when the tapes agree under `mode`.
    pub divergences: Vec<Divergence>,
    /// Agreement score between 0.0 and 1.0 as filled in by `compare`.
    ///
    /// For the record-by-record modes it is
    /// `1 - min(1, divergences / max(recorded_records, replay_records, 1))`;
    /// for `Outcome` it is 1.0 when there are no divergences and 0.0
    /// otherwise.
    pub score: f32,
}
75
76impl FidelityReport {
77 pub fn is_byte_identical(&self) -> bool {
78 self.divergences.is_empty()
79 }
80}
81
/// A single difference found between the recorded and replay tapes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Divergence {
    /// Sequence number of the record the finding is attached to; `None` for
    /// the whole-tape findings produced in outcome mode.
    pub seq: Option<u64>,
    /// Short machine-friendly tag, e.g. `kind_mismatch` or
    /// `outcome_exit_code`.
    pub category: String,
    /// Human-readable description that includes the recorded and replayed
    /// values.
    pub message: String,
}
97
98pub fn compare(recorded: &EventTape, replay: &EventTape, mode: FidelityMode) -> FidelityReport {
100 let divergences = match mode {
101 FidelityMode::ByteIdentical => compare_record_by_record(recorded, replay, true),
102 FidelityMode::Semantic => compare_record_by_record(recorded, replay, false),
103 FidelityMode::Outcome => compare_outcome(recorded, replay),
104 };
105 let baseline = recorded.records.len().max(replay.records.len()).max(1);
106 let score = match mode {
107 FidelityMode::ByteIdentical | FidelityMode::Semantic => {
108 1.0 - (divergences.len() as f32 / baseline as f32).min(1.0)
109 }
110 FidelityMode::Outcome => {
111 if divergences.is_empty() {
112 1.0
113 } else {
114 0.0
115 }
116 }
117 };
118 FidelityReport {
119 mode,
120 recorded_records: recorded.records.len(),
121 replay_records: replay.records.len(),
122 divergences,
123 score,
124 }
125}
126
127fn compare_record_by_record(
128 recorded: &EventTape,
129 replay: &EventTape,
130 byte_strict: bool,
131) -> Vec<Divergence> {
132 let mut out = Vec::new();
133 let max = recorded.records.len().max(replay.records.len());
134 for idx in 0..max {
135 match (recorded.records.get(idx), replay.records.get(idx)) {
136 (Some(rec), Some(rep)) => compare_pair(rec, rep, byte_strict, &mut out),
137 (Some(rec), None) => out.push(Divergence {
138 seq: Some(rec.seq),
139 category: "missing_in_replay".to_string(),
140 message: format!(
141 "replay tape ended at #{idx}; recorded had {} more record(s)",
142 recorded.records.len() - idx
143 ),
144 }),
145 (None, Some(rep)) => out.push(Divergence {
146 seq: Some(rep.seq),
147 category: "missing_in_recorded".to_string(),
148 message: format!(
149 "replay produced an extra record at #{idx} (kind={})",
150 record_kind_tag(&rep.kind)
151 ),
152 }),
153 (None, None) => break,
154 }
155 }
156 out
157}
158
159fn compare_pair(
160 recorded: &TapeRecord,
161 replay: &TapeRecord,
162 byte_strict: bool,
163 out: &mut Vec<Divergence>,
164) {
165 if record_kind_tag(&recorded.kind) != record_kind_tag(&replay.kind) {
166 out.push(Divergence {
167 seq: Some(recorded.seq),
168 category: "kind_mismatch".to_string(),
169 message: format!(
170 "record kind diverged: recorded={} replay={}",
171 record_kind_tag(&recorded.kind),
172 record_kind_tag(&replay.kind),
173 ),
174 });
175 return;
176 }
177 if byte_strict && recorded.virtual_time_ms != replay.virtual_time_ms {
178 out.push(Divergence {
179 seq: Some(recorded.seq),
180 category: "virtual_time_drift".to_string(),
181 message: format!(
182 "virtual_time_ms diverged: recorded={} replay={}",
183 recorded.virtual_time_ms, replay.virtual_time_ms,
184 ),
185 });
186 }
187 if byte_strict && recorded.monotonic_ms != replay.monotonic_ms {
188 out.push(Divergence {
189 seq: Some(recorded.seq),
190 category: "monotonic_drift".to_string(),
191 message: format!(
192 "monotonic_ms diverged: recorded={} replay={}",
193 recorded.monotonic_ms, replay.monotonic_ms,
194 ),
195 });
196 }
197 compare_kind(&recorded.kind, &replay.kind, recorded.seq, byte_strict, out);
198}
199
200fn compare_kind(
201 recorded: &TapeRecordKind,
202 replay: &TapeRecordKind,
203 seq: u64,
204 byte_strict: bool,
205 out: &mut Vec<Divergence>,
206) {
207 use TapeRecordKind::*;
208 match (recorded, replay) {
209 (
210 ClockRead {
211 source: r_source,
212 value_ms: r_val,
213 },
214 ClockRead {
215 source: p_source,
216 value_ms: p_val,
217 },
218 ) => {
219 if r_source != p_source {
220 out.push(Divergence {
221 seq: Some(seq),
222 category: "clock_read_source".to_string(),
223 message: format!(
224 "clock_read source diverged: recorded={r_source:?} replay={p_source:?}"
225 ),
226 });
227 }
228 if r_val != p_val {
229 out.push(Divergence {
230 seq: Some(seq),
231 category: "clock_read_value".to_string(),
232 message: format!(
233 "clock_read value_ms diverged: recorded={r_val} replay={p_val}"
234 ),
235 });
236 }
237 }
238 (
239 ClockSleep {
240 duration_ms: recorded_dur,
241 },
242 ClockSleep {
243 duration_ms: replay_dur,
244 },
245 ) => {
246 if recorded_dur != replay_dur {
247 out.push(Divergence {
248 seq: Some(seq),
249 category: "clock_sleep_duration".to_string(),
250 message: format!(
251 "sleep duration diverged: recorded={recorded_dur}ms replay={replay_dur}ms"
252 ),
253 });
254 }
255 }
256 (
257 LlmCall {
258 request_digest: recorded_req,
259 response: recorded_res,
260 },
261 LlmCall {
262 request_digest: replay_req,
263 response: replay_res,
264 },
265 ) => {
266 if recorded_req != replay_req {
267 out.push(Divergence {
268 seq: Some(seq),
269 category: "llm_request_digest".to_string(),
270 message: format!(
271 "LLM request digest diverged: recorded={recorded_req} replay={replay_req}"
272 ),
273 });
274 }
275 if recorded_res.content_hash() != replay_res.content_hash() {
276 out.push(Divergence {
277 seq: Some(seq),
278 category: "llm_response_hash".to_string(),
279 message: format!(
280 "LLM response hash diverged: recorded={} replay={}",
281 recorded_res.content_hash(),
282 replay_res.content_hash(),
283 ),
284 });
285 }
286 }
287 (
288 FileRead {
289 path: rp,
290 content_hash: rh,
291 len_bytes: rl,
292 },
293 FileRead {
294 path: pp,
295 content_hash: ph,
296 len_bytes: pl,
297 },
298 ) => compare_file(seq, "file_read", rp, rh, *rl, pp, ph, *pl, byte_strict, out),
299 (
300 FileWrite {
301 path: rp,
302 content_hash: rh,
303 len_bytes: rl,
304 },
305 FileWrite {
306 path: pp,
307 content_hash: ph,
308 len_bytes: pl,
309 },
310 ) => compare_file(
311 seq,
312 "file_write",
313 rp,
314 rh,
315 *rl,
316 pp,
317 ph,
318 *pl,
319 byte_strict,
320 out,
321 ),
322 (FileDelete { path: rp }, FileDelete { path: pp }) => {
323 if rp != pp {
324 out.push(Divergence {
325 seq: Some(seq),
326 category: "file_delete_path".to_string(),
327 message: format!("file_delete path diverged: recorded={rp} replay={pp}"),
328 });
329 }
330 }
331 (
332 ProcessSpawn {
333 program: r_program,
334 args: r_args,
335 cwd: r_cwd,
336 exit_code: r_exit,
337 duration_ms: r_dur,
338 stdout_payload: r_stdout,
339 stderr_payload: r_stderr,
340 },
341 ProcessSpawn {
342 program: p_program,
343 args: p_args,
344 cwd: p_cwd,
345 exit_code: p_exit,
346 duration_ms: p_dur,
347 stdout_payload: p_stdout,
348 stderr_payload: p_stderr,
349 },
350 ) => {
351 if r_program != p_program {
352 out.push(Divergence {
353 seq: Some(seq),
354 category: "process_program".to_string(),
355 message: format!(
356 "subprocess program diverged: recorded={r_program} replay={p_program}"
357 ),
358 });
359 }
360 if r_args != p_args {
361 out.push(Divergence {
362 seq: Some(seq),
363 category: "process_args".to_string(),
364 message: format!(
365 "subprocess args diverged: recorded={r_args:?} replay={p_args:?}"
366 ),
367 });
368 }
369 if r_cwd != p_cwd {
370 out.push(Divergence {
371 seq: Some(seq),
372 category: "process_cwd".to_string(),
373 message: format!(
374 "subprocess cwd diverged: recorded={r_cwd:?} replay={p_cwd:?}"
375 ),
376 });
377 }
378 if r_exit != p_exit {
379 out.push(Divergence {
380 seq: Some(seq),
381 category: "process_exit_code".to_string(),
382 message: format!(
383 "subprocess exit code diverged: recorded={r_exit} replay={p_exit}"
384 ),
385 });
386 }
387 if byte_strict && r_dur != p_dur {
388 out.push(Divergence {
389 seq: Some(seq),
390 category: "process_duration".to_string(),
391 message: format!(
392 "subprocess duration diverged: recorded={r_dur}ms replay={p_dur}ms"
393 ),
394 });
395 }
396 if r_stdout.content_hash() != p_stdout.content_hash() {
397 out.push(Divergence {
398 seq: Some(seq),
399 category: "process_stdout_hash".to_string(),
400 message: format!(
401 "subprocess stdout hash diverged: recorded={} replay={}",
402 r_stdout.content_hash(),
403 p_stdout.content_hash(),
404 ),
405 });
406 }
407 if r_stderr.content_hash() != p_stderr.content_hash() {
408 out.push(Divergence {
409 seq: Some(seq),
410 category: "process_stderr_hash".to_string(),
411 message: format!(
412 "subprocess stderr hash diverged: recorded={} replay={}",
413 r_stderr.content_hash(),
414 p_stderr.content_hash(),
415 ),
416 });
417 }
418 }
419 (Unknown, _) | (_, Unknown) => out.push(Divergence {
420 seq: Some(seq),
421 category: "unknown_kind".to_string(),
422 message: "encountered an unknown record kind — produced by a newer harn-vm version"
423 .to_string(),
424 }),
425 _ => out.push(Divergence {
430 seq: Some(seq),
431 category: "comparator_gap".to_string(),
432 message: format!(
433 "no comparator wired for record kind `{}`",
434 record_kind_tag(recorded)
435 ),
436 }),
437 }
438}
439
440#[allow(clippy::too_many_arguments)]
441fn compare_file(
442 seq: u64,
443 category: &str,
444 recorded_path: &str,
445 recorded_hash: &str,
446 recorded_len: u64,
447 replay_path: &str,
448 replay_hash: &str,
449 replay_len: u64,
450 byte_strict: bool,
451 out: &mut Vec<Divergence>,
452) {
453 if recorded_path != replay_path {
454 out.push(Divergence {
455 seq: Some(seq),
456 category: format!("{category}_path"),
457 message: format!(
458 "{category} path diverged: recorded={recorded_path} replay={replay_path}"
459 ),
460 });
461 }
462 if recorded_hash != replay_hash {
463 out.push(Divergence {
464 seq: Some(seq),
465 category: format!("{category}_hash"),
466 message: format!(
467 "{category} content hash diverged: recorded={recorded_hash} replay={replay_hash}"
468 ),
469 });
470 }
471 if byte_strict && recorded_len != replay_len {
472 out.push(Divergence {
473 seq: Some(seq),
474 category: format!("{category}_len"),
475 message: format!(
476 "{category} length diverged: recorded={recorded_len} replay={replay_len}"
477 ),
478 });
479 }
480}
481
482fn compare_outcome(recorded: &EventTape, replay: &EventTape) -> Vec<Divergence> {
483 let mut out = Vec::new();
484
485 let recorded_writes = collect_final_writes(recorded);
486 let replay_writes = collect_final_writes(replay);
487 if recorded_writes != replay_writes {
488 let recorded_paths: Vec<&String> = recorded_writes.keys().collect();
489 let replay_paths: Vec<&String> = replay_writes.keys().collect();
490 out.push(Divergence {
491 seq: None,
492 category: "outcome_fs_diff".to_string(),
493 message: format!(
494 "final FS write set diverged: recorded={recorded_paths:?} replay={replay_paths:?}"
495 ),
496 });
497 }
498
499 let recorded_exit = last_process_exit(recorded);
500 let replay_exit = last_process_exit(replay);
501 if recorded_exit != replay_exit {
502 out.push(Divergence {
503 seq: None,
504 category: "outcome_exit_code".to_string(),
505 message: format!(
506 "last subprocess exit code diverged: recorded={recorded_exit:?} replay={replay_exit:?}"
507 ),
508 });
509 }
510
511 let recorded_llm = count_llm_calls(recorded);
512 let replay_llm = count_llm_calls(replay);
513 if recorded_llm != replay_llm {
514 out.push(Divergence {
515 seq: None,
516 category: "outcome_llm_call_count".to_string(),
517 message: format!(
518 "LLM call count diverged: recorded={recorded_llm} replay={replay_llm}"
519 ),
520 });
521 }
522 out
523}
524
525fn collect_final_writes(tape: &EventTape) -> BTreeMap<String, Option<String>> {
526 let mut state: BTreeMap<String, Option<String>> = BTreeMap::new();
527 for record in &tape.records {
528 match &record.kind {
529 TapeRecordKind::FileWrite {
530 path, content_hash, ..
531 } => {
532 state.insert(path.clone(), Some(content_hash.clone()));
533 }
534 TapeRecordKind::FileDelete { path } => {
535 state.insert(path.clone(), None);
536 }
537 _ => {}
538 }
539 }
540 state
541}
542
543fn last_process_exit(tape: &EventTape) -> Option<i32> {
544 tape.records
545 .iter()
546 .rev()
547 .find_map(|record| match &record.kind {
548 TapeRecordKind::ProcessSpawn { exit_code, .. } => Some(*exit_code),
549 _ => None,
550 })
551}
552
553fn count_llm_calls(tape: &EventTape) -> usize {
554 tape.records
555 .iter()
556 .filter(|record| matches!(record.kind, TapeRecordKind::LlmCall { .. }))
557 .count()
558}
559
560fn record_kind_tag(kind: &TapeRecordKind) -> &'static str {
561 match kind {
562 TapeRecordKind::ClockRead { .. } => "clock_read",
563 TapeRecordKind::ClockSleep { .. } => "clock_sleep",
564 TapeRecordKind::LlmCall { .. } => "llm_call",
565 TapeRecordKind::FileRead { .. } => "file_read",
566 TapeRecordKind::FileWrite { .. } => "file_write",
567 TapeRecordKind::FileDelete { .. } => "file_delete",
568 TapeRecordKind::ProcessSpawn { .. } => "process_spawn",
569 TapeRecordKind::Unknown => "unknown",
570 }
571}
572
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testbench::tape::{TapeHeader, TapePayload, TapeRecord};

    /// A tape with a default header and no records.
    fn empty_tape() -> EventTape {
        EventTape::new(TapeHeader::current(None, None, Vec::new()))
    }

    /// A record at time zero, so only `seq` and `kind` vary between tests.
    fn record(seq: u64, kind: TapeRecordKind) -> TapeRecord {
        TapeRecord {
            seq,
            virtual_time_ms: 0,
            monotonic_ms: 0,
            kind,
        }
    }

    /// A `FileWrite` record at time zero.
    fn file_write(seq: u64, path: &str, content_hash: &str) -> TapeRecord {
        record(
            seq,
            TapeRecordKind::FileWrite {
                path: path.to_string(),
                content_hash: content_hash.to_string(),
                len_bytes: 1,
            },
        )
    }

    #[test]
    fn byte_identical_matches_when_records_align() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        a.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        b.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        let report = compare(&a, &b, FidelityMode::ByteIdentical);
        assert!(report.is_byte_identical(), "{report:?}");
        assert_eq!(report.score, 1.0);
    }

    // Renamed: this exercises a `ClockSleep` duration, not a `ClockRead`.
    #[test]
    fn byte_identical_flags_a_drifted_clock_sleep() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        a.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        b.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 7 }));
        let report = compare(&a, &b, FidelityMode::ByteIdentical);
        assert_eq!(report.divergences.len(), 1);
        assert_eq!(report.divergences[0].category, "clock_sleep_duration");
    }

    #[test]
    fn record_by_record_flags_a_shorter_replay() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        a.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        a.records
            .push(record(1, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        b.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 5 }));
        let report = compare(&a, &b, FidelityMode::Semantic);
        assert_eq!(report.recorded_records, 2);
        assert_eq!(report.replay_records, 1);
        assert_eq!(report.divergences.len(), 1);
        assert_eq!(report.divergences[0].category, "missing_in_replay");
        assert_eq!(report.divergences[0].seq, Some(1));
        // One divergence over a baseline of two records.
        assert_eq!(report.score, 0.5);
    }

    #[test]
    fn semantic_mode_ignores_pure_timing_drift() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        let make = |seq: u64, vt: i64| TapeRecord {
            seq,
            virtual_time_ms: vt,
            monotonic_ms: vt,
            kind: TapeRecordKind::FileWrite {
                path: "/tmp/out.txt".to_string(),
                content_hash: "abc".to_string(),
                len_bytes: 3,
            },
        };
        a.records.push(make(0, 0));
        // Same payload, one millisecond later on both clocks.
        b.records.push(make(0, 1));
        let strict = compare(&a, &b, FidelityMode::ByteIdentical);
        assert!(!strict.is_byte_identical());
        let semantic = compare(&a, &b, FidelityMode::Semantic);
        assert!(
            semantic.is_byte_identical(),
            "semantic should not flag pure timing drift, got {semantic:?}"
        );
    }

    #[test]
    fn outcome_mode_only_compares_final_writes_and_exit() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        a.records.push(file_write(0, "/tmp/a", "h1"));
        a.records
            .push(record(1, TapeRecordKind::ClockSleep { duration_ms: 1000 }));
        b.records
            .push(record(0, TapeRecordKind::ClockSleep { duration_ms: 50 }));
        b.records.push(file_write(1, "/tmp/a", "h1"));
        let report = compare(&a, &b, FidelityMode::Outcome);
        assert!(
            report.divergences.is_empty(),
            "outcome mode should ignore intermediate diffs, got {report:?}"
        );
        assert_eq!(report.score, 1.0);
    }

    #[test]
    fn outcome_mode_flags_content_hash_drift_on_same_path() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        a.records.push(file_write(0, "/tmp/a", "h1"));
        b.records.push(file_write(0, "/tmp/a", "h2"));
        let report = compare(&a, &b, FidelityMode::Outcome);
        assert_eq!(report.divergences.len(), 1);
        assert_eq!(report.divergences[0].category, "outcome_fs_diff");
        assert_eq!(report.divergences[0].seq, None);
        assert_eq!(report.score, 0.0);
    }

    #[test]
    fn outcome_mode_flags_exit_code_drift() {
        let mut a = empty_tape();
        let mut b = empty_tape();
        let payload = TapePayload::Inline {
            content_hash: "ehash".to_string(),
            text: String::new(),
        };
        a.records.push(record(
            0,
            TapeRecordKind::ProcessSpawn {
                program: "git".to_string(),
                args: Vec::new(),
                cwd: None,
                exit_code: 0,
                duration_ms: 1,
                stdout_payload: payload.clone(),
                stderr_payload: payload.clone(),
            },
        ));
        b.records.push(record(
            0,
            TapeRecordKind::ProcessSpawn {
                program: "git".to_string(),
                args: Vec::new(),
                cwd: None,
                exit_code: 1,
                duration_ms: 1,
                stdout_payload: payload.clone(),
                stderr_payload: payload,
            },
        ));
        let report = compare(&a, &b, FidelityMode::Outcome);
        assert_eq!(report.divergences.len(), 1);
        assert_eq!(report.divergences[0].category, "outcome_exit_code");
    }

    #[test]
    fn parse_mode_accepts_aliases() {
        assert_eq!(
            FidelityMode::parse("byte").unwrap(),
            FidelityMode::ByteIdentical
        );
        assert_eq!(
            FidelityMode::parse("byte-identical").unwrap(),
            FidelityMode::ByteIdentical
        );
        assert_eq!(
            FidelityMode::parse("byte_identical").unwrap(),
            FidelityMode::ByteIdentical
        );
        assert_eq!(
            FidelityMode::parse("semantic").unwrap(),
            FidelityMode::Semantic
        );
        assert_eq!(
            FidelityMode::parse("outcome").unwrap(),
            FidelityMode::Outcome
        );
        assert!(FidelityMode::parse("nope").is_err());
    }
}