Skip to main content

harn_vm/orchestration/records/
diff.rs

1//! Myers diff primitives, unified-diff rendering, and the `diff_run_records` comparator.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::Path;
5
6use super::action_graph::derive_run_observability;
7use super::types::{
8    RunDiffReport, RunObservabilityDiffRecord, RunRecord, RunStageDiffRecord, RunStageRecord,
9    ToolCallDiffRecord, ToolCallRecord,
10};
11
/// One step of an edit script that transforms a "before" sequence into an
/// "after" sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DiffOp {
    /// The element is kept: it appears in both sequences.
    Equal,
    /// The element is dropped: it appears only in the "before" sequence.
    Delete,
    /// The element is added: it appears only in the "after" sequence.
    Insert,
}
19
20/// Compute the shortest edit script using Myers' O(nd) algorithm.
21/// Returns a sequence of (DiffOp, line_index_in_before_or_after).
22/// Time: O(nd) where d = edit distance. Space: O(d * n).
23pub(crate) fn myers_diff(a: &[&str], b: &[&str]) -> Vec<(DiffOp, usize)> {
24    let n = a.len() as isize;
25    let m = b.len() as isize;
26    if n == 0 && m == 0 {
27        return Vec::new();
28    }
29    if n == 0 {
30        return (0..m as usize).map(|j| (DiffOp::Insert, j)).collect();
31    }
32    if m == 0 {
33        return (0..n as usize).map(|i| (DiffOp::Delete, i)).collect();
34    }
35
36    let max_d = (n + m) as usize;
37    let offset = max_d as isize;
38    let v_size = 2 * max_d + 1;
39    let mut v = vec![0isize; v_size];
40    // trace[d] holds the `v` snapshot BEFORE step d ran — required for backtrack.
41    let mut trace: Vec<Vec<isize>> = Vec::new();
42
43    'outer: for d in 0..=max_d as isize {
44        trace.push(v.clone());
45        let mut new_v = v.clone();
46        for k in (-d..=d).step_by(2) {
47            let ki = (k + offset) as usize;
48            // Myers diff: `k == -d` is the bottom boundary, `k != d` is the
49            // top boundary — they're distinct edge cases, not a typo. Same
50            // story for `x < n && y < m`: `n` bounds `a`, `m` bounds `b`.
51            #[allow(clippy::suspicious_operation_groupings)]
52            let mut x = if k == -d || (k != d && v[ki - 1] < v[ki + 1]) {
53                v[ki + 1]
54            } else {
55                v[ki - 1] + 1
56            };
57            let mut y = x - k;
58            #[allow(clippy::suspicious_operation_groupings)]
59            while x < n && y < m && a[x as usize] == b[y as usize] {
60                x += 1;
61                y += 1;
62            }
63            new_v[ki] = x;
64            if x >= n && y >= m {
65                let _ = new_v;
66                break 'outer;
67            }
68        }
69        v = new_v;
70    }
71
72    let mut ops: Vec<(DiffOp, usize)> = Vec::new();
73    let mut x = n;
74    let mut y = m;
75    for d in (1..trace.len() as isize).rev() {
76        let k = x - y;
77        let v_prev = &trace[d as usize];
78        let prev_k = if k == -d
79            || (k != d && v_prev[(k - 1 + offset) as usize] < v_prev[(k + 1 + offset) as usize])
80        {
81            k + 1
82        } else {
83            k - 1
84        };
85        let prev_x = v_prev[(prev_k + offset) as usize];
86        let prev_y = prev_x - prev_k;
87
88        while x > prev_x && y > prev_y {
89            x -= 1;
90            y -= 1;
91            ops.push((DiffOp::Equal, x as usize));
92        }
93        if prev_k < k {
94            x -= 1;
95            ops.push((DiffOp::Delete, x as usize));
96        } else {
97            y -= 1;
98            ops.push((DiffOp::Insert, y as usize));
99        }
100    }
101    while x > 0 && y > 0 {
102        x -= 1;
103        y -= 1;
104        ops.push((DiffOp::Equal, x as usize));
105    }
106    ops.reverse();
107    ops
108}
109
110pub fn render_unified_diff(path: Option<&str>, before: &str, after: &str) -> String {
111    let before_lines: Vec<&str> = before.lines().collect();
112    let after_lines: Vec<&str> = after.lines().collect();
113    let ops = myers_diff(&before_lines, &after_lines);
114
115    let mut diff = String::new();
116    let file = path.unwrap_or("artifact");
117    diff.push_str(&format!("--- a/{file}\n+++ b/{file}\n"));
118    for &(op, idx) in &ops {
119        match op {
120            DiffOp::Equal => diff.push_str(&format!(" {}\n", before_lines[idx])),
121            DiffOp::Delete => diff.push_str(&format!("-{}\n", before_lines[idx])),
122            DiffOp::Insert => diff.push_str(&format!("+{}\n", after_lines[idx])),
123        }
124    }
125    diff
126}
127
128pub fn diff_run_records(left: &RunRecord, right: &RunRecord) -> RunDiffReport {
129    let mut stage_diffs = Vec::new();
130    let mut all_node_ids = BTreeSet::new();
131    let left_by_id: BTreeMap<&str, &RunStageRecord> = left
132        .stages
133        .iter()
134        .map(|s| (s.node_id.as_str(), s))
135        .collect();
136    let right_by_id: BTreeMap<&str, &RunStageRecord> = right
137        .stages
138        .iter()
139        .map(|s| (s.node_id.as_str(), s))
140        .collect();
141    all_node_ids.extend(left_by_id.keys().copied());
142    all_node_ids.extend(right_by_id.keys().copied());
143
144    for node_id in all_node_ids {
145        let left_stage = left_by_id.get(node_id).copied();
146        let right_stage = right_by_id.get(node_id).copied();
147        match (left_stage, right_stage) {
148            (Some(_), None) => stage_diffs.push(RunStageDiffRecord {
149                node_id: node_id.to_string(),
150                change: "removed".to_string(),
151                details: vec!["stage missing from right run".to_string()],
152            }),
153            (None, Some(_)) => stage_diffs.push(RunStageDiffRecord {
154                node_id: node_id.to_string(),
155                change: "added".to_string(),
156                details: vec!["stage missing from left run".to_string()],
157            }),
158            (Some(left_stage), Some(right_stage)) => {
159                let mut details = Vec::new();
160                if left_stage.status != right_stage.status {
161                    details.push(format!(
162                        "status: {} -> {}",
163                        left_stage.status, right_stage.status
164                    ));
165                }
166                if left_stage.outcome != right_stage.outcome {
167                    details.push(format!(
168                        "outcome: {} -> {}",
169                        left_stage.outcome, right_stage.outcome
170                    ));
171                }
172                if left_stage.branch != right_stage.branch {
173                    details.push(format!(
174                        "branch: {:?} -> {:?}",
175                        left_stage.branch, right_stage.branch
176                    ));
177                }
178                if left_stage.produced_artifact_ids.len() != right_stage.produced_artifact_ids.len()
179                {
180                    details.push(format!(
181                        "produced_artifacts: {} -> {}",
182                        left_stage.produced_artifact_ids.len(),
183                        right_stage.produced_artifact_ids.len()
184                    ));
185                }
186                if left_stage.artifacts.len() != right_stage.artifacts.len() {
187                    details.push(format!(
188                        "artifact_records: {} -> {}",
189                        left_stage.artifacts.len(),
190                        right_stage.artifacts.len()
191                    ));
192                }
193                if !details.is_empty() {
194                    stage_diffs.push(RunStageDiffRecord {
195                        node_id: node_id.to_string(),
196                        change: "changed".to_string(),
197                        details,
198                    });
199                }
200            }
201            (None, None) => {}
202        }
203    }
204
205    let mut tool_diffs = Vec::new();
206    let left_tools: std::collections::BTreeMap<(String, String), &ToolCallRecord> = left
207        .tool_recordings
208        .iter()
209        .map(|r| ((r.tool_name.clone(), r.args_hash.clone()), r))
210        .collect();
211    let right_tools: std::collections::BTreeMap<(String, String), &ToolCallRecord> = right
212        .tool_recordings
213        .iter()
214        .map(|r| ((r.tool_name.clone(), r.args_hash.clone()), r))
215        .collect();
216    let all_tool_keys: std::collections::BTreeSet<_> = left_tools
217        .keys()
218        .chain(right_tools.keys())
219        .cloned()
220        .collect();
221    for key in &all_tool_keys {
222        let l = left_tools.get(key);
223        let r = right_tools.get(key);
224        let result_changed = match (l, r) {
225            (Some(a), Some(b)) => a.result != b.result,
226            _ => true,
227        };
228        if result_changed {
229            tool_diffs.push(ToolCallDiffRecord {
230                tool_name: key.0.clone(),
231                args_hash: key.1.clone(),
232                result_changed,
233                left_result: l.map(|t| t.result.clone()),
234                right_result: r.map(|t| t.result.clone()),
235            });
236        }
237    }
238
239    let left_observability = left.observability.clone().unwrap_or_else(|| {
240        derive_run_observability(left, left.persisted_path.as_deref().map(Path::new))
241    });
242    let right_observability = right.observability.clone().unwrap_or_else(|| {
243        derive_run_observability(right, right.persisted_path.as_deref().map(Path::new))
244    });
245    let mut observability_diffs = Vec::new();
246
247    let left_workers = left_observability
248        .worker_lineage
249        .iter()
250        .map(|worker| {
251            (
252                worker.worker_id.clone(),
253                (
254                    worker.status.clone(),
255                    worker.run_id.clone(),
256                    worker.run_path.clone(),
257                ),
258            )
259        })
260        .collect::<BTreeMap<_, _>>();
261    let right_workers = right_observability
262        .worker_lineage
263        .iter()
264        .map(|worker| {
265            (
266                worker.worker_id.clone(),
267                (
268                    worker.status.clone(),
269                    worker.run_id.clone(),
270                    worker.run_path.clone(),
271                ),
272            )
273        })
274        .collect::<BTreeMap<_, _>>();
275    let worker_ids = left_workers
276        .keys()
277        .chain(right_workers.keys())
278        .cloned()
279        .collect::<BTreeSet<_>>();
280    for worker_id in worker_ids {
281        match (left_workers.get(&worker_id), right_workers.get(&worker_id)) {
282            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
283                section: "worker_lineage".to_string(),
284                label: worker_id,
285                details: vec!["worker missing from right run".to_string()],
286            }),
287            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
288                section: "worker_lineage".to_string(),
289                label: worker_id,
290                details: vec!["worker missing from left run".to_string()],
291            }),
292            (Some(left_worker), Some(right_worker)) if left_worker != right_worker => {
293                let mut details = Vec::new();
294                if left_worker.0 != right_worker.0 {
295                    details.push(format!("status: {} -> {}", left_worker.0, right_worker.0));
296                }
297                if left_worker.1 != right_worker.1 {
298                    details.push(format!(
299                        "run_id: {:?} -> {:?}",
300                        left_worker.1, right_worker.1
301                    ));
302                }
303                if left_worker.2 != right_worker.2 {
304                    details.push(format!(
305                        "run_path: {:?} -> {:?}",
306                        left_worker.2, right_worker.2
307                    ));
308                }
309                observability_diffs.push(RunObservabilityDiffRecord {
310                    section: "worker_lineage".to_string(),
311                    label: worker_id,
312                    details,
313                });
314            }
315            _ => {}
316        }
317    }
318
319    let left_rounds = left_observability
320        .planner_rounds
321        .iter()
322        .map(|round| (round.stage_id.clone(), round))
323        .collect::<BTreeMap<_, _>>();
324    let right_rounds = right_observability
325        .planner_rounds
326        .iter()
327        .map(|round| (round.stage_id.clone(), round))
328        .collect::<BTreeMap<_, _>>();
329    let round_ids = left_rounds
330        .keys()
331        .chain(right_rounds.keys())
332        .cloned()
333        .collect::<BTreeSet<_>>();
334    for stage_id in round_ids {
335        match (left_rounds.get(&stage_id), right_rounds.get(&stage_id)) {
336            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
337                section: "planner_rounds".to_string(),
338                label: stage_id,
339                details: vec!["planner summary missing from right run".to_string()],
340            }),
341            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
342                section: "planner_rounds".to_string(),
343                label: stage_id,
344                details: vec!["planner summary missing from left run".to_string()],
345            }),
346            (Some(left_round), Some(right_round)) => {
347                let mut details = Vec::new();
348                if left_round.iteration_count != right_round.iteration_count {
349                    details.push(format!(
350                        "iterations: {} -> {}",
351                        left_round.iteration_count, right_round.iteration_count
352                    ));
353                }
354                if left_round.tool_execution_count != right_round.tool_execution_count {
355                    details.push(format!(
356                        "tool_executions: {} -> {}",
357                        left_round.tool_execution_count, right_round.tool_execution_count
358                    ));
359                }
360                if left_round.native_text_tool_fallback_count
361                    != right_round.native_text_tool_fallback_count
362                {
363                    details.push(format!(
364                        "native_text_tool_fallbacks: {} -> {}",
365                        left_round.native_text_tool_fallback_count,
366                        right_round.native_text_tool_fallback_count
367                    ));
368                }
369                if left_round.native_text_tool_fallback_rejection_count
370                    != right_round.native_text_tool_fallback_rejection_count
371                {
372                    details.push(format!(
373                        "native_text_tool_fallback_rejections: {} -> {}",
374                        left_round.native_text_tool_fallback_rejection_count,
375                        right_round.native_text_tool_fallback_rejection_count
376                    ));
377                }
378                if left_round.empty_completion_retry_count
379                    != right_round.empty_completion_retry_count
380                {
381                    details.push(format!(
382                        "empty_completion_retries: {} -> {}",
383                        left_round.empty_completion_retry_count,
384                        right_round.empty_completion_retry_count
385                    ));
386                }
387                if left_round.research_facts != right_round.research_facts {
388                    details.push(format!(
389                        "research_facts: {:?} -> {:?}",
390                        left_round.research_facts, right_round.research_facts
391                    ));
392                }
393                let left_deliverables = left_round
394                    .task_ledger
395                    .as_ref()
396                    .map(|ledger| {
397                        ledger
398                            .deliverables
399                            .iter()
400                            .map(|item| format!("{}:{}", item.id, item.status))
401                            .collect::<Vec<_>>()
402                    })
403                    .unwrap_or_default();
404                let right_deliverables = right_round
405                    .task_ledger
406                    .as_ref()
407                    .map(|ledger| {
408                        ledger
409                            .deliverables
410                            .iter()
411                            .map(|item| format!("{}:{}", item.id, item.status))
412                            .collect::<Vec<_>>()
413                    })
414                    .unwrap_or_default();
415                if left_deliverables != right_deliverables {
416                    details.push(format!(
417                        "deliverables: {left_deliverables:?} -> {right_deliverables:?}"
418                    ));
419                }
420                if left_round.successful_tools != right_round.successful_tools {
421                    details.push(format!(
422                        "successful_tools: {:?} -> {:?}",
423                        left_round.successful_tools, right_round.successful_tools
424                    ));
425                }
426                if !details.is_empty() {
427                    observability_diffs.push(RunObservabilityDiffRecord {
428                        section: "planner_rounds".to_string(),
429                        label: left_round.node_id.clone(),
430                        details,
431                    });
432                }
433            }
434            _ => {}
435        }
436    }
437
438    let left_pointers = left_observability
439        .transcript_pointers
440        .iter()
441        .map(|pointer| {
442            (
443                pointer.id.clone(),
444                (
445                    pointer.available,
446                    pointer.path.clone(),
447                    pointer.location.clone(),
448                ),
449            )
450        })
451        .collect::<BTreeMap<_, _>>();
452    let right_pointers = right_observability
453        .transcript_pointers
454        .iter()
455        .map(|pointer| {
456            (
457                pointer.id.clone(),
458                (
459                    pointer.available,
460                    pointer.path.clone(),
461                    pointer.location.clone(),
462                ),
463            )
464        })
465        .collect::<BTreeMap<_, _>>();
466    let pointer_ids = left_pointers
467        .keys()
468        .chain(right_pointers.keys())
469        .cloned()
470        .collect::<BTreeSet<_>>();
471    for pointer_id in pointer_ids {
472        match (
473            left_pointers.get(&pointer_id),
474            right_pointers.get(&pointer_id),
475        ) {
476            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
477                section: "transcript_pointers".to_string(),
478                label: pointer_id,
479                details: vec!["pointer missing from right run".to_string()],
480            }),
481            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
482                section: "transcript_pointers".to_string(),
483                label: pointer_id,
484                details: vec!["pointer missing from left run".to_string()],
485            }),
486            (Some(left_pointer), Some(right_pointer)) if left_pointer != right_pointer => {
487                observability_diffs.push(RunObservabilityDiffRecord {
488                    section: "transcript_pointers".to_string(),
489                    label: pointer_id,
490                    details: vec![format!(
491                        "pointer: {:?} -> {:?}",
492                        left_pointer, right_pointer
493                    )],
494                });
495            }
496            _ => {}
497        }
498    }
499
500    let left_compactions = left_observability
501        .compaction_events
502        .iter()
503        .map(|event| {
504            (
505                event.id.clone(),
506                (
507                    event.strategy.clone(),
508                    event.archived_messages,
509                    event.snapshot_asset_id.clone(),
510                    event.available,
511                ),
512            )
513        })
514        .collect::<BTreeMap<_, _>>();
515    let right_compactions = right_observability
516        .compaction_events
517        .iter()
518        .map(|event| {
519            (
520                event.id.clone(),
521                (
522                    event.strategy.clone(),
523                    event.archived_messages,
524                    event.snapshot_asset_id.clone(),
525                    event.available,
526                ),
527            )
528        })
529        .collect::<BTreeMap<_, _>>();
530    let compaction_ids = left_compactions
531        .keys()
532        .chain(right_compactions.keys())
533        .cloned()
534        .collect::<BTreeSet<_>>();
535    for compaction_id in compaction_ids {
536        match (
537            left_compactions.get(&compaction_id),
538            right_compactions.get(&compaction_id),
539        ) {
540            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
541                section: "compaction_events".to_string(),
542                label: compaction_id,
543                details: vec!["compaction event missing from right run".to_string()],
544            }),
545            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
546                section: "compaction_events".to_string(),
547                label: compaction_id,
548                details: vec!["compaction event missing from left run".to_string()],
549            }),
550            (Some(left_event), Some(right_event)) if left_event != right_event => {
551                observability_diffs.push(RunObservabilityDiffRecord {
552                    section: "compaction_events".to_string(),
553                    label: compaction_id,
554                    details: vec![format!("event: {:?} -> {:?}", left_event, right_event)],
555                });
556            }
557            _ => {}
558        }
559    }
560
561    let left_daemons = left_observability
562        .daemon_events
563        .iter()
564        .map(|event| {
565            (
566                (event.daemon_id.clone(), event.kind, event.timestamp.clone()),
567                (
568                    event.name.clone(),
569                    event.persist_path.clone(),
570                    event.payload_summary.clone(),
571                ),
572            )
573        })
574        .collect::<BTreeMap<_, _>>();
575    let right_daemons = right_observability
576        .daemon_events
577        .iter()
578        .map(|event| {
579            (
580                (event.daemon_id.clone(), event.kind, event.timestamp.clone()),
581                (
582                    event.name.clone(),
583                    event.persist_path.clone(),
584                    event.payload_summary.clone(),
585                ),
586            )
587        })
588        .collect::<BTreeMap<_, _>>();
589    let daemon_keys = left_daemons
590        .keys()
591        .chain(right_daemons.keys())
592        .cloned()
593        .collect::<BTreeSet<_>>();
594    for daemon_key in daemon_keys {
595        let label = format!("{}:{:?}:{}", daemon_key.0, daemon_key.1, daemon_key.2);
596        match (
597            left_daemons.get(&daemon_key),
598            right_daemons.get(&daemon_key),
599        ) {
600            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
601                section: "daemon_events".to_string(),
602                label,
603                details: vec!["daemon event missing from right run".to_string()],
604            }),
605            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
606                section: "daemon_events".to_string(),
607                label,
608                details: vec!["daemon event missing from left run".to_string()],
609            }),
610            (Some(left_event), Some(right_event)) if left_event != right_event => {
611                observability_diffs.push(RunObservabilityDiffRecord {
612                    section: "daemon_events".to_string(),
613                    label,
614                    details: vec![format!("event: {:?} -> {:?}", left_event, right_event)],
615                });
616            }
617            _ => {}
618        }
619    }
620
621    let left_verification = left_observability
622        .verification_outcomes
623        .iter()
624        .map(|item| (item.stage_id.clone(), item))
625        .collect::<BTreeMap<_, _>>();
626    let right_verification = right_observability
627        .verification_outcomes
628        .iter()
629        .map(|item| (item.stage_id.clone(), item))
630        .collect::<BTreeMap<_, _>>();
631    let verification_ids = left_verification
632        .keys()
633        .chain(right_verification.keys())
634        .cloned()
635        .collect::<BTreeSet<_>>();
636    for stage_id in verification_ids {
637        match (
638            left_verification.get(&stage_id),
639            right_verification.get(&stage_id),
640        ) {
641            (Some(_), None) => observability_diffs.push(RunObservabilityDiffRecord {
642                section: "verification".to_string(),
643                label: stage_id,
644                details: vec!["verification missing from right run".to_string()],
645            }),
646            (None, Some(_)) => observability_diffs.push(RunObservabilityDiffRecord {
647                section: "verification".to_string(),
648                label: stage_id,
649                details: vec!["verification missing from left run".to_string()],
650            }),
651            (Some(left_item), Some(right_item)) if left_item != right_item => {
652                let mut details = Vec::new();
653                if left_item.passed != right_item.passed {
654                    details.push(format!(
655                        "passed: {:?} -> {:?}",
656                        left_item.passed, right_item.passed
657                    ));
658                }
659                if left_item.summary != right_item.summary {
660                    details.push(format!(
661                        "summary: {:?} -> {:?}",
662                        left_item.summary, right_item.summary
663                    ));
664                }
665                observability_diffs.push(RunObservabilityDiffRecord {
666                    section: "verification".to_string(),
667                    label: left_item.node_id.clone(),
668                    details,
669                });
670            }
671            _ => {}
672        }
673    }
674
675    let left_graph = (
676        left_observability.action_graph_nodes.len(),
677        left_observability.action_graph_edges.len(),
678    );
679    let right_graph = (
680        right_observability.action_graph_nodes.len(),
681        right_observability.action_graph_edges.len(),
682    );
683    if left_graph != right_graph {
684        observability_diffs.push(RunObservabilityDiffRecord {
685            section: "action_graph".to_string(),
686            label: "shape".to_string(),
687            details: vec![format!(
688                "nodes/edges: {}/{} -> {}/{}",
689                left_graph.0, left_graph.1, right_graph.0, right_graph.1
690            )],
691        });
692    }
693
694    let status_changed = left.status != right.status;
695    let identical = !status_changed
696        && stage_diffs.is_empty()
697        && tool_diffs.is_empty()
698        && observability_diffs.is_empty()
699        && left.transitions.len() == right.transitions.len()
700        && left.artifacts.len() == right.artifacts.len()
701        && left.checkpoints.len() == right.checkpoints.len();
702
703    RunDiffReport {
704        left_run_id: left.id.clone(),
705        right_run_id: right.id.clone(),
706        identical,
707        status_changed,
708        left_status: left.status.clone(),
709        right_status: right.status.clone(),
710        stage_diffs,
711        tool_diffs,
712        observability_diffs,
713        transition_count_delta: right.transitions.len() as isize - left.transitions.len() as isize,
714        artifact_count_delta: right.artifacts.len() as isize - left.artifacts.len() as isize,
715        checkpoint_count_delta: right.checkpoints.len() as isize - left.checkpoints.len() as isize,
716    }
717}