Skip to main content

vela_protocol/
diff.rs

1//! `vela diff` — structural comparison of two frontiers.
2
3use std::collections::{HashMap, HashSet};
4use std::path::Path;
5
6use colored::Colorize;
7
8use crate::cli_style as style;
9use serde::Serialize;
10
11use crate::bundle::{ReviewAction, ReviewEvent};
12use crate::events;
13use crate::project::{Project, ProjectDependency};
14use crate::proposals;
15use crate::repo;
16
/// Result of comparing two frontiers.
///
/// Produced by [`compare`]. Throughout, the `a` / `b` suffixes refer to the
/// first and second project passed to `compare`.
#[derive(Debug, Serialize)]
pub struct DiffResult {
    /// Project name of frontier A.
    pub name_a: String,
    /// Project name of frontier B.
    pub name_b: String,
    /// Number of findings in A.
    pub findings_a: usize,
    /// Number of findings in B.
    pub findings_b: usize,
    /// Findings whose ID appears in A but not in B.
    pub only_in_a: Vec<FindingSummary>,
    /// Findings whose ID appears in B but not in A.
    pub only_in_b: Vec<FindingSummary>,
    /// Review events whose ID appears in A but not in B.
    pub only_in_a_reviews: Vec<ReviewSummary>,
    /// Review events whose ID appears in B but not in A.
    pub only_in_b_reviews: Vec<ReviewSummary>,
    /// Dependencies (keyed by name and source) present in A but not in B.
    pub only_in_a_dependencies: Vec<DependencySummary>,
    /// Dependencies (keyed by name and source) present in B but not in A.
    pub only_in_b_dependencies: Vec<DependencySummary>,
    /// A-only / B-only findings matched to each other by similarity score.
    pub semantic_pairs: Vec<SemanticPair>,
    /// Field-level differences for shared-ID findings and semantic pairs,
    /// sorted by `id_a`, then `id_b`, then `field`.
    pub field_changes: Vec<FieldChange>,
    /// Shared-ID findings whose confidence score differs, largest absolute
    /// delta first.
    pub confidence_changes: Vec<ConfidenceChange>,
    /// `contradicts` links in B whose (source, target) pair is absent from A.
    pub new_contradictions: Vec<ContradictionSummary>,
    /// Sorted names of entities with a canonical ID that occur only in A.
    pub entities_only_in_a: Vec<String>,
    /// Sorted names of entities with a canonical ID that occur only in B.
    pub entities_only_in_b: Vec<String>,
    /// Source, evidence-atom and condition-record counts for both sides.
    pub projections: ProjectionDiff,
    /// Proposal counts for both sides.
    pub proposal_state: ProposalStateDiff,
    /// Event counts and event kinds unique to each side.
    pub event_log: EventLogDiff,
    /// Latest proof-packet status and stale reason for both sides.
    pub proof_state: ProofStateDiff,
    /// Reviewer-facing notes derived from the differences above.
    pub review_impacts: Vec<ReviewImpact>,
    /// Side-by-side copy of each project's stored stats.
    pub stats_comparison: StatsComparison,
}
43
/// Record counts for each frontier, stored as `(a, b)` pairs.
#[derive(Debug, Serialize)]
pub struct ProjectionDiff {
    /// Length of each project's `sources` collection.
    pub sources: (usize, usize),
    /// Length of each project's `evidence_atoms` collection.
    pub evidence_atoms: (usize, usize),
    /// Length of each project's `condition_records` collection.
    pub condition_records: (usize, usize),
}
50
/// Proposal counts for each frontier, stored as `(a, b)` pairs taken from
/// `proposals::summary`.
#[derive(Debug, Serialize)]
pub struct ProposalStateDiff {
    /// The summary's `total` value for each side.
    pub total: (usize, usize),
    /// The summary's `pending_review` value for each side.
    pub pending_review: (usize, usize),
    /// The summary's `applied` value for each side.
    pub applied: (usize, usize),
}
57
/// Event-log comparison built from `events::summarize` for each frontier.
#[derive(Debug, Serialize)]
pub struct EventLogDiff {
    /// Event count reported for each side, as `(a, b)`.
    pub events: (usize, usize),
    /// Sorted event kinds that appear only in A's summary.
    pub kinds_only_in_a: Vec<String>,
    /// Sorted event kinds that appear only in B's summary.
    pub kinds_only_in_b: Vec<String>,
}
64
/// Proof state of each frontier, copied from the projects' `proof_state`.
#[derive(Debug, Serialize)]
pub struct ProofStateDiff {
    /// Status of A's latest proof packet.
    pub status_a: String,
    /// Status of B's latest proof packet.
    pub status_b: String,
    /// A's stale reason, if one is recorded.
    pub stale_reason_a: Option<String>,
    /// B's stale reason, if one is recorded.
    pub stale_reason_b: Option<String>,
}
72
/// A single reviewer-facing note about a difference between the frontiers.
#[derive(Debug, Serialize)]
pub struct ReviewImpact {
    /// Short category tag, e.g. `"proof_state"` or `"contradiction"`.
    pub kind: String,
    /// Human-readable description of what changed.
    pub message: String,
}
78
/// Flattened view of a project dependency for diff output.
#[derive(Debug, Serialize)]
pub struct DependencySummary {
    /// Dependency name.
    pub name: String,
    /// Dependency source.
    pub source: String,
    /// Dependency version, or `"-"` when the dependency has none.
    pub version: String,
}
85
/// Flattened view of a review event for diff output.
#[derive(Debug, Serialize)]
pub struct ReviewSummary {
    /// ID of the review event.
    pub id: String,
    /// ID of the finding the review refers to.
    pub finding_id: String,
    /// Reviewer recorded on the event.
    pub reviewer: String,
    /// Text label of the review action, e.g. `"approved"` or
    /// `"corrected:<field>"`.
    pub action: String,
    /// Reason recorded on the event.
    pub reason: String,
}
94
/// Minimal description of a finding that exists on only one side.
#[derive(Debug, Serialize)]
pub struct FindingSummary {
    /// Finding ID.
    pub id: String,
    /// The finding's assertion text.
    pub assertion: String,
}
100
/// Confidence-score change for a finding that exists in both frontiers.
#[derive(Debug, Serialize)]
pub struct ConfidenceChange {
    /// ID shared by the finding on both sides.
    pub id: String,
    /// Assertion text, taken from A's copy of the finding.
    pub assertion: String,
    /// Confidence score in A.
    pub score_a: f64,
    /// Confidence score in B.
    pub score_b: f64,
    /// `score_b - score_a`.
    pub delta: f64,
}
109
/// A `contradicts` link found in B that has no counterpart in A.
#[derive(Debug, Serialize)]
pub struct ContradictionSummary {
    /// ID of the finding that carries the link.
    pub from_id: String,
    /// The link's target.
    pub target_id: String,
    /// Note attached to the link.
    pub note: String,
}
116
/// An A-only finding and a B-only finding judged to be the same finding
/// under different IDs.
#[derive(Debug, Serialize)]
pub struct SemanticPair {
    /// ID of the finding in A.
    pub id_a: String,
    /// ID of the finding in B.
    pub id_b: String,
    /// Similarity score, rounded to three decimal places.
    pub score: f64,
    /// Short explanation of why the two findings were matched.
    pub reason: String,
    /// Assertion text of the A-side finding.
    pub assertion_a: String,
    /// Assertion text of the B-side finding.
    pub assertion_b: String,
}
126
/// One field whose value differs between two paired findings.
#[derive(Debug, Serialize)]
pub struct FieldChange {
    /// ID of the finding in A.
    pub id_a: String,
    /// ID of the finding in B (equal to `id_a` for shared-ID findings).
    pub id_b: String,
    /// Dotted path of the compared field, e.g. `"assertion.text"`.
    pub field: String,
    /// The field's value in A.
    pub value_a: serde_json::Value,
    /// The field's value in B.
    pub value_b: serde_json::Value,
}
135
/// Side-by-side copy of each project's stored `stats`, as `(a, b)` pairs.
#[derive(Debug, Serialize)]
pub struct StatsComparison {
    /// `stats.findings` of each project.
    pub findings: (usize, usize),
    /// `stats.links` of each project.
    pub links: (usize, usize),
    /// `stats.replicated` of each project.
    pub replicated: (usize, usize),
    /// `stats.gaps` of each project.
    pub gaps: (usize, usize),
    /// `stats.contested` of each project.
    pub contested: (usize, usize),
    /// `stats.review_event_count` of each project.
    pub review_events: (usize, usize),
    /// `stats.avg_confidence` of each project.
    pub avg_confidence: (f64, f64),
}
146
/// Top-level JSON document emitted for a diff; built by [`json_envelope`].
#[derive(Debug, Serialize)]
pub struct DiffJsonEnvelope<'a> {
    /// Schema identifier of the envelope (`"vela.diff.v2"`).
    pub schema: &'static str,
    /// Success flag; `json_envelope` always sets it to `true`.
    pub ok: bool,
    /// RFC 3339 UTC timestamp taken when the envelope was built.
    pub generated_at: String,
    /// Name of the command that produced the output (`"vela diff"`).
    pub command: &'static str,
    /// The two input paths that were compared.
    pub sources: DiffSources<'a>,
    /// Headline counts derived from `diff`.
    pub summary: DiffSummary,
    /// The full comparison result.
    pub diff: &'a DiffResult,
}
157
/// The two input paths of a diff, as strings.
#[derive(Debug, Serialize)]
pub struct DiffSources<'a> {
    /// Path of frontier A; empty when the path is not valid UTF-8.
    pub a: &'a str,
    /// Path of frontier B; empty when the path is not valid UTF-8.
    pub b: &'a str,
}
163
/// Headline counts for a diff; each field is a count taken from the
/// corresponding part of [`DiffResult`].
#[derive(Debug, Serialize)]
pub struct DiffSummary {
    /// Number of findings in A.
    pub findings_a: usize,
    /// Number of findings in B.
    pub findings_b: usize,
    /// Number of findings present only in A.
    pub only_in_a: usize,
    /// Number of findings present only in B.
    pub only_in_b: usize,
    /// Number of semantic pairs.
    pub semantic_pairs: usize,
    /// Number of field-level changes.
    pub field_changes: usize,
    /// Number of confidence changes.
    pub confidence_changes: usize,
    /// Number of new contradiction links.
    pub new_contradictions: usize,
    /// Number of review events present only in A.
    pub review_events_only_in_a: usize,
    /// Number of review events present only in B.
    pub review_events_only_in_b: usize,
    /// Number of review-impact notes.
    pub review_impacts: usize,
}
178
/// Shortens `s` to at most `max` bytes and appends `"..."` when anything was
/// cut off.
///
/// The cut point is moved back to the nearest UTF-8 character boundary so the
/// slice never splits a multi-byte character. Strings that already fit are
/// returned unchanged, without the ellipsis.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&s[..cut]);
    shortened.push_str("...");
    shortened
}
190
191fn summarize_review(event: &ReviewEvent) -> ReviewSummary {
192    ReviewSummary {
193        id: event.id.clone(),
194        finding_id: event.finding_id.clone(),
195        reviewer: event.reviewer.clone(),
196        action: review_action_label(&event.action),
197        reason: event.reason.clone(),
198    }
199}
200
201fn summarize_dependency(dep: &ProjectDependency) -> DependencySummary {
202    DependencySummary {
203        name: dep.name.clone(),
204        source: dep.source.clone(),
205        version: dep.version.clone().unwrap_or_else(|| "-".into()),
206    }
207}
208
209fn review_action_label(action: &ReviewAction) -> String {
210    match action {
211        ReviewAction::Approved => "approved".to_string(),
212        ReviewAction::Qualified { .. } => "qualified".to_string(),
213        ReviewAction::Corrected { field, .. } => format!("corrected:{field}"),
214        ReviewAction::Flagged { flag_type } => format!("flagged:{flag_type}"),
215        ReviewAction::Disputed { .. } => "disputed".to_string(),
216    }
217}
218
219fn semantic_key(f: &crate::bundle::FindingBundle) -> String {
220    normalize_text(&format!(
221        "{} {} {}",
222        f.assertion.assertion_type, f.assertion.text, f.conditions.text
223    ))
224}
225
/// Lowercases `value` and reduces it to ASCII alphanumeric words separated by
/// single spaces.
///
/// Every other character (punctuation, whitespace, non-ASCII letters) acts as
/// a word separator, and runs of separators collapse into one space.
fn normalize_text(value: &str) -> String {
    let lowered = value.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    let words = lowered
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());
    for word in words {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
236
237fn token_set(value: &str) -> HashSet<String> {
238    normalize_text(value)
239        .split_whitespace()
240        .filter(|token| token.len() > 2)
241        .map(str::to_string)
242        .collect()
243}
244
/// Jaccard similarity of two token sets: `|A ∩ B| / |A ∪ B|`.
///
/// Two empty sets are treated as identical and score `1.0`. Otherwise the
/// result lies in `0.0..=1.0`.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f64 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    let shared = a.intersection(b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|. At least one set is non-empty here, so
    // the union is never zero and no division guard is needed.
    let total = a.len() + b.len() - shared;
    shared as f64 / total as f64
}
257
258fn semantic_similarity(
259    a: &crate::bundle::FindingBundle,
260    b: &crate::bundle::FindingBundle,
261) -> (f64, String) {
262    let key_a = semantic_key(a);
263    let key_b = semantic_key(b);
264    if key_a == key_b {
265        return (
266            1.0,
267            "normalized assertion/type/conditions match".to_string(),
268        );
269    }
270
271    let tokens_a = token_set(&key_a);
272    let tokens_b = token_set(&key_b);
273    let token_score = jaccard(&tokens_a, &tokens_b);
274    let doi_match = a.provenance.doi.is_some() && a.provenance.doi == b.provenance.doi;
275    let pmid_match = a.provenance.pmid.is_some() && a.provenance.pmid == b.provenance.pmid;
276    let type_match = a.assertion.assertion_type == b.assertion.assertion_type;
277    let provenance_boost = if doi_match || pmid_match { 0.25 } else { 0.0 };
278    let type_boost = if type_match { 0.1 } else { 0.0 };
279    let score = (token_score + provenance_boost + type_boost).min(1.0);
280    let reason = if doi_match {
281        "shared DOI with similar assertion".to_string()
282    } else if pmid_match {
283        "shared PMID with similar assertion".to_string()
284    } else if type_match {
285        "same assertion type with similar text".to_string()
286    } else {
287        "similar assertion text".to_string()
288    };
289    (score, reason)
290}
291
292fn value_str(value: impl Into<String>) -> serde_json::Value {
293    serde_json::Value::String(value.into())
294}
295
296fn push_field_change(
297    changes: &mut Vec<FieldChange>,
298    id_a: &str,
299    id_b: &str,
300    field: &str,
301    value_a: serde_json::Value,
302    value_b: serde_json::Value,
303) {
304    if value_a != value_b {
305        changes.push(FieldChange {
306            id_a: id_a.to_string(),
307            id_b: id_b.to_string(),
308            field: field.to_string(),
309            value_a,
310            value_b,
311        });
312    }
313}
314
315fn finding_field_changes(
316    id_a: &str,
317    a: &crate::bundle::FindingBundle,
318    id_b: &str,
319    b: &crate::bundle::FindingBundle,
320) -> Vec<FieldChange> {
321    let mut changes = Vec::new();
322    push_field_change(
323        &mut changes,
324        id_a,
325        id_b,
326        "assertion.text",
327        value_str(a.assertion.text.clone()),
328        value_str(b.assertion.text.clone()),
329    );
330    push_field_change(
331        &mut changes,
332        id_a,
333        id_b,
334        "assertion.assertion_type",
335        value_str(a.assertion.assertion_type.clone()),
336        value_str(b.assertion.assertion_type.clone()),
337    );
338    push_field_change(
339        &mut changes,
340        id_a,
341        id_b,
342        "conditions.text",
343        value_str(a.conditions.text.clone()),
344        value_str(b.conditions.text.clone()),
345    );
346    push_field_change(
347        &mut changes,
348        id_a,
349        id_b,
350        "confidence.score",
351        serde_json::json!(a.confidence.score),
352        serde_json::json!(b.confidence.score),
353    );
354    push_field_change(
355        &mut changes,
356        id_a,
357        id_b,
358        "evidence.evidence_type",
359        value_str(a.evidence.evidence_type.clone()),
360        value_str(b.evidence.evidence_type.clone()),
361    );
362    push_field_change(
363        &mut changes,
364        id_a,
365        id_b,
366        "evidence.method",
367        value_str(a.evidence.method.clone()),
368        value_str(b.evidence.method.clone()),
369    );
370    push_field_change(
371        &mut changes,
372        id_a,
373        id_b,
374        "evidence.replicated",
375        serde_json::json!(a.evidence.replicated),
376        serde_json::json!(b.evidence.replicated),
377    );
378    push_field_change(
379        &mut changes,
380        id_a,
381        id_b,
382        "flags.gap",
383        serde_json::json!(a.flags.gap),
384        serde_json::json!(b.flags.gap),
385    );
386    push_field_change(
387        &mut changes,
388        id_a,
389        id_b,
390        "flags.contested",
391        serde_json::json!(a.flags.contested),
392        serde_json::json!(b.flags.contested),
393    );
394    push_field_change(
395        &mut changes,
396        id_a,
397        id_b,
398        "provenance.title",
399        value_str(a.provenance.title.clone()),
400        value_str(b.provenance.title.clone()),
401    );
402    push_field_change(
403        &mut changes,
404        id_a,
405        id_b,
406        "provenance.doi",
407        serde_json::json!(a.provenance.doi.clone()),
408        serde_json::json!(b.provenance.doi.clone()),
409    );
410    changes
411}
412
413pub fn compare(a: &Project, b: &Project) -> DiffResult {
414    let ids_a: HashSet<&str> = a.findings.iter().map(|f| f.id.as_str()).collect();
415    let ids_b: HashSet<&str> = b.findings.iter().map(|f| f.id.as_str()).collect();
416
417    let map_a: HashMap<&str, &crate::bundle::FindingBundle> =
418        a.findings.iter().map(|f| (f.id.as_str(), f)).collect();
419    let map_b: HashMap<&str, &crate::bundle::FindingBundle> =
420        b.findings.iter().map(|f| (f.id.as_str(), f)).collect();
421    let review_ids_a: HashSet<&str> = a.review_events.iter().map(|r| r.id.as_str()).collect();
422    let review_ids_b: HashSet<&str> = b.review_events.iter().map(|r| r.id.as_str()).collect();
423    let review_map_a: HashMap<&str, &ReviewEvent> = a
424        .review_events
425        .iter()
426        .map(|event| (event.id.as_str(), event))
427        .collect();
428    let review_map_b: HashMap<&str, &ReviewEvent> = b
429        .review_events
430        .iter()
431        .map(|event| (event.id.as_str(), event))
432        .collect();
433    let dep_ids_a: HashSet<String> = a
434        .project
435        .dependencies
436        .iter()
437        .map(|dep| format!("{}::{}", dep.name, dep.source))
438        .collect();
439    let dep_ids_b: HashSet<String> = b
440        .project
441        .dependencies
442        .iter()
443        .map(|dep| format!("{}::{}", dep.name, dep.source))
444        .collect();
445    let dep_map_a: HashMap<String, &ProjectDependency> = a
446        .project
447        .dependencies
448        .iter()
449        .map(|dep| (format!("{}::{}", dep.name, dep.source), dep))
450        .collect();
451    let dep_map_b: HashMap<String, &ProjectDependency> = b
452        .project
453        .dependencies
454        .iter()
455        .map(|dep| (format!("{}::{}", dep.name, dep.source), dep))
456        .collect();
457
458    // Findings only in A / only in B
459    let only_in_a: Vec<FindingSummary> = ids_a
460        .difference(&ids_b)
461        .map(|id| {
462            let f = map_a[id];
463            FindingSummary {
464                id: f.id.clone(),
465                assertion: f.assertion.text.clone(),
466            }
467        })
468        .collect();
469
470    let only_in_b: Vec<FindingSummary> = ids_b
471        .difference(&ids_a)
472        .map(|id| {
473            let f = map_b[id];
474            FindingSummary {
475                id: f.id.clone(),
476                assertion: f.assertion.text.clone(),
477            }
478        })
479        .collect();
480
481    let only_in_a_reviews: Vec<ReviewSummary> = review_ids_a
482        .difference(&review_ids_b)
483        .map(|id| summarize_review(review_map_a[id]))
484        .collect();
485    let only_in_b_reviews: Vec<ReviewSummary> = review_ids_b
486        .difference(&review_ids_a)
487        .map(|id| summarize_review(review_map_b[id]))
488        .collect();
489    let only_in_a_dependencies: Vec<DependencySummary> = dep_ids_a
490        .difference(&dep_ids_b)
491        .map(|id| summarize_dependency(dep_map_a[id]))
492        .collect();
493    let only_in_b_dependencies: Vec<DependencySummary> = dep_ids_b
494        .difference(&dep_ids_a)
495        .map(|id| summarize_dependency(dep_map_b[id]))
496        .collect();
497
498    let mut semantic_pairs = Vec::new();
499    let mut paired_a: HashSet<String> = HashSet::new();
500    let mut paired_b: HashSet<String> = HashSet::new();
501    let only_a_ids: Vec<&str> = ids_a.difference(&ids_b).copied().collect();
502    let only_b_ids: Vec<&str> = ids_b.difference(&ids_a).copied().collect();
503    let mut candidates: Vec<(f64, String, &str, &str)> = Vec::new();
504    for id_a in &only_a_ids {
505        for id_b in &only_b_ids {
506            let (score, reason) = semantic_similarity(map_a[id_a], map_b[id_b]);
507            if score >= 0.72 {
508                candidates.push((score, reason, *id_a, *id_b));
509            }
510        }
511    }
512    candidates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
513    for (score, reason, id_a, id_b) in candidates {
514        if paired_a.contains(id_a) || paired_b.contains(id_b) {
515            continue;
516        }
517        paired_a.insert(id_a.to_string());
518        paired_b.insert(id_b.to_string());
519        semantic_pairs.push(SemanticPair {
520            id_a: id_a.to_string(),
521            id_b: id_b.to_string(),
522            score: (score * 1000.0).round() / 1000.0,
523            reason,
524            assertion_a: map_a[id_a].assertion.text.clone(),
525            assertion_b: map_b[id_b].assertion.text.clone(),
526        });
527    }
528
529    // Shared findings with confidence changes
530    let shared: Vec<&str> = ids_a.intersection(&ids_b).copied().collect();
531    let mut confidence_changes: Vec<ConfidenceChange> = Vec::new();
532    let mut field_changes: Vec<FieldChange> = Vec::new();
533    for id in &shared {
534        let fa = map_a[id];
535        let fb = map_b[id];
536        field_changes.extend(finding_field_changes(id, fa, id, fb));
537        let delta = fb.confidence.score - fa.confidence.score;
538        if delta.abs() > 1e-6 {
539            confidence_changes.push(ConfidenceChange {
540                id: id.to_string(),
541                assertion: fa.assertion.text.clone(),
542                score_a: fa.confidence.score,
543                score_b: fb.confidence.score,
544                delta,
545            });
546        }
547    }
548    for pair in &semantic_pairs {
549        field_changes.extend(finding_field_changes(
550            &pair.id_a,
551            map_a[pair.id_a.as_str()],
552            &pair.id_b,
553            map_b[pair.id_b.as_str()],
554        ));
555    }
556    confidence_changes.sort_by(|a, b| b.delta.abs().partial_cmp(&a.delta.abs()).unwrap());
557    field_changes.sort_by(|a, b| {
558        a.id_a
559            .cmp(&b.id_a)
560            .then_with(|| a.id_b.cmp(&b.id_b))
561            .then_with(|| a.field.cmp(&b.field))
562    });
563
564    // Contradiction links in B that don't exist in A
565    let contradictions_a: HashSet<(String, String)> = a
566        .findings
567        .iter()
568        .flat_map(|f| {
569            f.links
570                .iter()
571                .filter(|l| l.link_type == "contradicts")
572                .map(move |l| (f.id.clone(), l.target.clone()))
573        })
574        .collect();
575
576    let new_contradictions: Vec<ContradictionSummary> = b
577        .findings
578        .iter()
579        .flat_map(|f| {
580            f.links
581                .iter()
582                .filter(|l| l.link_type == "contradicts")
583                .filter(|l| !contradictions_a.contains(&(f.id.clone(), l.target.clone())))
584                .map(move |l| ContradictionSummary {
585                    from_id: f.id.clone(),
586                    target_id: l.target.clone(),
587                    note: l.note.clone(),
588                })
589        })
590        .collect();
591
592    // Entity coverage: collect resolved entity names
593    fn resolved_entities(c: &Project) -> HashSet<String> {
594        c.findings
595            .iter()
596            .flat_map(|f| {
597                f.assertion.entities.iter().filter_map(|e| {
598                    if e.canonical_id.is_some() {
599                        Some(e.name.clone())
600                    } else {
601                        None
602                    }
603                })
604            })
605            .collect()
606    }
607
608    let entities_a = resolved_entities(a);
609    let entities_b = resolved_entities(b);
610
611    let mut entities_only_in_a: Vec<String> = entities_a.difference(&entities_b).cloned().collect();
612    let mut entities_only_in_b: Vec<String> = entities_b.difference(&entities_a).cloned().collect();
613    entities_only_in_a.sort();
614    entities_only_in_b.sort();
615
616    let proposal_summary_a = proposals::summary(a);
617    let proposal_summary_b = proposals::summary(b);
618    let event_summary_a = events::summarize(a);
619    let event_summary_b = events::summarize(b);
620    let kinds_a = event_summary_a
621        .kinds
622        .keys()
623        .cloned()
624        .collect::<HashSet<_>>();
625    let kinds_b = event_summary_b
626        .kinds
627        .keys()
628        .cloned()
629        .collect::<HashSet<_>>();
630    let mut kinds_only_in_a = kinds_a.difference(&kinds_b).cloned().collect::<Vec<_>>();
631    let mut kinds_only_in_b = kinds_b.difference(&kinds_a).cloned().collect::<Vec<_>>();
632    kinds_only_in_a.sort();
633    kinds_only_in_b.sort();
634
635    let mut review_impacts = Vec::new();
636    if a.proof_state.latest_packet.status != b.proof_state.latest_packet.status {
637        review_impacts.push(ReviewImpact {
638            kind: "proof_state".to_string(),
639            message: format!(
640                "Proof freshness changed: {} -> {}",
641                a.proof_state.latest_packet.status, b.proof_state.latest_packet.status
642            ),
643        });
644    }
645    if proposal_summary_a.pending_review != proposal_summary_b.pending_review {
646        review_impacts.push(ReviewImpact {
647            kind: "pending_review".to_string(),
648            message: format!(
649                "Pending proposals changed: {} -> {}",
650                proposal_summary_a.pending_review, proposal_summary_b.pending_review
651            ),
652        });
653    }
654    if proposal_summary_a.applied != proposal_summary_b.applied {
655        review_impacts.push(ReviewImpact {
656            kind: "applied_proposals".to_string(),
657            message: format!(
658                "Applied proposals changed: {} -> {}",
659                proposal_summary_a.applied, proposal_summary_b.applied
660            ),
661        });
662    }
663    if a.sources.len() != b.sources.len() || a.evidence_atoms.len() != b.evidence_atoms.len() {
664        review_impacts.push(ReviewImpact {
665            kind: "provenance_coverage".to_string(),
666            message: format!(
667                "Sources {} -> {}, evidence atoms {} -> {}",
668                a.sources.len(),
669                b.sources.len(),
670                a.evidence_atoms.len(),
671                b.evidence_atoms.len()
672            ),
673        });
674    }
675    if a.condition_records.len() != b.condition_records.len() {
676        review_impacts.push(ReviewImpact {
677            kind: "condition_boundary".to_string(),
678            message: format!(
679                "Condition records changed: {} -> {}",
680                a.condition_records.len(),
681                b.condition_records.len()
682            ),
683        });
684    }
685    if field_changes
686        .iter()
687        .any(|change| change.field == "conditions.text")
688    {
689        review_impacts.push(ReviewImpact {
690            kind: "condition_scope".to_string(),
691            message: "Condition boundaries changed for one or more paired findings.".to_string(),
692        });
693    }
694    if field_changes
695        .iter()
696        .any(|change| change.field == "provenance.doi")
697    {
698        review_impacts.push(ReviewImpact {
699            kind: "provenance".to_string(),
700            message: "Provenance identifiers changed for one or more paired findings.".to_string(),
701        });
702    }
703    if !new_contradictions.is_empty() {
704        review_impacts.push(ReviewImpact {
705            kind: "contradiction".to_string(),
706            message: format!(
707                "{} new contradiction links appeared in {}",
708                new_contradictions.len(),
709                b.project.name
710            ),
711        });
712    }
713
714    DiffResult {
715        name_a: a.project.name.clone(),
716        name_b: b.project.name.clone(),
717        findings_a: a.findings.len(),
718        findings_b: b.findings.len(),
719        only_in_a,
720        only_in_b,
721        only_in_a_reviews,
722        only_in_b_reviews,
723        only_in_a_dependencies,
724        only_in_b_dependencies,
725        semantic_pairs,
726        field_changes,
727        confidence_changes,
728        new_contradictions,
729        entities_only_in_a,
730        entities_only_in_b,
731        projections: ProjectionDiff {
732            sources: (a.sources.len(), b.sources.len()),
733            evidence_atoms: (a.evidence_atoms.len(), b.evidence_atoms.len()),
734            condition_records: (a.condition_records.len(), b.condition_records.len()),
735        },
736        proposal_state: ProposalStateDiff {
737            total: (proposal_summary_a.total, proposal_summary_b.total),
738            pending_review: (
739                proposal_summary_a.pending_review,
740                proposal_summary_b.pending_review,
741            ),
742            applied: (proposal_summary_a.applied, proposal_summary_b.applied),
743        },
744        event_log: EventLogDiff {
745            events: (event_summary_a.count, event_summary_b.count),
746            kinds_only_in_a,
747            kinds_only_in_b,
748        },
749        proof_state: ProofStateDiff {
750            status_a: a.proof_state.latest_packet.status.clone(),
751            status_b: b.proof_state.latest_packet.status.clone(),
752            stale_reason_a: a.proof_state.stale_reason.clone(),
753            stale_reason_b: b.proof_state.stale_reason.clone(),
754        },
755        review_impacts,
756        stats_comparison: StatsComparison {
757            findings: (a.stats.findings, b.stats.findings),
758            links: (a.stats.links, b.stats.links),
759            replicated: (a.stats.replicated, b.stats.replicated),
760            gaps: (a.stats.gaps, b.stats.gaps),
761            contested: (a.stats.contested, b.stats.contested),
762            review_events: (a.stats.review_event_count, b.stats.review_event_count),
763            avg_confidence: (a.stats.avg_confidence, b.stats.avg_confidence),
764        },
765    }
766}
767
768pub fn json_envelope<'a>(
769    path_a: &'a Path,
770    path_b: &'a Path,
771    diff: &'a DiffResult,
772) -> DiffJsonEnvelope<'a> {
773    DiffJsonEnvelope {
774        schema: "vela.diff.v2",
775        ok: true,
776        generated_at: chrono::Utc::now().to_rfc3339(),
777        command: "vela diff",
778        sources: DiffSources {
779            a: path_a.to_str().unwrap_or_default(),
780            b: path_b.to_str().unwrap_or_default(),
781        },
782        summary: DiffSummary {
783            findings_a: diff.findings_a,
784            findings_b: diff.findings_b,
785            only_in_a: diff.only_in_a.len(),
786            only_in_b: diff.only_in_b.len(),
787            semantic_pairs: diff.semantic_pairs.len(),
788            field_changes: diff.field_changes.len(),
789            confidence_changes: diff.confidence_changes.len(),
790            new_contradictions: diff.new_contradictions.len(),
791            review_events_only_in_a: diff.only_in_a_reviews.len(),
792            review_events_only_in_b: diff.only_in_b_reviews.len(),
793            review_impacts: diff.review_impacts.len(),
794        },
795        diff,
796    }
797}
798
799pub fn run(path_a: &Path, path_b: &Path, json: bool, quiet: bool) {
800    let a = repo::load_from_path(path_a).unwrap_or_else(|e| {
801        eprintln!(
802            "{} failed to load {}: {e}",
803            style::err_prefix(),
804            path_a.display()
805        );
806        std::process::exit(1);
807    });
808    let b = repo::load_from_path(path_b).unwrap_or_else(|e| {
809        eprintln!(
810            "{} failed to load {}: {e}",
811            style::err_prefix(),
812            path_b.display()
813        );
814        std::process::exit(1);
815    });
816
817    let diff = compare(&a, &b);
818
819    if json {
820        let envelope = json_envelope(path_a, path_b, &diff);
821        println!(
822            "{}",
823            serde_json::to_string_pretty(&envelope).expect("failed to serialize diff")
824        );
825        return;
826    }
827
828    // Summary line
829    println!();
830    println!("  {}", "VELA · DIFF".dimmed());
831    println!(
832        "  {}",
833        format!(
834            "{} ({} findings) vs {} ({} findings)",
835            diff.name_a, diff.findings_a, diff.name_b, diff.findings_b
836        )
837        .bold()
838    );
839    println!("  {}", style::tick_row(60));
840
841    if quiet {
842        println!();
843        return;
844    }
845
846    // Only in A
847    println!(
848        "\n{} {} findings only in {}",
849        style::madder("---"),
850        diff.only_in_a.len(),
851        style::madder(&diff.name_a)
852    );
853    for f in diff.only_in_a.iter().take(5) {
854        println!(
855            "  {} {} {}",
856            style::madder("-"),
857            f.id.dimmed(),
858            truncate(&f.assertion, 60)
859        );
860    }
861    if diff.only_in_a.len() > 5 {
862        println!(
863            "  {} ... and {} more",
864            " ".dimmed(),
865            diff.only_in_a.len() - 5
866        );
867    }
868
869    // Only in B
870    println!(
871        "\n{} {} findings only in {}",
872        style::moss("+++"),
873        diff.only_in_b.len(),
874        style::moss(&diff.name_b)
875    );
876    for f in diff.only_in_b.iter().take(5) {
877        println!(
878            "  {} {} {}",
879            style::moss("+"),
880            f.id.dimmed(),
881            truncate(&f.assertion, 60)
882        );
883    }
884    if diff.only_in_b.len() > 5 {
885        println!(
886            "  {} ... and {} more",
887            " ".dimmed(),
888            diff.only_in_b.len() - 5
889        );
890    }
891
892    if !diff.semantic_pairs.is_empty() {
893        println!(
894            "\n{} {} likely semantic pairs with changed IDs",
895            style::signal("·"),
896            diff.semantic_pairs.len()
897        );
898        for pair in diff.semantic_pairs.iter().take(10) {
899            println!(
900                "  {} · {}  score {:.2}  {}",
901                pair.id_a.dimmed(),
902                pair.id_b.dimmed(),
903                pair.score,
904                pair.reason
905            );
906        }
907        if diff.semantic_pairs.len() > 10 {
908            println!("  ... and {} more", diff.semantic_pairs.len() - 10);
909        }
910    }
911
912    if !diff.field_changes.is_empty() {
913        println!(
914            "\n{} {} field-level changes across paired findings",
915            style::brass("~"),
916            diff.field_changes.len()
917        );
918        for change in diff.field_changes.iter().take(10) {
919            println!(
920                "  {} · {} {}",
921                change.id_a.dimmed(),
922                change.id_b.dimmed(),
923                change.field
924            );
925        }
926        if diff.field_changes.len() > 10 {
927            println!("  ... and {} more", diff.field_changes.len() - 10);
928        }
929    }
930
931    println!();
932    println!("  {}", "FRONTIER KERNEL DIFF".dimmed());
933    println!(
934        "  sources:           {} -> {}",
935        diff.projections.sources.0, diff.projections.sources.1
936    );
937    println!(
938        "  evidence atoms:    {} -> {}",
939        diff.projections.evidence_atoms.0, diff.projections.evidence_atoms.1
940    );
941    println!(
942        "  condition records: {} -> {}",
943        diff.projections.condition_records.0, diff.projections.condition_records.1
944    );
945    println!(
946        "  proposals:         {} -> {} (pending {} -> {}, applied {} -> {})",
947        diff.proposal_state.total.0,
948        diff.proposal_state.total.1,
949        diff.proposal_state.pending_review.0,
950        diff.proposal_state.pending_review.1,
951        diff.proposal_state.applied.0,
952        diff.proposal_state.applied.1
953    );
954    println!(
955        "  canonical events:  {} -> {}",
956        diff.event_log.events.0, diff.event_log.events.1
957    );
958    println!(
959        "  proof state:       {} -> {}",
960        diff.proof_state.status_a, diff.proof_state.status_b
961    );
962    if !diff.event_log.kinds_only_in_b.is_empty() {
963        println!(
964            "  new event kinds:   {}",
965            diff.event_log.kinds_only_in_b.join(", ")
966        );
967    }
968
969    if !diff.review_impacts.is_empty() {
970        println!();
971        println!("  {}", "REVIEW IMPACT".dimmed());
972        for impact in diff.review_impacts.iter().take(10) {
973            println!("  · [{}] {}", impact.kind, impact.message);
974        }
975    }
976
977    // Confidence changes
978    if !diff.confidence_changes.is_empty() {
979        println!(
980            "\n{} {} shared findings with confidence changes",
981            style::brass("~"),
982            diff.confidence_changes.len()
983        );
984        for c in diff.confidence_changes.iter().take(10) {
985            let arrow = if c.delta > 0.0 {
986                style::moss(format!(
987                    "{:.2} -> {:.2} ({:+.2})",
988                    c.score_a, c.score_b, c.delta
989                ))
990            } else {
991                style::madder(format!(
992                    "{:.2} -> {:.2} ({:+.2})",
993                    c.score_a, c.score_b, c.delta
994                ))
995            };
996            println!(
997                "  {} {} {}",
998                c.id.dimmed(),
999                arrow,
1000                truncate(&c.assertion, 40)
1001            );
1002        }
1003        if diff.confidence_changes.len() > 10 {
1004            println!("  ... and {} more", diff.confidence_changes.len() - 10);
1005        }
1006    }
1007
1008    // Review events
1009    if !diff.only_in_a_reviews.is_empty() || !diff.only_in_b_reviews.is_empty() {
1010        println!();
1011        println!("  {}", "REVIEW EVENT DIFF".dimmed());
1012        if !diff.only_in_b_reviews.is_empty() {
1013            println!(
1014                "  {} new review events in {}",
1015                diff.only_in_b_reviews.len(),
1016                style::moss(&diff.name_b)
1017            );
1018            for review in diff.only_in_b_reviews.iter().take(5) {
1019                println!(
1020                    "    {} {} {} {}",
1021                    style::moss("+"),
1022                    review.id.dimmed(),
1023                    review.action,
1024                    truncate(&review.reason, 45)
1025                );
1026            }
1027            if diff.only_in_b_reviews.len() > 5 {
1028                println!("    ... and {} more", diff.only_in_b_reviews.len() - 5);
1029            }
1030        }
1031        if !diff.only_in_a_reviews.is_empty() {
1032            println!(
1033                "  {} review events only in {}",
1034                diff.only_in_a_reviews.len(),
1035                style::madder(&diff.name_a)
1036            );
1037            for review in diff.only_in_a_reviews.iter().take(5) {
1038                println!(
1039                    "    {} {} {} {}",
1040                    style::madder("-"),
1041                    review.id.dimmed(),
1042                    review.action,
1043                    truncate(&review.reason, 45)
1044                );
1045            }
1046            if diff.only_in_a_reviews.len() > 5 {
1047                println!("    ... and {} more", diff.only_in_a_reviews.len() - 5);
1048            }
1049        }
1050    }
1051
1052    // Dependency / lineage changes
1053    if !diff.only_in_a_dependencies.is_empty() || !diff.only_in_b_dependencies.is_empty() {
1054        println!();
1055        println!("  {}", "LINEAGE DIFF".dimmed());
1056        if !diff.only_in_b_dependencies.is_empty() {
1057            println!(
1058                "  {} ancestry entries only in {}",
1059                diff.only_in_b_dependencies.len(),
1060                style::moss(&diff.name_b)
1061            );
1062            for dep in diff.only_in_b_dependencies.iter().take(5) {
1063                println!(
1064                    "    {} {} [{}]",
1065                    style::moss("+"),
1066                    dep.name,
1067                    dep.source.dimmed()
1068                );
1069            }
1070        }
1071        if !diff.only_in_a_dependencies.is_empty() {
1072            println!(
1073                "  {} ancestry entries only in {}",
1074                diff.only_in_a_dependencies.len(),
1075                style::madder(&diff.name_a)
1076            );
1077            for dep in diff.only_in_a_dependencies.iter().take(5) {
1078                println!(
1079                    "    {} {} [{}]",
1080                    style::madder("-"),
1081                    dep.name,
1082                    dep.source.dimmed()
1083                );
1084            }
1085        }
1086    }
1087
1088    // New contradictions
1089    if !diff.new_contradictions.is_empty() {
1090        println!(
1091            "\n{} {} new contradictions in {}",
1092            style::madder("·"),
1093            diff.new_contradictions.len(),
1094            diff.name_b
1095        );
1096        for c in &diff.new_contradictions {
1097            println!(
1098                "  {} · {} · {}",
1099                c.from_id.dimmed(),
1100                c.target_id.dimmed(),
1101                truncate(&c.note, 50)
1102            );
1103        }
1104    }
1105
1106    // Entity coverage
1107    if !diff.entities_only_in_a.is_empty() || !diff.entities_only_in_b.is_empty() {
1108        println!();
1109        println!("  {}", "ENTITY COVERAGE DIFF".dimmed());
1110        if !diff.entities_only_in_b.is_empty() {
1111            println!(
1112                "  {} resolved in {} but not {}:",
1113                diff.entities_only_in_b.len(),
1114                diff.name_b,
1115                diff.name_a
1116            );
1117            for e in diff.entities_only_in_b.iter().take(10) {
1118                println!("    {} {}", style::moss("+"), e);
1119            }
1120            if diff.entities_only_in_b.len() > 10 {
1121                println!("    ... and {} more", diff.entities_only_in_b.len() - 10);
1122            }
1123        }
1124        if !diff.entities_only_in_a.is_empty() {
1125            println!(
1126                "  {} resolved in {} but not {}:",
1127                diff.entities_only_in_a.len(),
1128                diff.name_a,
1129                diff.name_b
1130            );
1131            for e in diff.entities_only_in_a.iter().take(10) {
1132                println!("    {} {}", style::madder("-"), e);
1133            }
1134            if diff.entities_only_in_a.len() > 10 {
1135                println!("    ... and {} more", diff.entities_only_in_a.len() - 10);
1136            }
1137        }
1138    }
1139
1140    // Stats comparison
1141    println!();
1142    println!("  {}", "STATS COMPARISON".dimmed());
1143    let s = &diff.stats_comparison;
1144    println!(
1145        "  {:<18} {:>8}  {:>8}",
1146        "",
1147        diff.name_a.dimmed(),
1148        diff.name_b.dimmed()
1149    );
1150    print_stat_row("findings", s.findings.0, s.findings.1);
1151    print_stat_row("links", s.links.0, s.links.1);
1152    print_stat_row("replicated", s.replicated.0, s.replicated.1);
1153    print_stat_row("gaps", s.gaps.0, s.gaps.1);
1154    print_stat_row("contested", s.contested.0, s.contested.1);
1155    print_stat_row("review events", s.review_events.0, s.review_events.1);
1156    println!(
1157        "  {:<18} {:>8.3}  {:>8.3}",
1158        "avg confidence", s.avg_confidence.0, s.avg_confidence.1
1159    );
1160
1161    println!();
1162    println!("  {}", style::tick_row(60));
1163    println!();
1164}
1165
1166fn print_stat_row(label: &str, a: usize, b: usize) {
1167    let diff = b as i64 - a as i64;
1168    let delta = if diff > 0 {
1169        style::moss(format!("(+{})", diff)).to_string()
1170    } else if diff < 0 {
1171        style::madder(format!("({})", diff)).to_string()
1172    } else {
1173        String::new()
1174    };
1175    println!("  {:<18} {:>8}  {:>8}  {}", label, a, b, delta);
1176}
1177
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;
    use crate::project;
    use crate::sources;

    /// Builds a `FindingBundle` fixture.
    ///
    /// Only the id, confidence score, assertion type, and the `replicated` /
    /// `gap` flags vary; every other field takes a fixed placeholder value.
    /// The assertion text is derived from the id as `"Finding {id}"`.
    fn make_finding(
        id: &str,
        score: f64,
        assertion_type: &str,
        replicated: bool,
        gap: bool,
    ) -> FindingBundle {
        let assertion = Assertion {
            text: format!("Finding {id}"),
            assertion_type: assertion_type.into(),
            entities: Vec::new(),
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };

        let evidence = Evidence {
            evidence_type: "experimental".into(),
            model_system: String::new(),
            species: None,
            method: String::new(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated,
            replication_count: None,
            evidence_spans: Vec::new(),
        };

        let conditions = Conditions {
            text: String::new(),
            species_verified: Vec::new(),
            species_unverified: Vec::new(),
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };

        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: "Test".into(),
            authors: Vec::new(),
            year: Some(2024),
            journal: None,
            license: None,
            publisher: None,
            funders: Vec::new(),
            extraction: Extraction::default(),
            review: None,
            citation_count: None,
        };

        let flags = Flags {
            gap,
            negative_space: false,
            contested: false,
            retracted: false,
            declining: false,
            gravity_well: false,
            review_state: None,
            superseded: false,
            signature_threshold: None,
            jointly_accepted: false,
        };

        FindingBundle {
            id: id.into(),
            version: 1,
            previous_version: None,
            assertion,
            evidence,
            conditions,
            confidence: Confidence::raw(score, "seeded prior", 0.85),
            provenance,
            flags,
            links: Vec::new(),
            annotations: Vec::new(),
            attachments: Vec::new(),
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    /// Shorthand for a "mechanism" finding that is neither replicated nor a gap.
    fn mechanism(id: &str, score: f64) -> FindingBundle {
        make_finding(id, score, "mechanism", false, false)
    }

    /// Assembles a `Project` named `name` from `findings` via `project::assemble`.
    fn make_frontier(name: &str, findings: Vec<FindingBundle>) -> Project {
        project::assemble(name, findings, 0, 0, "test")
    }

    /// Frontier that holds exactly one finding.
    fn single(name: &str, finding: FindingBundle) -> Project {
        make_frontier(name, vec![finding])
    }

    /// Builds an approved `ReviewEvent` fixture for `finding_id` with the given
    /// event id and reason; reviewer, timestamp, and status are fixed.
    fn make_review_event(id: &str, finding_id: &str, reason: &str) -> ReviewEvent {
        ReviewEvent {
            id: id.into(),
            workspace: None,
            finding_id: finding_id.into(),
            reviewer: "reviewer:test".into(),
            reviewed_at: "2026-01-01T00:00:00Z".into(),
            scope: None,
            status: Some("accepted".into()),
            action: ReviewAction::Approved,
            reason: reason.into(),
            evidence_considered: vec![],
            state_change: None,
        }
    }

    /// Two frontiers built from the same findings report no unique findings
    /// and no confidence changes.
    #[test]
    fn identical_frontiers_have_no_diff() {
        let shared = vec![
            mechanism("f1", 0.8),
            make_finding("f2", 0.7, "therapeutic", true, false),
        ];
        let left = make_frontier("A", shared.clone());
        let right = make_frontier("B", shared);

        let result = compare(&left, &right);

        assert!(result.only_in_a.is_empty());
        assert!(result.only_in_b.is_empty());
        assert!(result.confidence_changes.is_empty());
    }

    /// A finding present only in the first frontier shows up in `only_in_a`.
    #[test]
    fn detects_findings_only_in_a() {
        let left = make_frontier(
            "A",
            vec![
                mechanism("f1", 0.8),
                make_finding("f2", 0.7, "therapeutic", true, false),
            ],
        );
        let right = single("B", mechanism("f1", 0.8));

        let result = compare(&left, &right);

        assert_eq!(result.only_in_a.len(), 1);
        assert_eq!(result.only_in_a[0].id, "f2");
        assert!(result.only_in_b.is_empty());
    }

    /// The same finding id with a lower score in B yields one confidence
    /// change whose delta is about -0.2.
    #[test]
    fn detects_confidence_changes() {
        let left = single("A", mechanism("f1", 0.8));
        let right = single("B", mechanism("f1", 0.6));

        let result = compare(&left, &right);

        assert_eq!(result.confidence_changes.len(), 1);
        let delta = result.confidence_changes[0].delta;
        assert!((delta + 0.2).abs() < 1e-6);
    }

    /// Findings with different ids but identical text, conditions, and DOI are
    /// paired, and the score difference is reported as a field change.
    #[test]
    fn pairs_semantically_similar_changed_ids_and_fields() {
        const CLAIM: &str = "LRP1 mediates amyloid beta clearance at the blood brain barrier";

        // Both sides share everything except id and confidence score.
        let twin = |id: &str, score: f64| {
            let mut finding = mechanism(id, score);
            finding.assertion.text = CLAIM.into();
            finding.conditions.text = "human BBB context".into();
            finding.provenance.doi = Some("10.1234/test".into());
            finding
        };

        let left = single("A", twin("vf_old", 0.8));
        let right = single("B", twin("vf_new", 0.9));

        let result = compare(&left, &right);

        assert_eq!(result.semantic_pairs.len(), 1);
        let pair = &result.semantic_pairs[0];
        assert_eq!(pair.id_a, "vf_old");
        assert_eq!(pair.id_b, "vf_new");

        let score_changed = result
            .field_changes
            .iter()
            .any(|change| change.field == "confidence.score");
        assert!(score_changed);
    }

    /// A "contradicts" link that exists only in B is reported as a new
    /// contradiction.
    #[test]
    fn detects_new_contradictions() {
        let mut linked = mechanism("f1", 0.8);
        linked.add_link("f2", "contradicts", "opposite direction");

        let left = single("A", mechanism("f1", 0.8));
        let right = single("B", linked);

        let result = compare(&left, &right);

        assert_eq!(result.new_contradictions.len(), 1);
    }

    /// A review event present only in B is listed in `only_in_b_reviews`, and
    /// the review-event stats reflect both frontiers' counts.
    #[test]
    fn detects_review_events_only_in_b() {
        let mut left = single("A", mechanism("f1", 0.8));
        let mut right = single("B", mechanism("f1", 0.8));

        left.review_events
            .push(make_review_event("rev_a", "f1", "existing local review"));
        left.stats.review_event_count = left.review_events.len();

        right
            .review_events
            .push(make_review_event("rev_a", "f1", "existing local review"));
        right
            .review_events
            .push(make_review_event("rev_b", "f1", "imported external review"));
        right.stats.review_event_count = right.review_events.len();

        let result = compare(&left, &right);

        assert_eq!(result.only_in_b_reviews.len(), 1);
        assert_eq!(result.only_in_b_reviews[0].id, "rev_b");
        assert_eq!(result.stats_comparison.review_events, (1, 2));
    }

    /// Finding, replicated, and gap counts are reported per frontier.
    #[test]
    fn stats_comparison_correct() {
        let base = vec![
            make_finding("f1", 0.8, "mechanism", true, false),
            make_finding("f2", 0.7, "mechanism", false, true),
        ];
        // B is A plus one additional replicated finding.
        let mut extended = base.clone();
        extended.push(make_finding("f3", 0.9, "therapeutic", true, false));

        let left = make_frontier("A", base);
        let right = make_frontier("B", extended);

        let stats = compare(&left, &right).stats_comparison;

        assert_eq!(stats.findings, (2, 3));
        assert_eq!(stats.replicated, (1, 2));
        assert_eq!(stats.gaps, (1, 1));
    }

    /// Marking B's latest proof packet as stale surfaces in the proof-state
    /// diff and produces a `proof_state` review impact.
    #[test]
    fn diff_reports_frontier_kernel_state() {
        let mut left = single("A", mechanism("f1", 0.8));
        let mut right = single("B", mechanism("f1", 0.8));
        sources::materialize_project(&mut left);
        sources::materialize_project(&mut right);

        right.proof_state.latest_packet.status = "stale".into();
        right.proof_state.stale_reason = Some("new accepted proposal".into());

        let result = compare(&left, &right);

        assert_eq!(result.proof_state.status_b, "stale");
        let has_proof_impact = result
            .review_impacts
            .iter()
            .any(|impact| impact.kind == "proof_state");
        assert!(has_proof_impact);
    }
}