1use std::collections::{HashMap, HashSet};
4use std::path::Path;
5
6use colored::Colorize;
7
8use crate::cli_style as style;
9use serde::Serialize;
10
11use crate::bundle::{ReviewAction, ReviewEvent};
12use crate::events;
13use crate::project::{Project, ProjectDependency};
14use crate::proposals;
15use crate::repo;
16
/// Full comparison of two projects (`a` and `b`) as assembled by `compare`.
#[derive(Debug, Serialize)]
pub struct DiffResult {
    /// Name of project `a`.
    pub name_a: String,
    /// Name of project `b`.
    pub name_b: String,
    /// Number of findings in project `a`.
    pub findings_a: usize,
    /// Number of findings in project `b`.
    pub findings_b: usize,
    /// Findings whose id appears only in `a`.
    pub only_in_a: Vec<FindingSummary>,
    /// Findings whose id appears only in `b`.
    pub only_in_b: Vec<FindingSummary>,
    /// Review events whose id appears only in `a`.
    pub only_in_a_reviews: Vec<ReviewSummary>,
    /// Review events whose id appears only in `b`.
    pub only_in_b_reviews: Vec<ReviewSummary>,
    /// Dependencies whose `name::source` key appears only in `a`.
    pub only_in_a_dependencies: Vec<DependencySummary>,
    /// Dependencies whose `name::source` key appears only in `b`.
    pub only_in_b_dependencies: Vec<DependencySummary>,
    /// Findings with different ids that were matched by similarity
    /// (score of at least 0.72); each finding is used in at most one pair.
    pub semantic_pairs: Vec<SemanticPair>,
    /// Field-level differences for shared-id findings and semantic pairs,
    /// sorted by `id_a`, then `id_b`, then `field`.
    pub field_changes: Vec<FieldChange>,
    /// Shared-id findings whose confidence score differs by more than 1e-6,
    /// sorted by absolute delta, largest first.
    pub confidence_changes: Vec<ConfidenceChange>,
    /// "contradicts" links in `b` whose (finding id, target) pair is absent from `a`.
    pub new_contradictions: Vec<ContradictionSummary>,
    /// Sorted names of entities with a canonical id that occur only in `a`.
    pub entities_only_in_a: Vec<String>,
    /// Sorted names of entities with a canonical id that occur only in `b`.
    pub entities_only_in_b: Vec<String>,
    /// Side-by-side counts of sources, evidence atoms and condition records.
    pub projections: ProjectionDiff,
    /// Side-by-side proposal counts.
    pub proposal_state: ProposalStateDiff,
    /// Side-by-side event-log summary.
    pub event_log: EventLogDiff,
    /// Proof status and stale reason of each side.
    pub proof_state: ProofStateDiff,
    /// Notes about changes that `compare` flags for reviewer attention.
    pub review_impacts: Vec<ReviewImpact>,
    /// Side-by-side values taken from each project's `stats`.
    pub stats_comparison: StatsComparison,
}
43
/// Collection sizes of both projects; every tuple is `(a, b)`.
#[derive(Debug, Serialize)]
pub struct ProjectionDiff {
    /// Length of each project's `sources`.
    pub sources: (usize, usize),
    /// Length of each project's `evidence_atoms`.
    pub evidence_atoms: (usize, usize),
    /// Length of each project's `condition_records`.
    pub condition_records: (usize, usize),
}
50
/// Proposal counts taken from `proposals::summary`; every tuple is `(a, b)`.
#[derive(Debug, Serialize)]
pub struct ProposalStateDiff {
    /// The summary's `total` value for each side.
    pub total: (usize, usize),
    /// The summary's `pending_review` value for each side.
    pub pending_review: (usize, usize),
    /// The summary's `applied` value for each side.
    pub applied: (usize, usize),
}
57
/// Event-log comparison built from `events::summarize` for each project.
#[derive(Debug, Serialize)]
pub struct EventLogDiff {
    /// The summary's `count` for `(a, b)`.
    pub events: (usize, usize),
    /// Sorted event kinds present in `a`'s summary but not in `b`'s.
    pub kinds_only_in_a: Vec<String>,
    /// Sorted event kinds present in `b`'s summary but not in `a`'s.
    pub kinds_only_in_b: Vec<String>,
}
64
/// Proof state of both projects, copied from each project's `proof_state`.
#[derive(Debug, Serialize)]
pub struct ProofStateDiff {
    /// `latest_packet.status` of project `a`.
    pub status_a: String,
    /// `latest_packet.status` of project `b`.
    pub status_b: String,
    /// `stale_reason` of project `a`, if any.
    pub stale_reason_a: Option<String>,
    /// `stale_reason` of project `b`, if any.
    pub stale_reason_b: Option<String>,
}
72
/// One change that `compare` flags for reviewer attention.
#[derive(Debug, Serialize)]
pub struct ReviewImpact {
    /// Short category tag, e.g. `"proof_state"` or `"contradiction"`.
    pub kind: String,
    /// Human-readable description of the change.
    pub message: String,
}
78
/// Condensed view of a project dependency that exists on one side only.
#[derive(Debug, Serialize)]
pub struct DependencySummary {
    /// Dependency name.
    pub name: String,
    /// Dependency source.
    pub source: String,
    /// Dependency version, or `"-"` when the dependency has none.
    pub version: String,
}
85
/// Condensed view of a review event that exists on one side only.
#[derive(Debug, Serialize)]
pub struct ReviewSummary {
    /// Id of the review event.
    pub id: String,
    /// Id of the finding the event refers to.
    pub finding_id: String,
    /// Reviewer recorded on the event.
    pub reviewer: String,
    /// Action label such as `"approved"` or `"corrected:<field>"`.
    pub action: String,
    /// Reason recorded on the event.
    pub reason: String,
}
94
/// Condensed view of a finding that exists on one side only.
#[derive(Debug, Serialize)]
pub struct FindingSummary {
    /// Id of the finding.
    pub id: String,
    /// Assertion text of the finding.
    pub assertion: String,
}
100
/// Confidence difference for a finding whose id exists in both projects.
#[derive(Debug, Serialize)]
pub struct ConfidenceChange {
    /// Id of the shared finding.
    pub id: String,
    /// Assertion text as it appears in project `a`.
    pub assertion: String,
    /// Confidence score in project `a`.
    pub score_a: f64,
    /// Confidence score in project `b`.
    pub score_b: f64,
    /// `score_b - score_a`.
    pub delta: f64,
}
109
/// A "contradicts" link found in project `b` but not in project `a`.
#[derive(Debug, Serialize)]
pub struct ContradictionSummary {
    /// Id of the finding that carries the link.
    pub from_id: String,
    /// Target recorded on the link.
    pub target_id: String,
    /// Note recorded on the link.
    pub note: String,
}
116
/// Two findings with different ids that were judged to describe the same claim.
#[derive(Debug, Serialize)]
pub struct SemanticPair {
    /// Id of the finding in project `a`.
    pub id_a: String,
    /// Id of the finding in project `b`.
    pub id_b: String,
    /// Similarity score, rounded to three decimal places.
    pub score: f64,
    /// Short explanation of why the pair matched.
    pub reason: String,
    /// Assertion text of the finding in `a`.
    pub assertion_a: String,
    /// Assertion text of the finding in `b`.
    pub assertion_b: String,
}
126
/// A single field whose value differs between two paired findings.
#[derive(Debug, Serialize)]
pub struct FieldChange {
    /// Id of the finding in project `a`.
    pub id_a: String,
    /// Id of the finding in project `b` (equal to `id_a` for shared-id findings).
    pub id_b: String,
    /// Dotted field path, e.g. `"assertion.text"`.
    pub field: String,
    /// Value of the field in `a`, as JSON.
    pub value_a: serde_json::Value,
    /// Value of the field in `b`, as JSON.
    pub value_b: serde_json::Value,
}
135
/// Values copied from each project's `stats`; every tuple is `(a, b)`.
#[derive(Debug, Serialize)]
pub struct StatsComparison {
    /// `stats.findings` of each side.
    pub findings: (usize, usize),
    /// `stats.links` of each side.
    pub links: (usize, usize),
    /// `stats.replicated` of each side.
    pub replicated: (usize, usize),
    /// `stats.gaps` of each side.
    pub gaps: (usize, usize),
    /// `stats.contested` of each side.
    pub contested: (usize, usize),
    /// `stats.review_event_count` of each side.
    pub review_events: (usize, usize),
    /// `stats.avg_confidence` of each side.
    pub avg_confidence: (f64, f64),
}
146
/// Top-level JSON document emitted for a diff; built by `json_envelope`.
#[derive(Debug, Serialize)]
pub struct DiffJsonEnvelope<'a> {
    /// Schema identifier (`json_envelope` sets `"vela.diff.v2"`).
    pub schema: &'static str,
    /// Success flag (`json_envelope` always sets `true`).
    pub ok: bool,
    /// Creation time as an RFC 3339 UTC timestamp.
    pub generated_at: String,
    /// Name of the producing command (`json_envelope` sets `"vela diff"`).
    pub command: &'static str,
    /// The two compared paths.
    pub sources: DiffSources<'a>,
    /// Headline counts derived from `diff`.
    pub summary: DiffSummary,
    /// The full diff, borrowed.
    pub diff: &'a DiffResult,
}
157
/// The two paths that were compared, as strings.
#[derive(Debug, Serialize)]
pub struct DiffSources<'a> {
    /// Path of side `a` (`json_envelope` stores `""` when the path is not valid UTF-8).
    pub a: &'a str,
    /// Path of side `b` (`json_envelope` stores `""` when the path is not valid UTF-8).
    pub b: &'a str,
}
163
/// Headline counts for a `DiffResult`, filled in by `json_envelope`.
#[derive(Debug, Serialize)]
pub struct DiffSummary {
    /// Number of findings in project `a`.
    pub findings_a: usize,
    /// Number of findings in project `b`.
    pub findings_b: usize,
    /// Number of findings only in `a`.
    pub only_in_a: usize,
    /// Number of findings only in `b`.
    pub only_in_b: usize,
    /// Number of semantic pairs.
    pub semantic_pairs: usize,
    /// Number of field-level changes.
    pub field_changes: usize,
    /// Number of confidence changes.
    pub confidence_changes: usize,
    /// Number of new contradiction links.
    pub new_contradictions: usize,
    /// Number of review events only in `a`.
    pub review_events_only_in_a: usize,
    /// Number of review events only in `b`.
    pub review_events_only_in_b: usize,
    /// Number of review-impact notes.
    pub review_impacts: usize,
}
178
/// Shortens `s` to at most `max` bytes and appends `"..."` when anything was cut.
///
/// The cut position is moved back to the nearest UTF-8 character boundary so
/// the slice never splits a multi-byte character. Strings that already fit
/// are returned unchanged (without the ellipsis).
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_owned();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&s[..cut]);
    shortened.push_str("...");
    shortened
}
190
191fn summarize_review(event: &ReviewEvent) -> ReviewSummary {
192 ReviewSummary {
193 id: event.id.clone(),
194 finding_id: event.finding_id.clone(),
195 reviewer: event.reviewer.clone(),
196 action: review_action_label(&event.action),
197 reason: event.reason.clone(),
198 }
199}
200
201fn summarize_dependency(dep: &ProjectDependency) -> DependencySummary {
202 DependencySummary {
203 name: dep.name.clone(),
204 source: dep.source.clone(),
205 version: dep.version.clone().unwrap_or_else(|| "-".into()),
206 }
207}
208
209fn review_action_label(action: &ReviewAction) -> String {
210 match action {
211 ReviewAction::Approved => "approved".to_string(),
212 ReviewAction::Qualified { .. } => "qualified".to_string(),
213 ReviewAction::Corrected { field, .. } => format!("corrected:{field}"),
214 ReviewAction::Flagged { flag_type } => format!("flagged:{flag_type}"),
215 ReviewAction::Disputed { .. } => "disputed".to_string(),
216 }
217}
218
219fn semantic_key(f: &crate::bundle::FindingBundle) -> String {
220 normalize_text(&format!(
221 "{} {} {}",
222 f.assertion.assertion_type, f.assertion.text, f.conditions.text
223 ))
224}
225
/// Lower-cases `value`, treats every character that is not an ASCII letter or
/// digit as a separator, and joins the remaining words with single spaces.
///
/// The result has no leading or trailing space. Non-ASCII letters are
/// separators too, so they never appear in the output.
fn normalize_text(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    let mut gap_pending = false;
    for ch in value.to_lowercase().chars() {
        if !ch.is_ascii_alphanumeric() {
            gap_pending = true;
            continue;
        }
        // Emit one space per run of separators, but never before the first word.
        if gap_pending && !normalized.is_empty() {
            normalized.push(' ');
        }
        gap_pending = false;
        normalized.push(ch);
    }
    normalized
}
236
237fn token_set(value: &str) -> HashSet<String> {
238 normalize_text(value)
239 .split_whitespace()
240 .filter(|token| token.len() > 2)
241 .map(str::to_string)
242 .collect()
243}
244
/// Jaccard similarity of two token sets: `|a ∩ b| / |a ∪ b|`.
///
/// Two empty sets are defined as identical (`1.0`).
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f64 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    let shared = a.intersection(b).count();
    // |a ∪ b| = |a| + |b| - |a ∩ b|; at least one set is non-empty here, so
    // the divisor is never zero.
    let combined = a.len() + b.len() - shared;
    shared as f64 / combined as f64
}
257
258fn semantic_similarity(
259 a: &crate::bundle::FindingBundle,
260 b: &crate::bundle::FindingBundle,
261) -> (f64, String) {
262 let key_a = semantic_key(a);
263 let key_b = semantic_key(b);
264 if key_a == key_b {
265 return (
266 1.0,
267 "normalized assertion/type/conditions match".to_string(),
268 );
269 }
270
271 let tokens_a = token_set(&key_a);
272 let tokens_b = token_set(&key_b);
273 let token_score = jaccard(&tokens_a, &tokens_b);
274 let doi_match = a.provenance.doi.is_some() && a.provenance.doi == b.provenance.doi;
275 let pmid_match = a.provenance.pmid.is_some() && a.provenance.pmid == b.provenance.pmid;
276 let type_match = a.assertion.assertion_type == b.assertion.assertion_type;
277 let provenance_boost = if doi_match || pmid_match { 0.25 } else { 0.0 };
278 let type_boost = if type_match { 0.1 } else { 0.0 };
279 let score = (token_score + provenance_boost + type_boost).min(1.0);
280 let reason = if doi_match {
281 "shared DOI with similar assertion".to_string()
282 } else if pmid_match {
283 "shared PMID with similar assertion".to_string()
284 } else if type_match {
285 "same assertion type with similar text".to_string()
286 } else {
287 "similar assertion text".to_string()
288 };
289 (score, reason)
290}
291
292fn value_str(value: impl Into<String>) -> serde_json::Value {
293 serde_json::Value::String(value.into())
294}
295
296fn push_field_change(
297 changes: &mut Vec<FieldChange>,
298 id_a: &str,
299 id_b: &str,
300 field: &str,
301 value_a: serde_json::Value,
302 value_b: serde_json::Value,
303) {
304 if value_a != value_b {
305 changes.push(FieldChange {
306 id_a: id_a.to_string(),
307 id_b: id_b.to_string(),
308 field: field.to_string(),
309 value_a,
310 value_b,
311 });
312 }
313}
314
315fn finding_field_changes(
316 id_a: &str,
317 a: &crate::bundle::FindingBundle,
318 id_b: &str,
319 b: &crate::bundle::FindingBundle,
320) -> Vec<FieldChange> {
321 let mut changes = Vec::new();
322 push_field_change(
323 &mut changes,
324 id_a,
325 id_b,
326 "assertion.text",
327 value_str(a.assertion.text.clone()),
328 value_str(b.assertion.text.clone()),
329 );
330 push_field_change(
331 &mut changes,
332 id_a,
333 id_b,
334 "assertion.assertion_type",
335 value_str(a.assertion.assertion_type.clone()),
336 value_str(b.assertion.assertion_type.clone()),
337 );
338 push_field_change(
339 &mut changes,
340 id_a,
341 id_b,
342 "conditions.text",
343 value_str(a.conditions.text.clone()),
344 value_str(b.conditions.text.clone()),
345 );
346 push_field_change(
347 &mut changes,
348 id_a,
349 id_b,
350 "confidence.score",
351 serde_json::json!(a.confidence.score),
352 serde_json::json!(b.confidence.score),
353 );
354 push_field_change(
355 &mut changes,
356 id_a,
357 id_b,
358 "evidence.evidence_type",
359 value_str(a.evidence.evidence_type.clone()),
360 value_str(b.evidence.evidence_type.clone()),
361 );
362 push_field_change(
363 &mut changes,
364 id_a,
365 id_b,
366 "evidence.method",
367 value_str(a.evidence.method.clone()),
368 value_str(b.evidence.method.clone()),
369 );
370 push_field_change(
371 &mut changes,
372 id_a,
373 id_b,
374 "evidence.replicated",
375 serde_json::json!(a.evidence.replicated),
376 serde_json::json!(b.evidence.replicated),
377 );
378 push_field_change(
379 &mut changes,
380 id_a,
381 id_b,
382 "flags.gap",
383 serde_json::json!(a.flags.gap),
384 serde_json::json!(b.flags.gap),
385 );
386 push_field_change(
387 &mut changes,
388 id_a,
389 id_b,
390 "flags.contested",
391 serde_json::json!(a.flags.contested),
392 serde_json::json!(b.flags.contested),
393 );
394 push_field_change(
395 &mut changes,
396 id_a,
397 id_b,
398 "provenance.title",
399 value_str(a.provenance.title.clone()),
400 value_str(b.provenance.title.clone()),
401 );
402 push_field_change(
403 &mut changes,
404 id_a,
405 id_b,
406 "provenance.doi",
407 serde_json::json!(a.provenance.doi.clone()),
408 serde_json::json!(b.provenance.doi.clone()),
409 );
410 changes
411}
412
413pub fn compare(a: &Project, b: &Project) -> DiffResult {
414 let ids_a: HashSet<&str> = a.findings.iter().map(|f| f.id.as_str()).collect();
415 let ids_b: HashSet<&str> = b.findings.iter().map(|f| f.id.as_str()).collect();
416
417 let map_a: HashMap<&str, &crate::bundle::FindingBundle> =
418 a.findings.iter().map(|f| (f.id.as_str(), f)).collect();
419 let map_b: HashMap<&str, &crate::bundle::FindingBundle> =
420 b.findings.iter().map(|f| (f.id.as_str(), f)).collect();
421 let review_ids_a: HashSet<&str> = a.review_events.iter().map(|r| r.id.as_str()).collect();
422 let review_ids_b: HashSet<&str> = b.review_events.iter().map(|r| r.id.as_str()).collect();
423 let review_map_a: HashMap<&str, &ReviewEvent> = a
424 .review_events
425 .iter()
426 .map(|event| (event.id.as_str(), event))
427 .collect();
428 let review_map_b: HashMap<&str, &ReviewEvent> = b
429 .review_events
430 .iter()
431 .map(|event| (event.id.as_str(), event))
432 .collect();
433 let dep_ids_a: HashSet<String> = a
434 .project
435 .dependencies
436 .iter()
437 .map(|dep| format!("{}::{}", dep.name, dep.source))
438 .collect();
439 let dep_ids_b: HashSet<String> = b
440 .project
441 .dependencies
442 .iter()
443 .map(|dep| format!("{}::{}", dep.name, dep.source))
444 .collect();
445 let dep_map_a: HashMap<String, &ProjectDependency> = a
446 .project
447 .dependencies
448 .iter()
449 .map(|dep| (format!("{}::{}", dep.name, dep.source), dep))
450 .collect();
451 let dep_map_b: HashMap<String, &ProjectDependency> = b
452 .project
453 .dependencies
454 .iter()
455 .map(|dep| (format!("{}::{}", dep.name, dep.source), dep))
456 .collect();
457
458 let only_in_a: Vec<FindingSummary> = ids_a
460 .difference(&ids_b)
461 .map(|id| {
462 let f = map_a[id];
463 FindingSummary {
464 id: f.id.clone(),
465 assertion: f.assertion.text.clone(),
466 }
467 })
468 .collect();
469
470 let only_in_b: Vec<FindingSummary> = ids_b
471 .difference(&ids_a)
472 .map(|id| {
473 let f = map_b[id];
474 FindingSummary {
475 id: f.id.clone(),
476 assertion: f.assertion.text.clone(),
477 }
478 })
479 .collect();
480
481 let only_in_a_reviews: Vec<ReviewSummary> = review_ids_a
482 .difference(&review_ids_b)
483 .map(|id| summarize_review(review_map_a[id]))
484 .collect();
485 let only_in_b_reviews: Vec<ReviewSummary> = review_ids_b
486 .difference(&review_ids_a)
487 .map(|id| summarize_review(review_map_b[id]))
488 .collect();
489 let only_in_a_dependencies: Vec<DependencySummary> = dep_ids_a
490 .difference(&dep_ids_b)
491 .map(|id| summarize_dependency(dep_map_a[id]))
492 .collect();
493 let only_in_b_dependencies: Vec<DependencySummary> = dep_ids_b
494 .difference(&dep_ids_a)
495 .map(|id| summarize_dependency(dep_map_b[id]))
496 .collect();
497
498 let mut semantic_pairs = Vec::new();
499 let mut paired_a: HashSet<String> = HashSet::new();
500 let mut paired_b: HashSet<String> = HashSet::new();
501 let only_a_ids: Vec<&str> = ids_a.difference(&ids_b).copied().collect();
502 let only_b_ids: Vec<&str> = ids_b.difference(&ids_a).copied().collect();
503 let mut candidates: Vec<(f64, String, &str, &str)> = Vec::new();
504 for id_a in &only_a_ids {
505 for id_b in &only_b_ids {
506 let (score, reason) = semantic_similarity(map_a[id_a], map_b[id_b]);
507 if score >= 0.72 {
508 candidates.push((score, reason, *id_a, *id_b));
509 }
510 }
511 }
512 candidates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
513 for (score, reason, id_a, id_b) in candidates {
514 if paired_a.contains(id_a) || paired_b.contains(id_b) {
515 continue;
516 }
517 paired_a.insert(id_a.to_string());
518 paired_b.insert(id_b.to_string());
519 semantic_pairs.push(SemanticPair {
520 id_a: id_a.to_string(),
521 id_b: id_b.to_string(),
522 score: (score * 1000.0).round() / 1000.0,
523 reason,
524 assertion_a: map_a[id_a].assertion.text.clone(),
525 assertion_b: map_b[id_b].assertion.text.clone(),
526 });
527 }
528
529 let shared: Vec<&str> = ids_a.intersection(&ids_b).copied().collect();
531 let mut confidence_changes: Vec<ConfidenceChange> = Vec::new();
532 let mut field_changes: Vec<FieldChange> = Vec::new();
533 for id in &shared {
534 let fa = map_a[id];
535 let fb = map_b[id];
536 field_changes.extend(finding_field_changes(id, fa, id, fb));
537 let delta = fb.confidence.score - fa.confidence.score;
538 if delta.abs() > 1e-6 {
539 confidence_changes.push(ConfidenceChange {
540 id: id.to_string(),
541 assertion: fa.assertion.text.clone(),
542 score_a: fa.confidence.score,
543 score_b: fb.confidence.score,
544 delta,
545 });
546 }
547 }
548 for pair in &semantic_pairs {
549 field_changes.extend(finding_field_changes(
550 &pair.id_a,
551 map_a[pair.id_a.as_str()],
552 &pair.id_b,
553 map_b[pair.id_b.as_str()],
554 ));
555 }
556 confidence_changes.sort_by(|a, b| b.delta.abs().partial_cmp(&a.delta.abs()).unwrap());
557 field_changes.sort_by(|a, b| {
558 a.id_a
559 .cmp(&b.id_a)
560 .then_with(|| a.id_b.cmp(&b.id_b))
561 .then_with(|| a.field.cmp(&b.field))
562 });
563
564 let contradictions_a: HashSet<(String, String)> = a
566 .findings
567 .iter()
568 .flat_map(|f| {
569 f.links
570 .iter()
571 .filter(|l| l.link_type == "contradicts")
572 .map(move |l| (f.id.clone(), l.target.clone()))
573 })
574 .collect();
575
576 let new_contradictions: Vec<ContradictionSummary> = b
577 .findings
578 .iter()
579 .flat_map(|f| {
580 f.links
581 .iter()
582 .filter(|l| l.link_type == "contradicts")
583 .filter(|l| !contradictions_a.contains(&(f.id.clone(), l.target.clone())))
584 .map(move |l| ContradictionSummary {
585 from_id: f.id.clone(),
586 target_id: l.target.clone(),
587 note: l.note.clone(),
588 })
589 })
590 .collect();
591
592 fn resolved_entities(c: &Project) -> HashSet<String> {
594 c.findings
595 .iter()
596 .flat_map(|f| {
597 f.assertion.entities.iter().filter_map(|e| {
598 if e.canonical_id.is_some() {
599 Some(e.name.clone())
600 } else {
601 None
602 }
603 })
604 })
605 .collect()
606 }
607
608 let entities_a = resolved_entities(a);
609 let entities_b = resolved_entities(b);
610
611 let mut entities_only_in_a: Vec<String> = entities_a.difference(&entities_b).cloned().collect();
612 let mut entities_only_in_b: Vec<String> = entities_b.difference(&entities_a).cloned().collect();
613 entities_only_in_a.sort();
614 entities_only_in_b.sort();
615
616 let proposal_summary_a = proposals::summary(a);
617 let proposal_summary_b = proposals::summary(b);
618 let event_summary_a = events::summarize(a);
619 let event_summary_b = events::summarize(b);
620 let kinds_a = event_summary_a
621 .kinds
622 .keys()
623 .cloned()
624 .collect::<HashSet<_>>();
625 let kinds_b = event_summary_b
626 .kinds
627 .keys()
628 .cloned()
629 .collect::<HashSet<_>>();
630 let mut kinds_only_in_a = kinds_a.difference(&kinds_b).cloned().collect::<Vec<_>>();
631 let mut kinds_only_in_b = kinds_b.difference(&kinds_a).cloned().collect::<Vec<_>>();
632 kinds_only_in_a.sort();
633 kinds_only_in_b.sort();
634
635 let mut review_impacts = Vec::new();
636 if a.proof_state.latest_packet.status != b.proof_state.latest_packet.status {
637 review_impacts.push(ReviewImpact {
638 kind: "proof_state".to_string(),
639 message: format!(
640 "Proof freshness changed: {} -> {}",
641 a.proof_state.latest_packet.status, b.proof_state.latest_packet.status
642 ),
643 });
644 }
645 if proposal_summary_a.pending_review != proposal_summary_b.pending_review {
646 review_impacts.push(ReviewImpact {
647 kind: "pending_review".to_string(),
648 message: format!(
649 "Pending proposals changed: {} -> {}",
650 proposal_summary_a.pending_review, proposal_summary_b.pending_review
651 ),
652 });
653 }
654 if proposal_summary_a.applied != proposal_summary_b.applied {
655 review_impacts.push(ReviewImpact {
656 kind: "applied_proposals".to_string(),
657 message: format!(
658 "Applied proposals changed: {} -> {}",
659 proposal_summary_a.applied, proposal_summary_b.applied
660 ),
661 });
662 }
663 if a.sources.len() != b.sources.len() || a.evidence_atoms.len() != b.evidence_atoms.len() {
664 review_impacts.push(ReviewImpact {
665 kind: "provenance_coverage".to_string(),
666 message: format!(
667 "Sources {} -> {}, evidence atoms {} -> {}",
668 a.sources.len(),
669 b.sources.len(),
670 a.evidence_atoms.len(),
671 b.evidence_atoms.len()
672 ),
673 });
674 }
675 if a.condition_records.len() != b.condition_records.len() {
676 review_impacts.push(ReviewImpact {
677 kind: "condition_boundary".to_string(),
678 message: format!(
679 "Condition records changed: {} -> {}",
680 a.condition_records.len(),
681 b.condition_records.len()
682 ),
683 });
684 }
685 if field_changes
686 .iter()
687 .any(|change| change.field == "conditions.text")
688 {
689 review_impacts.push(ReviewImpact {
690 kind: "condition_scope".to_string(),
691 message: "Condition boundaries changed for one or more paired findings.".to_string(),
692 });
693 }
694 if field_changes
695 .iter()
696 .any(|change| change.field == "provenance.doi")
697 {
698 review_impacts.push(ReviewImpact {
699 kind: "provenance".to_string(),
700 message: "Provenance identifiers changed for one or more paired findings.".to_string(),
701 });
702 }
703 if !new_contradictions.is_empty() {
704 review_impacts.push(ReviewImpact {
705 kind: "contradiction".to_string(),
706 message: format!(
707 "{} new contradiction links appeared in {}",
708 new_contradictions.len(),
709 b.project.name
710 ),
711 });
712 }
713
714 DiffResult {
715 name_a: a.project.name.clone(),
716 name_b: b.project.name.clone(),
717 findings_a: a.findings.len(),
718 findings_b: b.findings.len(),
719 only_in_a,
720 only_in_b,
721 only_in_a_reviews,
722 only_in_b_reviews,
723 only_in_a_dependencies,
724 only_in_b_dependencies,
725 semantic_pairs,
726 field_changes,
727 confidence_changes,
728 new_contradictions,
729 entities_only_in_a,
730 entities_only_in_b,
731 projections: ProjectionDiff {
732 sources: (a.sources.len(), b.sources.len()),
733 evidence_atoms: (a.evidence_atoms.len(), b.evidence_atoms.len()),
734 condition_records: (a.condition_records.len(), b.condition_records.len()),
735 },
736 proposal_state: ProposalStateDiff {
737 total: (proposal_summary_a.total, proposal_summary_b.total),
738 pending_review: (
739 proposal_summary_a.pending_review,
740 proposal_summary_b.pending_review,
741 ),
742 applied: (proposal_summary_a.applied, proposal_summary_b.applied),
743 },
744 event_log: EventLogDiff {
745 events: (event_summary_a.count, event_summary_b.count),
746 kinds_only_in_a,
747 kinds_only_in_b,
748 },
749 proof_state: ProofStateDiff {
750 status_a: a.proof_state.latest_packet.status.clone(),
751 status_b: b.proof_state.latest_packet.status.clone(),
752 stale_reason_a: a.proof_state.stale_reason.clone(),
753 stale_reason_b: b.proof_state.stale_reason.clone(),
754 },
755 review_impacts,
756 stats_comparison: StatsComparison {
757 findings: (a.stats.findings, b.stats.findings),
758 links: (a.stats.links, b.stats.links),
759 replicated: (a.stats.replicated, b.stats.replicated),
760 gaps: (a.stats.gaps, b.stats.gaps),
761 contested: (a.stats.contested, b.stats.contested),
762 review_events: (a.stats.review_event_count, b.stats.review_event_count),
763 avg_confidence: (a.stats.avg_confidence, b.stats.avg_confidence),
764 },
765 }
766}
767
768pub fn json_envelope<'a>(
769 path_a: &'a Path,
770 path_b: &'a Path,
771 diff: &'a DiffResult,
772) -> DiffJsonEnvelope<'a> {
773 DiffJsonEnvelope {
774 schema: "vela.diff.v2",
775 ok: true,
776 generated_at: chrono::Utc::now().to_rfc3339(),
777 command: "vela diff",
778 sources: DiffSources {
779 a: path_a.to_str().unwrap_or_default(),
780 b: path_b.to_str().unwrap_or_default(),
781 },
782 summary: DiffSummary {
783 findings_a: diff.findings_a,
784 findings_b: diff.findings_b,
785 only_in_a: diff.only_in_a.len(),
786 only_in_b: diff.only_in_b.len(),
787 semantic_pairs: diff.semantic_pairs.len(),
788 field_changes: diff.field_changes.len(),
789 confidence_changes: diff.confidence_changes.len(),
790 new_contradictions: diff.new_contradictions.len(),
791 review_events_only_in_a: diff.only_in_a_reviews.len(),
792 review_events_only_in_b: diff.only_in_b_reviews.len(),
793 review_impacts: diff.review_impacts.len(),
794 },
795 diff,
796 }
797}
798
/// Entry point of the diff command: loads both projects, compares them and
/// prints the result to stdout.
///
/// - If either project fails to load, an error is written to stderr and the
///   process exits with status 1.
/// - With `json` set, only the pretty-printed JSON envelope is printed.
/// - With `quiet` set (and `json` unset), only the header is printed.
/// - Otherwise a sectioned report is printed; most lists show only their
///   first 5 or 10 entries.
pub fn run(path_a: &Path, path_b: &Path, json: bool, quiet: bool) {
    let a = repo::load_from_path(path_a).unwrap_or_else(|e| {
        eprintln!(
            "{} failed to load {}: {e}",
            style::err_prefix(),
            path_a.display()
        );
        std::process::exit(1);
    });
    let b = repo::load_from_path(path_b).unwrap_or_else(|e| {
        eprintln!(
            "{} failed to load {}: {e}",
            style::err_prefix(),
            path_b.display()
        );
        std::process::exit(1);
    });

    let diff = compare(&a, &b);

    // JSON mode: emit the envelope and nothing else.
    if json {
        let envelope = json_envelope(path_a, path_b, &diff);
        println!(
            "{}",
            serde_json::to_string_pretty(&envelope).expect("failed to serialize diff")
        );
        return;
    }

    // Header: title, project names with finding counts, separator row.
    println!();
    println!(" {}", "VELA · DIFF".dimmed());
    println!(
        " {}",
        format!(
            "{} ({} findings) vs {} ({} findings)",
            diff.name_a, diff.findings_a, diff.name_b, diff.findings_b
        )
        .bold()
    );
    println!(" {}", style::tick_row(60));

    // Quiet mode stops after the header.
    if quiet {
        println!();
        return;
    }

    // Findings only in `a` (first 5 shown).
    println!(
        "\n{} {} findings only in {}",
        style::madder("---"),
        diff.only_in_a.len(),
        style::madder(&diff.name_a)
    );
    for f in diff.only_in_a.iter().take(5) {
        println!(
            " {} {} {}",
            style::madder("-"),
            f.id.dimmed(),
            truncate(&f.assertion, 60)
        );
    }
    if diff.only_in_a.len() > 5 {
        println!(
            " {} ... and {} more",
            " ".dimmed(),
            diff.only_in_a.len() - 5
        );
    }

    // Findings only in `b` (first 5 shown).
    println!(
        "\n{} {} findings only in {}",
        style::moss("+++"),
        diff.only_in_b.len(),
        style::moss(&diff.name_b)
    );
    for f in diff.only_in_b.iter().take(5) {
        println!(
            " {} {} {}",
            style::moss("+"),
            f.id.dimmed(),
            truncate(&f.assertion, 60)
        );
    }
    if diff.only_in_b.len() > 5 {
        println!(
            " {} ... and {} more",
            " ".dimmed(),
            diff.only_in_b.len() - 5
        );
    }

    // Semantic pairs (first 10 shown); section omitted when empty.
    if !diff.semantic_pairs.is_empty() {
        println!(
            "\n{} {} likely semantic pairs with changed IDs",
            style::signal("·"),
            diff.semantic_pairs.len()
        );
        for pair in diff.semantic_pairs.iter().take(10) {
            println!(
                " {} · {} score {:.2} {}",
                pair.id_a.dimmed(),
                pair.id_b.dimmed(),
                pair.score,
                pair.reason
            );
        }
        if diff.semantic_pairs.len() > 10 {
            println!(" ... and {} more", diff.semantic_pairs.len() - 10);
        }
    }

    // Field-level changes (first 10 shown); only ids and field name are printed.
    if !diff.field_changes.is_empty() {
        println!(
            "\n{} {} field-level changes across paired findings",
            style::brass("~"),
            diff.field_changes.len()
        );
        for change in diff.field_changes.iter().take(10) {
            println!(
                " {} · {} {}",
                change.id_a.dimmed(),
                change.id_b.dimmed(),
                change.field
            );
        }
        if diff.field_changes.len() > 10 {
            println!(" ... and {} more", diff.field_changes.len() - 10);
        }
    }

    // Side-by-side counts; this section is always printed.
    println!();
    println!(" {}", "FRONTIER KERNEL DIFF".dimmed());
    println!(
        " sources: {} -> {}",
        diff.projections.sources.0, diff.projections.sources.1
    );
    println!(
        " evidence atoms: {} -> {}",
        diff.projections.evidence_atoms.0, diff.projections.evidence_atoms.1
    );
    println!(
        " condition records: {} -> {}",
        diff.projections.condition_records.0, diff.projections.condition_records.1
    );
    println!(
        " proposals: {} -> {} (pending {} -> {}, applied {} -> {})",
        diff.proposal_state.total.0,
        diff.proposal_state.total.1,
        diff.proposal_state.pending_review.0,
        diff.proposal_state.pending_review.1,
        diff.proposal_state.applied.0,
        diff.proposal_state.applied.1
    );
    println!(
        " canonical events: {} -> {}",
        diff.event_log.events.0, diff.event_log.events.1
    );
    println!(
        " proof state: {} -> {}",
        diff.proof_state.status_a, diff.proof_state.status_b
    );
    // Only event kinds new in `b` are listed; kinds only in `a` are not printed.
    if !diff.event_log.kinds_only_in_b.is_empty() {
        println!(
            " new event kinds: {}",
            diff.event_log.kinds_only_in_b.join(", ")
        );
    }

    // Review-impact notes (first 10 shown, no "more" line).
    if !diff.review_impacts.is_empty() {
        println!();
        println!(" {}", "REVIEW IMPACT".dimmed());
        for impact in diff.review_impacts.iter().take(10) {
            println!(" · [{}] {}", impact.kind, impact.message);
        }
    }

    // Confidence changes (first 10 shown); increases and decreases use
    // different colours.
    if !diff.confidence_changes.is_empty() {
        println!(
            "\n{} {} shared findings with confidence changes",
            style::brass("~"),
            diff.confidence_changes.len()
        );
        for c in diff.confidence_changes.iter().take(10) {
            let arrow = if c.delta > 0.0 {
                style::moss(format!(
                    "{:.2} -> {:.2} ({:+.2})",
                    c.score_a, c.score_b, c.delta
                ))
            } else {
                style::madder(format!(
                    "{:.2} -> {:.2} ({:+.2})",
                    c.score_a, c.score_b, c.delta
                ))
            };
            println!(
                " {} {} {}",
                c.id.dimmed(),
                arrow,
                truncate(&c.assertion, 40)
            );
        }
        if diff.confidence_changes.len() > 10 {
            println!(" ... and {} more", diff.confidence_changes.len() - 10);
        }
    }

    // One-sided review events: `b` first, then `a` (first 5 of each shown).
    if !diff.only_in_a_reviews.is_empty() || !diff.only_in_b_reviews.is_empty() {
        println!();
        println!(" {}", "REVIEW EVENT DIFF".dimmed());
        if !diff.only_in_b_reviews.is_empty() {
            println!(
                " {} new review events in {}",
                diff.only_in_b_reviews.len(),
                style::moss(&diff.name_b)
            );
            for review in diff.only_in_b_reviews.iter().take(5) {
                println!(
                    " {} {} {} {}",
                    style::moss("+"),
                    review.id.dimmed(),
                    review.action,
                    truncate(&review.reason, 45)
                );
            }
            if diff.only_in_b_reviews.len() > 5 {
                println!(" ... and {} more", diff.only_in_b_reviews.len() - 5);
            }
        }
        if !diff.only_in_a_reviews.is_empty() {
            println!(
                " {} review events only in {}",
                diff.only_in_a_reviews.len(),
                style::madder(&diff.name_a)
            );
            for review in diff.only_in_a_reviews.iter().take(5) {
                println!(
                    " {} {} {} {}",
                    style::madder("-"),
                    review.id.dimmed(),
                    review.action,
                    truncate(&review.reason, 45)
                );
            }
            if diff.only_in_a_reviews.len() > 5 {
                println!(" ... and {} more", diff.only_in_a_reviews.len() - 5);
            }
        }
    }

    // One-sided dependencies (first 5 of each shown; no "more" line here).
    if !diff.only_in_a_dependencies.is_empty() || !diff.only_in_b_dependencies.is_empty() {
        println!();
        println!(" {}", "LINEAGE DIFF".dimmed());
        if !diff.only_in_b_dependencies.is_empty() {
            println!(
                " {} ancestry entries only in {}",
                diff.only_in_b_dependencies.len(),
                style::moss(&diff.name_b)
            );
            for dep in diff.only_in_b_dependencies.iter().take(5) {
                println!(
                    " {} {} [{}]",
                    style::moss("+"),
                    dep.name,
                    dep.source.dimmed()
                );
            }
        }
        if !diff.only_in_a_dependencies.is_empty() {
            println!(
                " {} ancestry entries only in {}",
                diff.only_in_a_dependencies.len(),
                style::madder(&diff.name_a)
            );
            for dep in diff.only_in_a_dependencies.iter().take(5) {
                println!(
                    " {} {} [{}]",
                    style::madder("-"),
                    dep.name,
                    dep.source.dimmed()
                );
            }
        }
    }

    // New contradiction links; every entry is printed (no cap).
    if !diff.new_contradictions.is_empty() {
        println!(
            "\n{} {} new contradictions in {}",
            style::madder("·"),
            diff.new_contradictions.len(),
            diff.name_b
        );
        for c in &diff.new_contradictions {
            println!(
                " {} · {} · {}",
                c.from_id.dimmed(),
                c.target_id.dimmed(),
                truncate(&c.note, 50)
            );
        }
    }

    // Entities with a canonical id on one side only (first 10 of each shown).
    if !diff.entities_only_in_a.is_empty() || !diff.entities_only_in_b.is_empty() {
        println!();
        println!(" {}", "ENTITY COVERAGE DIFF".dimmed());
        if !diff.entities_only_in_b.is_empty() {
            println!(
                " {} resolved in {} but not {}:",
                diff.entities_only_in_b.len(),
                diff.name_b,
                diff.name_a
            );
            for e in diff.entities_only_in_b.iter().take(10) {
                println!(" {} {}", style::moss("+"), e);
            }
            if diff.entities_only_in_b.len() > 10 {
                println!(" ... and {} more", diff.entities_only_in_b.len() - 10);
            }
        }
        if !diff.entities_only_in_a.is_empty() {
            println!(
                " {} resolved in {} but not {}:",
                diff.entities_only_in_a.len(),
                diff.name_a,
                diff.name_b
            );
            for e in diff.entities_only_in_a.iter().take(10) {
                println!(" {} {}", style::madder("-"), e);
            }
            if diff.entities_only_in_a.len() > 10 {
                println!(" ... and {} more", diff.entities_only_in_a.len() - 10);
            }
        }
    }

    // Stats table: one row per counter plus the average confidence.
    println!();
    println!(" {}", "STATS COMPARISON".dimmed());
    let s = &diff.stats_comparison;
    println!(
        " {:<18} {:>8} {:>8}",
        "",
        diff.name_a.dimmed(),
        diff.name_b.dimmed()
    );
    print_stat_row("findings", s.findings.0, s.findings.1);
    print_stat_row("links", s.links.0, s.links.1);
    print_stat_row("replicated", s.replicated.0, s.replicated.1);
    print_stat_row("gaps", s.gaps.0, s.gaps.1);
    print_stat_row("contested", s.contested.0, s.contested.1);
    print_stat_row("review events", s.review_events.0, s.review_events.1);
    println!(
        " {:<18} {:>8.3} {:>8.3}",
        "avg confidence", s.avg_confidence.0, s.avg_confidence.1
    );

    println!();
    println!(" {}", style::tick_row(60));
    println!();
}
1165
1166fn print_stat_row(label: &str, a: usize, b: usize) {
1167 let diff = b as i64 - a as i64;
1168 let delta = if diff > 0 {
1169 style::moss(format!("(+{})", diff)).to_string()
1170 } else if diff < 0 {
1171 style::madder(format!("({})", diff)).to_string()
1172 } else {
1173 String::new()
1174 };
1175 println!(" {:<18} {:>8} {:>8} {}", label, a, b, delta);
1176}
1177
1178#[cfg(test)]
1179mod tests {
1180 use super::*;
1181 use crate::bundle::*;
1182 use crate::project;
1183 use crate::sources;
1184
1185 fn make_finding(
1186 id: &str,
1187 score: f64,
1188 assertion_type: &str,
1189 replicated: bool,
1190 gap: bool,
1191 ) -> FindingBundle {
1192 FindingBundle {
1193 id: id.into(),
1194 version: 1,
1195 previous_version: None,
1196 assertion: Assertion {
1197 text: format!("Finding {id}"),
1198 assertion_type: assertion_type.into(),
1199 entities: vec![],
1200 relation: None,
1201 direction: None,
1202 causal_claim: None,
1203 causal_evidence_grade: None,
1204 },
1205 evidence: Evidence {
1206 evidence_type: "experimental".into(),
1207 model_system: String::new(),
1208 species: None,
1209 method: String::new(),
1210 sample_size: None,
1211 effect_size: None,
1212 p_value: None,
1213 replicated,
1214 replication_count: None,
1215 evidence_spans: vec![],
1216 },
1217 conditions: Conditions {
1218 text: String::new(),
1219 species_verified: vec![],
1220 species_unverified: vec![],
1221 in_vitro: false,
1222 in_vivo: false,
1223 human_data: false,
1224 clinical_trial: false,
1225 concentration_range: None,
1226 duration: None,
1227 age_group: None,
1228 cell_type: None,
1229 },
1230 confidence: Confidence::raw(score, "seeded prior", 0.85),
1231 provenance: Provenance {
1232 source_type: "published_paper".into(),
1233 doi: None,
1234 pmid: None,
1235 pmc: None,
1236 openalex_id: None,
1237 url: None,
1238 title: "Test".into(),
1239 authors: vec![],
1240 year: Some(2024),
1241 journal: None,
1242 license: None,
1243 publisher: None,
1244 funders: vec![],
1245 extraction: Extraction::default(),
1246 review: None,
1247 citation_count: None,
1248 },
1249 flags: Flags {
1250 gap,
1251 negative_space: false,
1252 contested: false,
1253 retracted: false,
1254 declining: false,
1255 gravity_well: false,
1256 review_state: None,
1257 superseded: false,
1258 signature_threshold: None,
1259 jointly_accepted: false,
1260 },
1261 links: vec![],
1262 annotations: vec![],
1263 attachments: vec![],
1264 created: String::new(),
1265 updated: None,
1266
1267 access_tier: crate::access_tier::AccessTier::Public,
1268 }
1269 }
1270
1271 fn make_frontier(name: &str, findings: Vec<FindingBundle>) -> Project {
1272 project::assemble(name, findings, 0, 0, "test")
1273 }
1274
1275 fn make_review_event(id: &str, finding_id: &str, reason: &str) -> ReviewEvent {
1276 ReviewEvent {
1277 id: id.into(),
1278 workspace: None,
1279 finding_id: finding_id.into(),
1280 reviewer: "reviewer:test".into(),
1281 reviewed_at: "2026-01-01T00:00:00Z".into(),
1282 scope: None,
1283 status: Some("accepted".into()),
1284 action: ReviewAction::Approved,
1285 reason: reason.into(),
1286 evidence_considered: Vec::new(),
1287 state_change: None,
1288 }
1289 }
1290
1291 #[test]
1292 fn identical_frontiers_have_no_diff() {
1293 let findings = vec![
1294 make_finding("f1", 0.8, "mechanism", false, false),
1295 make_finding("f2", 0.7, "therapeutic", true, false),
1296 ];
1297 let a = make_frontier("A", findings.clone());
1298 let b = make_frontier("B", findings);
1299 let d = compare(&a, &b);
1300 assert!(d.only_in_a.is_empty());
1301 assert!(d.only_in_b.is_empty());
1302 assert!(d.confidence_changes.is_empty());
1303 }
1304
1305 #[test]
1306 fn detects_findings_only_in_a() {
1307 let a = make_frontier(
1308 "A",
1309 vec![
1310 make_finding("f1", 0.8, "mechanism", false, false),
1311 make_finding("f2", 0.7, "therapeutic", true, false),
1312 ],
1313 );
1314 let b = make_frontier(
1315 "B",
1316 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1317 );
1318 let d = compare(&a, &b);
1319 assert_eq!(d.only_in_a.len(), 1);
1320 assert_eq!(d.only_in_a[0].id, "f2");
1321 assert!(d.only_in_b.is_empty());
1322 }
1323
1324 #[test]
1325 fn detects_confidence_changes() {
1326 let a = make_frontier(
1327 "A",
1328 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1329 );
1330 let b = make_frontier(
1331 "B",
1332 vec![make_finding("f1", 0.6, "mechanism", false, false)],
1333 );
1334 let d = compare(&a, &b);
1335 assert_eq!(d.confidence_changes.len(), 1);
1336 assert!((d.confidence_changes[0].delta - (-0.2)).abs() < 1e-6);
1337 }
1338
1339 #[test]
1340 fn pairs_semantically_similar_changed_ids_and_fields() {
1341 let mut a_finding = make_finding("vf_old", 0.8, "mechanism", false, false);
1342 a_finding.assertion.text =
1343 "LRP1 mediates amyloid beta clearance at the blood brain barrier".into();
1344 a_finding.conditions.text = "human BBB context".into();
1345 a_finding.provenance.doi = Some("10.1234/test".into());
1346 let mut b_finding = make_finding("vf_new", 0.9, "mechanism", false, false);
1347 b_finding.assertion.text =
1348 "LRP1 mediates amyloid beta clearance at the blood brain barrier".into();
1349 b_finding.conditions.text = "human BBB context".into();
1350 b_finding.provenance.doi = Some("10.1234/test".into());
1351
1352 let a = make_frontier("A", vec![a_finding]);
1353 let b = make_frontier("B", vec![b_finding]);
1354 let d = compare(&a, &b);
1355 assert_eq!(d.semantic_pairs.len(), 1);
1356 assert_eq!(d.semantic_pairs[0].id_a, "vf_old");
1357 assert_eq!(d.semantic_pairs[0].id_b, "vf_new");
1358 assert!(
1359 d.field_changes
1360 .iter()
1361 .any(|c| c.field == "confidence.score")
1362 );
1363 }
1364
1365 #[test]
1366 fn detects_new_contradictions() {
1367 let mut fb = make_finding("f1", 0.8, "mechanism", false, false);
1368 fb.add_link("f2", "contradicts", "opposite direction");
1369 let a = make_frontier(
1370 "A",
1371 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1372 );
1373 let b = make_frontier("B", vec![fb]);
1374 let d = compare(&a, &b);
1375 assert_eq!(d.new_contradictions.len(), 1);
1376 }
1377
1378 #[test]
1379 fn detects_review_events_only_in_b() {
1380 let mut a = make_frontier(
1381 "A",
1382 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1383 );
1384 let mut b = make_frontier(
1385 "B",
1386 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1387 );
1388 a.review_events
1389 .push(make_review_event("rev_a", "f1", "existing local review"));
1390 a.stats.review_event_count = a.review_events.len();
1391 b.review_events
1392 .push(make_review_event("rev_a", "f1", "existing local review"));
1393 b.review_events
1394 .push(make_review_event("rev_b", "f1", "imported external review"));
1395 b.stats.review_event_count = b.review_events.len();
1396
1397 let d = compare(&a, &b);
1398 assert_eq!(d.only_in_b_reviews.len(), 1);
1399 assert_eq!(d.only_in_b_reviews[0].id, "rev_b");
1400 assert_eq!(d.stats_comparison.review_events, (1, 2));
1401 }
1402
1403 #[test]
1404 fn stats_comparison_correct() {
1405 let a = make_frontier(
1406 "A",
1407 vec![
1408 make_finding("f1", 0.8, "mechanism", true, false),
1409 make_finding("f2", 0.7, "mechanism", false, true),
1410 ],
1411 );
1412 let b = make_frontier(
1413 "B",
1414 vec![
1415 make_finding("f1", 0.8, "mechanism", true, false),
1416 make_finding("f2", 0.7, "mechanism", false, true),
1417 make_finding("f3", 0.9, "therapeutic", true, false),
1418 ],
1419 );
1420 let d = compare(&a, &b);
1421 assert_eq!(d.stats_comparison.findings, (2, 3));
1422 assert_eq!(d.stats_comparison.replicated, (1, 2));
1423 assert_eq!(d.stats_comparison.gaps, (1, 1));
1424 }
1425
1426 #[test]
1427 fn diff_reports_frontier_kernel_state() {
1428 let mut a = make_frontier(
1429 "A",
1430 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1431 );
1432 let mut b = make_frontier(
1433 "B",
1434 vec![make_finding("f1", 0.8, "mechanism", false, false)],
1435 );
1436 sources::materialize_project(&mut a);
1437 sources::materialize_project(&mut b);
1438 b.proof_state.latest_packet.status = "stale".into();
1439 b.proof_state.stale_reason = Some("new accepted proposal".into());
1440
1441 let d = compare(&a, &b);
1442 assert_eq!(d.proof_state.status_b, "stale");
1443 assert!(
1444 d.review_impacts
1445 .iter()
1446 .any(|impact| impact.kind == "proof_state")
1447 );
1448 }
1449}