// vela_protocol/signals.rs
1//! Derived frontier signals.
2//!
3//! Signals are read-only projections over frontier state. They are not a second
4//! source of truth and are intentionally safe to recompute from the frontier,
5//! diagnostics, proof traces, or benchmark output.
6
7#![allow(clippy::module_name_repetitions)]
8
9use std::collections::{BTreeMap, BTreeSet};
10
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13
14use crate::project::{self, Project};
15use crate::proposals;
16use crate::sources;
17
/// The entity a signal points at.
///
/// Observed `r#type` values in this module are `"finding"`, `"source"`, and
/// `"frontier"`; `id` is the matching identifier (finding id, source id, or
/// the project name when the target is the frontier itself).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignalTarget {
    // `type` is a Rust keyword, hence the raw identifier; serializes as "type".
    pub r#type: String,
    // Identifier of the targeted entity.
    pub id: String,
}
23
/// A single derived signal: a read-only observation about frontier state.
///
/// Signals are recomputed projections (see module docs), never a second
/// source of truth.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignalItem {
    // Stable-ish identifier; built either from `signal_id(kind, entity_id)`
    // or, for diagnostics, as `sig_diagnostic_{n}`.
    pub id: String,
    // Machine-readable signal category, e.g. "missing_evidence_locator",
    // "condition_loss_risk", "provenance_drift".
    pub kind: String,
    // Observed values in this module: "info", "warning", "error".
    pub severity: String,
    // What the signal is about.
    pub target: SignalTarget,
    // Human-readable explanation of why the signal fired.
    pub reason: String,
    // Suggested remediation for a reviewer or operator.
    pub recommended_action: String,
    // Gates this signal blocks; observed values: "strict_check", "proof_ready".
    // Empty when the signal is advisory only.
    pub blocks: Vec<String>,
    // Doctrine caveats that qualify how the signal should be interpreted.
    pub caveats: Vec<String>,
}
35
/// One entry in the prioritized review queue, aggregating the signals that
/// apply to a single target.
///
/// NOTE(review): not constructed in this chunk — field semantics below are
/// inferred from names; confirm against the builder elsewhere in the file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReviewQueueItem {
    // Queue-entry identifier.
    pub id: String,
    // Presumably a coarse bucket label derived from `priority_score` — TODO confirm.
    pub priority: String,
    // Numeric ranking weight for queue ordering.
    pub priority_score: u32,
    // The entity to review.
    pub target: SignalTarget,
    // IDs of the `SignalItem`s folded into this queue entry.
    pub signal_ids: Vec<String>,
    // Collected reasons from the contributing signals.
    pub reasons: Vec<String>,
    // Single consolidated action for the reviewer.
    pub recommended_action: String,
}
46
/// Summary verdict on whether the frontier is ready for proof export.
///
/// NOTE(review): not constructed in this chunk — presumably `blockers` and
/// `warnings` count signals whose `blocks` include "proof_ready"; confirm
/// against the builder elsewhere in the file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProofReadiness {
    // Overall readiness verdict (string-encoded status).
    pub status: String,
    // Count of blocking issues.
    pub blockers: usize,
    // Count of non-blocking warnings.
    pub warnings: usize,
    // Doctrine caveats attached to the verdict.
    pub caveats: Vec<String>,
}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct SignalReport {
57    pub schema: String,
58    pub frontier: String,
59    pub signals: Vec<SignalItem>,
60    pub review_queue: Vec<ReviewQueueItem>,
61    pub proof_readiness: ProofReadiness,
62}
63
64pub fn analyze(frontier: &Project, diagnostics: &[Value]) -> SignalReport {
65    let mut signals = Vec::new();
66
67    for diagnostic in diagnostics {
68        let severity = diagnostic
69            .get("severity")
70            .and_then(Value::as_str)
71            .unwrap_or("info");
72        let rule_id = diagnostic
73            .get("rule_id")
74            .and_then(Value::as_str)
75            .unwrap_or("check_error");
76        if severity == "error"
77            || matches!(
78                rule_id,
79                "missing_source_record"
80                    | "missing_evidence_atom"
81                    | "missing_evidence_locator"
82                    | "condition_record_missing"
83            )
84        {
85            let id = format!("sig_diagnostic_{}", signals.len() + 1);
86            signals.push(SignalItem {
87                id,
88                kind: match rule_id {
89                    "event_replay" => "event_replay_conflict",
90                    "missing_source_record" => "missing_source_record",
91                    "missing_evidence_atom" => "missing_evidence_atom",
92                    "missing_evidence_locator" => "missing_evidence_locator",
93                    "condition_record_missing" => "condition_record_missing",
94                    "reviewer_identity_missing" => "reviewer_identity_missing",
95                    _ => "check_error",
96                }
97                .to_string(),
98                severity: severity.to_string(),
99                target: SignalTarget {
100                    r#type: diagnostic
101                        .get("finding_id")
102                        .and_then(Value::as_str)
103                        .map_or("frontier", |_| "finding")
104                        .to_string(),
105                    id: diagnostic
106                        .get("finding_id")
107                        .and_then(Value::as_str)
108                        .unwrap_or(&frontier.project.name)
109                        .to_string(),
110                },
111                reason: diagnostic
112                    .get("message")
113                    .and_then(Value::as_str)
114                    .unwrap_or("Frontier validation error.")
115                    .to_string(),
116                recommended_action: diagnostic
117                    .get("suggestion")
118                    .and_then(Value::as_str)
119                    .unwrap_or("Inspect and correct the referenced frontier field.")
120                    .to_string(),
121                blocks: if rule_id == "missing_evidence_locator" {
122                    vec!["proof_ready".to_string()]
123                } else {
124                    vec!["strict_check".to_string(), "proof_ready".to_string()]
125                },
126                caveats: vec![],
127            });
128        }
129    }
130
131    let projection = sources::derive_projection(frontier);
132    let source_by_id = projection
133        .sources
134        .iter()
135        .map(|source| (source.id.as_str(), source))
136        .collect::<BTreeMap<_, _>>();
137    let reviewed_finding_ids = frontier
138        .events
139        .iter()
140        .filter(|event| {
141            event.target.r#type == "finding"
142                && event.actor.id.starts_with("reviewer:")
143                && matches!(
144                    event.kind.as_str(),
145                    "finding.asserted" | "finding.reviewed" | "finding.caveated"
146                )
147        })
148        .map(|event| event.target.id.as_str())
149        .collect::<BTreeSet<_>>();
150
151    for source in &projection.sources {
152        if source.content_hash.is_none()
153            && matches!(
154                source.source_type.as_str(),
155                "pdf"
156                    | "jats"
157                    | "csv"
158                    | "text"
159                    | "note"
160                    | "agent_trace"
161                    | "benchmark_output"
162                    | "notebook_entry"
163                    | "experiment_log"
164                    | "synthetic_report"
165            )
166        {
167            signals.push(SignalItem {
168                id: signal_id("source_hash_missing", &source.id),
169                kind: "source_hash_missing".to_string(),
170                severity: "info".to_string(),
171                target: SignalTarget {
172                    r#type: "source".to_string(),
173                    id: source.id.clone(),
174                },
175                reason: "Source record has no content hash for a local or generated artifact."
176                    .to_string(),
177                recommended_action:
178                    "Recompile from the local corpus or add a source content hash before relying on this source."
179                        .to_string(),
180                blocks: vec![],
181                caveats: vec!["Source identity and scientific confidence are separate.".to_string()],
182            });
183        }
184
185        if source.source_type == "agent_trace" {
186            signals.push(SignalItem {
187                id: signal_id("agent_trace_unverified", &source.id),
188                kind: "agent_trace_unverified".to_string(),
189                severity: "warning".to_string(),
190                target: SignalTarget {
191                    r#type: "source".to_string(),
192                    id: source.id.clone(),
193                },
194                reason: "Agent trace source requires review before it can support active frontier state."
195                    .to_string(),
196                recommended_action:
197                    "Verify the trace against primary evidence and add review before proof use."
198                        .to_string(),
199                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
200                caveats: vec!["Agent traces are source artifacts, not scientific truth.".to_string()],
201            });
202        }
203
204        if source.source_type == "synthetic_report"
205            && !source
206                .finding_ids
207                .iter()
208                .any(|finding_id| reviewed_finding_ids.contains(finding_id.as_str()))
209        {
210            signals.push(SignalItem {
211                id: signal_id("synthetic_source_requires_review", &source.id),
212                kind: "synthetic_source_requires_review".to_string(),
213                severity: "warning".to_string(),
214                target: SignalTarget {
215                    r#type: "source".to_string(),
216                    id: source.id.clone(),
217                },
218                reason: "Synthetic report source requires human review and primary-source grounding."
219                    .to_string(),
220                recommended_action:
221                    "Use synthetic reports as review leads unless evidence atoms trace back to primary sources."
222                        .to_string(),
223                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
224                caveats: vec!["Synthetic sources should not silently become evidence.".to_string()],
225            });
226        }
227    }
228
229    for atom in &projection.evidence_atoms {
230        if atom.locator.is_none() {
231            signals.push(SignalItem {
232                id: signal_id("missing_evidence_locator", &atom.id),
233                kind: "missing_evidence_locator".to_string(),
234                severity: "warning".to_string(),
235                target: SignalTarget {
236                    r#type: "finding".to_string(),
237                    id: atom.finding_id.clone(),
238                },
239                reason:
240                    "Evidence atom lacks a span, table row, page, section, run, or metric locator."
241                        .to_string(),
242                recommended_action:
243                    "Verify the exact source location or keep this as a weak review lead."
244                        .to_string(),
245                blocks: vec!["proof_ready".to_string()],
246                caveats: vec![
247                    "A source citation is weaker than a located evidence atom.".to_string(),
248                ],
249            });
250        }
251
252        if !atom.human_verified
253            && source_by_id
254                .get(atom.source_id.as_str())
255                .is_some_and(|source| sources::is_synthetic_source(source))
256            && !reviewed_finding_ids.contains(atom.finding_id.as_str())
257        {
258            signals.push(SignalItem {
259                id: signal_id("synthetic_source_requires_review", &atom.id),
260                kind: "synthetic_source_requires_review".to_string(),
261                severity: "warning".to_string(),
262                target: SignalTarget {
263                    r#type: "finding".to_string(),
264                    id: atom.finding_id.clone(),
265                },
266                reason: "Evidence atom is linked to an unverified synthetic or agent source."
267                    .to_string(),
268                recommended_action:
269                    "Attach primary evidence or review the atom before proof export.".to_string(),
270                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
271                caveats: vec![
272                    "Generated traces can guide review but are not trusted evidence.".to_string(),
273                ],
274            });
275        }
276    }
277
278    for record in &projection.condition_records {
279        if record.text.trim().is_empty() {
280            signals.push(SignalItem {
281                id: signal_id("missing_conditions", &record.id),
282                kind: "missing_conditions".to_string(),
283                severity: "warning".to_string(),
284                target: SignalTarget {
285                    r#type: "finding".to_string(),
286                    id: record.finding_id.clone(),
287                },
288                reason: "Finding has no declared condition boundary.".to_string(),
289                recommended_action:
290                    "Add the species, model system, assay, comparator, endpoint, or scope that bounds the finding."
291                        .to_string(),
292                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
293                caveats: vec!["A finding without conditions is incomplete frontier state.".to_string()],
294            });
295        }
296
297        if record.comparator_status == "missing_or_unclear"
298            && (record.exposure_or_efficacy == "efficacy" || record.exposure_or_efficacy == "both")
299        {
300            signals.push(SignalItem {
301                id: signal_id("missing_comparator", &record.id),
302                kind: "missing_comparator".to_string(),
303                severity: "info".to_string(),
304                target: SignalTarget {
305                    r#type: "finding".to_string(),
306                    id: record.finding_id.clone(),
307                },
308                reason: "Condition record does not declare a comparator or baseline.".to_string(),
309                recommended_action:
310                    "Review whether the evidence supports the asserted direction without a declared comparator."
311                        .to_string(),
312                blocks: vec![],
313                caveats: vec![
314                    "Comparator absence is a review signal, not automatic disproof.".to_string(),
315                ],
316            });
317        }
318
319        if record.exposure_or_efficacy == "both" {
320            signals.push(SignalItem {
321                id: signal_id("exposure_efficacy_overgeneralization", &record.id),
322                kind: "condition_loss_risk".to_string(),
323                severity: "info".to_string(),
324                target: SignalTarget {
325                    r#type: "finding".to_string(),
326                    id: record.finding_id.clone(),
327                },
328                reason: "Exposure and efficacy language appear in the same condition boundary."
329                    .to_string(),
330                recommended_action:
331                    "Keep exposure, functional delivery, and therapeutic efficacy separate unless the source directly supports the broader claim."
332                        .to_string(),
333                blocks: vec![],
334                caveats: vec![
335                    "Vela flags possible overgeneralization; reviewers decide the final scope."
336                        .to_string(),
337                ],
338            });
339        }
340
341        if record.translation_scope == "animal_model"
342            && record
343                .caveats
344                .iter()
345                .any(|caveat| caveat.contains("human translation"))
346        {
347            signals.push(SignalItem {
348                id: signal_id("mouse_human_translation_risk", &record.id),
349                kind: "condition_loss_risk".to_string(),
350                severity: "info".to_string(),
351                target: SignalTarget {
352                    r#type: "finding".to_string(),
353                    id: record.finding_id.clone(),
354                },
355                reason: "Animal-model evidence is adjacent to human translation language."
356                    .to_string(),
357                recommended_action:
358                    "Preserve the animal-model scope unless human data are explicitly attached."
359                        .to_string(),
360                blocks: vec![],
361                caveats: vec![
362                    "Mouse or animal evidence should not silently become a human claim."
363                        .to_string(),
364                ],
365            });
366        }
367    }
368
369    // Build a set of finding IDs that have at least one evidence atom
370    // attached. Used by the source-grounding doctrine invariant below.
371    let evidence_grounded: BTreeSet<&str> = projection
372        .evidence_atoms
373        .iter()
374        .map(|atom| atom.finding_id.as_str())
375        .collect();
376
377    for finding in &frontier.findings {
378        if finding.provenance.doi.is_none()
379            && finding.provenance.pmid.is_none()
380            && finding.provenance.title.trim().is_empty()
381        {
382            signals.push(SignalItem {
383                id: signal_id("weak_provenance", &finding.id),
384                kind: "weak_provenance".to_string(),
385                severity: "warning".to_string(),
386                target: SignalTarget {
387                    r#type: "finding".to_string(),
388                    id: finding.id.clone(),
389                },
390                reason: "Finding lacks DOI, PMID, and source title fallback.".to_string(),
391                recommended_action:
392                    "Add source metadata or mark the finding as unresolved before proof export."
393                        .to_string(),
394                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
395                caveats: vec!["Provenance is separate from confidence.".to_string()],
396            });
397        }
398
399        // Doctrine line 3: a finding without conditions is incomplete.
400        // Strict check blocker when both conditions.text is empty AND no
401        // scope flag is set, AND the finding is not theoretical (theoretical
402        // findings can be scope-free by nature).
403        let scope_declared = finding.conditions.in_vivo
404            || finding.conditions.in_vitro
405            || finding.conditions.human_data
406            || finding.conditions.clinical_trial;
407        if finding.conditions.text.trim().is_empty()
408            && !scope_declared
409            && finding.assertion.assertion_type != "theoretical"
410            && !finding.flags.retracted
411        {
412            signals.push(SignalItem {
413                id: signal_id("conditions_undeclared", &finding.id),
414                kind: "conditions_undeclared".to_string(),
415                severity: "error".to_string(),
416                target: SignalTarget {
417                    r#type: "finding".to_string(),
418                    id: finding.id.clone(),
419                },
420                reason:
421                    "Finding has no condition text and no scope flag (in_vivo/in_vitro/human_data/clinical_trial)."
422                        .to_string(),
423                recommended_action:
424                    "Declare at least one scope flag and condition text, or mark the finding theoretical."
425                        .to_string(),
426                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
427                caveats: vec![
428                    "A finding without conditions is doctrinally incomplete state."
429                        .to_string(),
430                ],
431            });
432        }
433
434        // Doctrine line 4: a result without provenance is not evidence.
435        // Strict-check blocker when an active finding has no evidence atom.
436        if !finding.flags.retracted && !evidence_grounded.contains(finding.id.as_str()) {
437            signals.push(SignalItem {
438                id: signal_id("evidence_atom_missing", &finding.id),
439                kind: "evidence_atom_missing".to_string(),
440                severity: "error".to_string(),
441                target: SignalTarget {
442                    r#type: "finding".to_string(),
443                    id: finding.id.clone(),
444                },
445                reason:
446                    "Active finding has no materialized evidence atom in the source-evidence map."
447                        .to_string(),
448                recommended_action:
449                    "Run `vela normalize` to materialize evidence atoms, or attach explicit evidence spans."
450                        .to_string(),
451                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
452                caveats: vec![
453                    "A citation alone is not evidence in the v0.3 substrate.".to_string(),
454                ],
455            });
456        }
457
458        // Doctrine line 5: an agent trace is not truth.
459        // Strict-check blocker when source_type implies the claim came from
460        // a non-peer-reviewed source (model_output, expert_assertion,
461        // agent_trace) AND the finding has not been reviewed.
462        let agent_typed = matches!(
463            finding.provenance.source_type.as_str(),
464            "model_output" | "expert_assertion" | "agent_trace"
465        );
466        let has_review = finding
467            .provenance
468            .review
469            .as_ref()
470            .is_some_and(|r| r.reviewed)
471            || finding.flags.review_state.is_some()
472            || reviewed_finding_ids.contains(finding.id.as_str());
473        if agent_typed && !has_review && !finding.flags.gap && !finding.flags.retracted {
474            signals.push(SignalItem {
475                id: signal_id("agent_typed_unreviewed", &finding.id),
476                kind: "agent_typed_unreviewed".to_string(),
477                severity: "warning".to_string(),
478                target: SignalTarget {
479                    r#type: "finding".to_string(),
480                    id: finding.id.clone(),
481                },
482                reason: format!(
483                    "Source type '{}' requires explicit review before strict acceptance.",
484                    finding.provenance.source_type
485                ),
486                recommended_action:
487                    "Run `vela review --apply` against this finding or flag it as gap before strict use."
488                        .to_string(),
489                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
490                caveats: vec![
491                    "Agent traces, expert assertions, and model outputs are sources, not truth."
492                        .to_string(),
493                ],
494            });
495        }
496
497        if finding.evidence.evidence_spans.is_empty() {
498            signals.push(SignalItem {
499                id: signal_id("missing_evidence_span", &finding.id),
500                kind: "missing_evidence_span".to_string(),
501                severity: "warning".to_string(),
502                target: SignalTarget {
503                    r#type: "finding".to_string(),
504                    id: finding.id.clone(),
505                },
506                reason: "Finding has no verified evidence span attached.".to_string(),
507                recommended_action:
508                    "Verify the assertion against source text and add evidence spans where possible."
509                        .to_string(),
510                blocks: vec!["proof_ready".to_string()],
511                caveats: vec!["Missing spans do not imply the assertion is false.".to_string()],
512            });
513        }
514
515        if finding.conditions.text.trim().is_empty() {
516            signals.push(SignalItem {
517                id: signal_id("missing_conditions", &finding.id),
518                kind: "missing_conditions".to_string(),
519                severity: "warning".to_string(),
520                target: SignalTarget {
521                    r#type: "finding".to_string(),
522                    id: finding.id.clone(),
523                },
524                reason: "Finding has no explicit condition boundary.".to_string(),
525                recommended_action:
526                    "Add species, model system, assay, regimen, population, or scope conditions."
527                        .to_string(),
528                blocks: vec!["proof_ready".to_string()],
529                caveats: vec![
530                    "Condition loss is a common source of overgeneralized scientific claims."
531                        .to_string(),
532                ],
533            });
534        }
535
536        if finding.conditions.text.trim().is_empty()
537            && contains_condition_sensitive_claim(&finding.assertion.text)
538        {
539            signals.push(SignalItem {
540                id: signal_id("condition_loss_risk", &finding.id),
541                kind: "condition_loss_risk".to_string(),
542                severity: "warning".to_string(),
543                target: SignalTarget {
544                    r#type: "finding".to_string(),
545                    id: finding.id.clone(),
546                },
547                reason: "Finding uses condition-sensitive language without explicit condition boundaries."
548                    .to_string(),
549                recommended_action:
550                    "Separate exposure, efficacy, species, assay, payload, endpoint, and translation scope."
551                        .to_string(),
552                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
553                caveats: vec![
554                    "Vela should flag overgeneralization rather than smooth it into a summary."
555                        .to_string(),
556                ],
557            });
558        }
559
560        if finding
561            .assertion
562            .entities
563            .iter()
564            .any(|entity| entity.needs_review)
565        {
566            signals.push(SignalItem {
567                id: signal_id("needs_human_review", &finding.id),
568                kind: "needs_human_review".to_string(),
569                severity: "warning".to_string(),
570                target: SignalTarget {
571                    r#type: "finding".to_string(),
572                    id: finding.id.clone(),
573                },
574                reason: "Finding contains unresolved or low-confidence entity resolution."
575                    .to_string(),
576                recommended_action:
577                    "Review entity names, types, identifiers, and source grounding before proof use."
578                        .to_string(),
579                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
580                caveats: vec!["Entity review status is separate from assertion confidence.".to_string()],
581            });
582        }
583
584        if finding.provenance.extraction.method.contains("fallback")
585            || finding.provenance.extraction.method.contains("rough")
586            || finding.provenance.extraction.method.contains("abstract")
587        {
588            signals.push(SignalItem {
589                id: signal_id("rough_source_extraction", &finding.id),
590                kind: "rough_source_extraction".to_string(),
591                severity: "warning".to_string(),
592                target: SignalTarget {
593                    r#type: "finding".to_string(),
594                    id: finding.id.clone(),
595                },
596                reason: format!(
597                    "Finding was produced by extraction mode '{}'.",
598                    finding.provenance.extraction.method
599                ),
600                recommended_action:
601                    "Inspect the source text and mark caveats or review status before treating this as durable state."
602                        .to_string(),
603                blocks: vec!["proof_ready".to_string()],
604                caveats: vec![
605                    "Rough extraction can be useful as a review lead, not as a scientific conclusion."
606                        .to_string(),
607                ],
608            });
609        }
610
611        if matches!(
612            finding.provenance.source_type.as_str(),
613            "model_output" | "summary" | "synthesis"
614        ) && !reviewed_finding_ids.contains(finding.id.as_str())
615        {
616            signals.push(SignalItem {
617                id: signal_id("synthesis_used_as_source", &finding.id),
618                kind: "synthesis_used_as_source".to_string(),
619                severity: "warning".to_string(),
620                target: SignalTarget {
621                    r#type: "finding".to_string(),
622                    id: finding.id.clone(),
623                },
624                reason: "Finding provenance indicates synthesized text or model output as source."
625                    .to_string(),
626                recommended_action:
627                    "Trace this finding back to primary source evidence or mark it as a review lead."
628                        .to_string(),
629                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
630                caveats: vec![
631                    "Derived synthesis should not silently become primary evidence.".to_string(),
632                ],
633            });
634        }
635
636        if finding.flags.contested && finding.confidence.score >= 0.8 {
637            signals.push(SignalItem {
638                id: signal_id("contested_high_confidence", &finding.id),
639                kind: "contested_high_confidence".to_string(),
640                severity: "warning".to_string(),
641                target: SignalTarget {
642                    r#type: "finding".to_string(),
643                    id: finding.id.clone(),
644                },
645                reason: "Finding is contested while carrying high confidence.".to_string(),
646                recommended_action:
647                    "Review contradiction links, provenance, and confidence components."
648                        .to_string(),
649                blocks: vec!["proof_ready".to_string()],
650                caveats: vec![
651                    "Candidate tensions are review surfaces, not definitive contradictions."
652                        .to_string(),
653                ],
654            });
655        }
656    }
657
658    // Phase N (v0.4): provenance authority. `Project.sources` is
659    // canonical; `FindingBundle.provenance` is a denormalized cache.
660    // Drift between the two is a strict-mode failure — the source
661    // record wins, and the finding must be rewritten via
662    // `vela normalize --resync-provenance`.
663    let mut by_doi: BTreeMap<String, &crate::sources::SourceRecord> = BTreeMap::new();
664    let mut by_pmid: BTreeMap<String, &crate::sources::SourceRecord> = BTreeMap::new();
665    let mut duplicate_dois: BTreeSet<String> = BTreeSet::new();
666    let mut duplicate_pmids: BTreeSet<String> = BTreeSet::new();
667    for source in &frontier.sources {
668        if let Some(doi) = source.doi.as_deref() {
669            let key = doi.to_lowercase();
670            if by_doi.insert(key.clone(), source).is_some() {
671                duplicate_dois.insert(key);
672            }
673        }
674        if let Some(pmid) = source.pmid.as_deref() {
675            let key = pmid.to_string();
676            if by_pmid.insert(key.clone(), source).is_some() {
677                duplicate_pmids.insert(key);
678            }
679        }
680    }
681    for key in &duplicate_dois {
682        by_doi.remove(key);
683    }
684    for key in &duplicate_pmids {
685        by_pmid.remove(key);
686    }
687    for finding in &frontier.findings {
688        if finding.flags.retracted {
689            continue;
690        }
691        let source = finding
692            .provenance
693            .doi
694            .as_deref()
695            .map(str::to_lowercase)
696            .and_then(|k| by_doi.get(&k).copied())
697            .or_else(|| {
698                finding
699                    .provenance
700                    .pmid
701                    .as_deref()
702                    .and_then(|k| by_pmid.get(k).copied())
703            });
704        let Some(source) = source else { continue };
705
706        let mut diffs: Vec<String> = Vec::new();
707        if !source.title.is_empty() && source.title != finding.provenance.title {
708            diffs.push(format!(
709                "title differs (source='{}', cached='{}')",
710                truncate(&source.title, 60),
711                truncate(&finding.provenance.title, 60)
712            ));
713        }
714        if source.year.is_some() && source.year != finding.provenance.year {
715            diffs.push(format!(
716                "year differs (source={:?}, cached={:?})",
717                source.year, finding.provenance.year
718            ));
719        }
720        if !diffs.is_empty() {
721            signals.push(SignalItem {
722                id: signal_id("provenance_drift", &finding.id),
723                kind: "provenance_drift".to_string(),
724                severity: "error".to_string(),
725                target: SignalTarget {
726                    r#type: "finding".to_string(),
727                    id: finding.id.clone(),
728                },
729                reason: format!(
730                    "Cached finding.provenance disagrees with canonical source: {}",
731                    diffs.join("; ")
732                ),
733                recommended_action:
734                    "Run `vela normalize --resync-provenance --write` to regenerate finding.provenance from the canonical SourceRecord."
735                        .to_string(),
736                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
737                caveats: vec![
738                    "sources is the authority; provenance is the cache."
739                        .to_string(),
740                ],
741            });
742        }
743    }
744
745    // Phase M (v0.4): registered actors must sign their canonical
746    // events. Once an actor.id appears in `frontier.actors`, every
747    // canonical event referencing that actor.id MUST carry a signature
748    // that verifies against the registered public key. Unregistered
749    // actor.ids fall back to the legacy placeholder-rejection rule.
750    if !frontier.actors.is_empty() {
751        let registry: BTreeMap<&str, &str> = frontier
752            .actors
753            .iter()
754            .map(|actor| (actor.id.as_str(), actor.public_key.as_str()))
755            .collect();
756        for event in &frontier.events {
757            if event.actor.r#type != "human" {
758                continue;
759            }
760            let Some(pubkey) = registry.get(event.actor.id.as_str()) else {
761                continue;
762            };
763            let invalid = match event.signature.as_deref() {
764                None => Some("missing".to_string()),
765                Some(_) => match crate::sign::verify_event_signature(event, pubkey) {
766                    Ok(true) => None,
767                    Ok(false) => Some("does not verify".to_string()),
768                    Err(err) => Some(err),
769                },
770            };
771            if let Some(reason) = invalid {
772                signals.push(SignalItem {
773                    id: signal_id("unsigned_registered_actor", &event.id),
774                    kind: "unsigned_registered_actor".to_string(),
775                    severity: "error".to_string(),
776                    target: SignalTarget {
777                        r#type: "event".to_string(),
778                        id: event.id.clone(),
779                    },
780                    reason: format!(
781                        "Event {} from registered actor '{}' has invalid signature: {reason}.",
782                        event.id, event.actor.id
783                    ),
784                    recommended_action:
785                        "Sign the event with the registered Ed25519 key before strict acceptance."
786                            .to_string(),
787                    blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
788                    caveats: vec![
789                        "Registered actors are bound to their public key; unsigned writes break that binding."
790                            .to_string(),
791                    ],
792                });
793            }
794        }
795    }
796
797    let proposal_summary = proposals::summary(frontier);
798    for duplicate in &proposal_summary.duplicate_ids {
799        signals.push(SignalItem {
800            id: signal_id("proposal_conflict", duplicate),
801            kind: "proposal_conflict".to_string(),
802            severity: "error".to_string(),
803            target: SignalTarget {
804                r#type: "frontier".to_string(),
805                id: frontier.project.name.clone(),
806            },
807            reason: format!("Duplicate proposal id detected: {duplicate}."),
808            recommended_action: "Remove or rename the duplicate proposal before applying writes."
809                .to_string(),
810            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
811            caveats: vec![],
812        });
813    }
814    for target in &proposal_summary.invalid_targets {
815        signals.push(SignalItem {
816            id: signal_id("proposal_conflict", target),
817            kind: "proposal_conflict".to_string(),
818            severity: "error".to_string(),
819            target: SignalTarget {
820                r#type: "finding".to_string(),
821                id: target.clone(),
822            },
823            reason: format!("Proposal target does not exist in frontier state: {target}."),
824            recommended_action:
825                "Fix the proposal target or remove the orphan proposal before applying writes."
826                    .to_string(),
827            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
828            caveats: vec![],
829        });
830    }
831    for proposal in frontier
832        .proposals
833        .iter()
834        .filter(|proposal| proposal.status == "pending_review")
835    {
836        signals.push(SignalItem {
837            id: signal_id("pending_proposal_review", &proposal.id),
838            kind: "pending_proposal_review".to_string(),
839            severity: "warning".to_string(),
840            target: SignalTarget {
841                r#type: proposal.target.r#type.clone(),
842                id: proposal.target.id.clone(),
843            },
844            reason: format!(
845                "Pending {} proposal requires review before frontier truth changes.",
846                proposal.kind
847            ),
848            recommended_action:
849                "Review the proposal and accept or reject it before strict proof use.".to_string(),
850            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
851            caveats: vec!["Pending proposals are not active frontier state.".to_string()],
852        });
853    }
854    for proposal in frontier
855        .proposals
856        .iter()
857        .filter(|proposal| proposal.status == "applied")
858    {
859        signals.push(SignalItem {
860            id: signal_id("proposal_applied", &proposal.id),
861            kind: "proposal_applied".to_string(),
862            severity: "info".to_string(),
863            target: SignalTarget {
864                r#type: proposal.target.r#type.clone(),
865                id: proposal.target.id.clone(),
866            },
867            reason: format!("Applied proposal {} changed frontier state.", proposal.id),
868            recommended_action:
869                "Re-export proof artifacts if this proposal materially changes what reviewers should inspect."
870                    .to_string(),
871            blocks: vec![],
872            caveats: vec![],
873        });
874    }
875    for proposal in frontier.proposals.iter().filter(|proposal| {
876        matches!(proposal.status.as_str(), "accepted" | "applied")
877            && proposal
878                .reviewed_by
879                .as_deref()
880                .is_none_or(proposals::is_placeholder_reviewer)
881    }) {
882        signals.push(SignalItem {
883            id: signal_id("reviewer_identity_missing", &proposal.id),
884            kind: "reviewer_identity_missing".to_string(),
885            severity: "error".to_string(),
886            target: SignalTarget {
887                r#type: proposal.target.r#type.clone(),
888                id: proposal.target.id.clone(),
889            },
890            reason: format!(
891                "Accepted or applied proposal {} lacks a stable named reviewer identity.",
892                proposal.id
893            ),
894            recommended_action:
895                "Re-accept the proposal with a stable named reviewer id before strict proof use."
896                    .to_string(),
897            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
898            caveats: vec![
899                "Placeholder reviewer ids do not satisfy the v0 trust boundary.".to_string(),
900            ],
901        });
902    }
903    if frontier.proof_state.latest_packet.status == "stale" {
904        signals.push(SignalItem {
905            id: signal_id("stale_proof_packet", &frontier.project.name),
906            kind: "stale_proof_packet".to_string(),
907            severity: "warning".to_string(),
908            target: SignalTarget {
909                r#type: "frontier".to_string(),
910                id: frontier.project.name.clone(),
911            },
912            reason: frontier
913                .proof_state
914                .stale_reason
915                .clone()
916                .unwrap_or_else(|| "Proof packet is stale relative to current frontier state.".to_string()),
917            recommended_action:
918                "Run `vela proof` again to export a packet that matches the current frontier snapshot."
919                    .to_string(),
920            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
921            caveats: vec!["Packet validation can still pass for stale but internally consistent packets.".to_string()],
922        });
923    }
924
925    let review_queue = build_review_queue(frontier, &signals);
926    let proof_readiness = build_proof_readiness(&signals);
927
928    SignalReport {
929        schema: "vela.signals.v0".to_string(),
930        frontier: frontier.project.name.clone(),
931        signals,
932        review_queue,
933        proof_readiness,
934    }
935}
936
937pub fn quality_table(frontier: &Project, report: &SignalReport) -> Value {
938    let mut by_kind = BTreeMap::<String, usize>::new();
939    let mut by_severity = BTreeMap::<String, usize>::new();
940    let proposal_summary = proposals::summary(frontier);
941    for signal in &report.signals {
942        *by_kind.entry(signal.kind.clone()).or_default() += 1;
943        *by_severity.entry(signal.severity.clone()).or_default() += 1;
944    }
945
946    json!({
947        "schema": "vela.quality-table.v0",
948        "frontier": frontier.project.name,
949        "stats": frontier.stats,
950        "event_log": {
951            "events": frontier.events.len(),
952            "review_events_projection": frontier.review_events.len(),
953            "confidence_updates_projection": frontier.confidence_updates.len(),
954        },
955        "signals": {
956            "total": report.signals.len(),
957            "by_kind": by_kind,
958            "by_severity": by_severity,
959        },
960        "review_queue": {
961            "items": report.review_queue.len(),
962            "high_priority": report.review_queue.iter().filter(|item| item.priority == "high").count(),
963        },
964        "proposals": proposal_summary,
965        "proof_state": frontier.proof_state,
966        "proof_readiness": report.proof_readiness,
967        "caveats": [
968            "Signals are derived from frontier state and should be recomputed after edits.",
969            "Candidate gaps, bridges, and tensions require human review.",
970            "A clean quality table is not proof of scientific truth."
971        ],
972    })
973}
974
975pub fn ro_crate_metadata(frontier: &Project, files: &[String]) -> Value {
976    let graph_files: Vec<Value> = files
977        .iter()
978        .map(|path| {
979            json!({
980                "@id": path,
981                "@type": "File",
982                "name": path,
983            })
984        })
985        .collect();
986
987    let mut graph = vec![
988        json!({
989            "@id": "ro-crate-metadata.jsonld",
990            "@type": "CreativeWork",
991            "about": {"@id": "./"}
992        }),
993        json!({
994            "@id": "./",
995            "@type": "Dataset",
996            "name": format!("{} proof packet", frontier.project.name),
997            "description": frontier.project.description,
998            "dateCreated": frontier.project.compiled_at,
999            "conformsTo": {"@id": project::VELA_SCHEMA_URL},
1000            "hasPart": files.iter().map(|path| json!({"@id": path})).collect::<Vec<_>>()
1001        }),
1002    ];
1003    graph.extend(graph_files);
1004    graph.extend(frontier.artifacts.iter().map(|artifact| {
1005        json!({
1006            "@id": artifact.id,
1007            "@type": "CreativeWork",
1008            "name": artifact.name,
1009            "encodingFormat": artifact.media_type,
1010            "sha256": artifact.content_hash,
1011            "url": artifact.source_url.as_ref().or(artifact.locator.as_ref()),
1012            "license": artifact.license,
1013        })
1014    }));
1015
1016    json!({
1017        "@context": "https://w3id.org/ro/crate/1.2/context",
1018        "@graph": graph,
1019    })
1020}
1021
1022fn build_review_queue(frontier: &Project, signals: &[SignalItem]) -> Vec<ReviewQueueItem> {
1023    let link_counts = frontier
1024        .findings
1025        .iter()
1026        .map(|finding| {
1027            let outgoing = finding.links.len() as u32;
1028            let incoming = frontier
1029                .findings
1030                .iter()
1031                .flat_map(|other| &other.links)
1032                .filter(|link| link.target == finding.id)
1033                .count() as u32;
1034            (finding.id.clone(), outgoing + incoming)
1035        })
1036        .collect::<BTreeMap<_, _>>();
1037
1038    let mut by_target = BTreeMap::<String, Vec<&SignalItem>>::new();
1039    for signal in signals {
1040        if signal.target.r#type == "finding" {
1041            by_target
1042                .entry(signal.target.id.clone())
1043                .or_default()
1044                .push(signal);
1045        }
1046    }
1047
1048    let mut queue = by_target
1049        .into_iter()
1050        .map(|(target_id, grouped)| {
1051            let signal_score = grouped
1052                .iter()
1053                .map(|signal| signal_weight(signal))
1054                .sum::<u32>();
1055            let centrality_score = link_counts.get(&target_id).copied().unwrap_or(0).min(25);
1056            let priority_score = signal_score + centrality_score;
1057            let priority = if grouped
1058                .iter()
1059                .any(|signal| signal.blocks.iter().any(|block| block == "strict_check"))
1060            {
1061                "high"
1062            } else if grouped
1063                .iter()
1064                .any(|signal| signal.blocks.iter().any(|block| block == "proof_ready"))
1065            {
1066                "medium"
1067            } else {
1068                "low"
1069            };
1070            ReviewQueueItem {
1071                id: format!("rq_{}", target_id.trim_start_matches("vf_")),
1072                priority: priority.to_string(),
1073                priority_score,
1074                target: SignalTarget {
1075                    r#type: "finding".to_string(),
1076                    id: target_id,
1077                },
1078                signal_ids: grouped.iter().map(|signal| signal.id.clone()).collect(),
1079                reasons: grouped.iter().map(|signal| signal.reason.clone()).collect(),
1080                recommended_action: grouped
1081                    .first()
1082                    .map(|signal| signal.recommended_action.clone())
1083                    .unwrap_or_else(|| "Review finding state.".to_string()),
1084            }
1085        })
1086        .collect::<Vec<_>>();
1087    queue.sort_by(|a, b| {
1088        b.priority_score
1089            .cmp(&a.priority_score)
1090            .then_with(|| a.target.id.cmp(&b.target.id))
1091    });
1092    queue
1093}
1094
1095fn signal_weight(signal: &SignalItem) -> u32 {
1096    let severity = match signal.severity.as_str() {
1097        "error" => 100,
1098        "warning" => 30,
1099        _ => 10,
1100    };
1101    let kind = match signal.kind.as_str() {
1102        "check_error" => 100,
1103        "contested_high_confidence" => 70,
1104        "proposal_conflict" => 80,
1105        "pending_proposal_review" => 50,
1106        "weak_provenance" => 45,
1107        "missing_evidence_span" => 35,
1108        _ => 10,
1109    };
1110    let blocker = if signal.blocks.iter().any(|block| block == "strict_check") {
1111        30
1112    } else if signal.blocks.iter().any(|block| block == "proof_ready") {
1113        15
1114    } else {
1115        0
1116    };
1117    severity + kind + blocker
1118}
1119
1120fn build_proof_readiness(signals: &[SignalItem]) -> ProofReadiness {
1121    let blockers = signals
1122        .iter()
1123        .filter(|signal| signal.blocks.iter().any(|block| block == "proof_ready"))
1124        .count();
1125    let warnings = signals
1126        .iter()
1127        .filter(|signal| signal.severity == "warning")
1128        .count();
1129    ProofReadiness {
1130        status: if blockers == 0 {
1131            "ready".to_string()
1132        } else {
1133            "needs_review".to_string()
1134        },
1135        blockers,
1136        warnings,
1137        caveats: vec![
1138            "Proof readiness means packet state is reviewable, not scientifically settled."
1139                .to_string(),
1140        ],
1141    }
1142}
1143
/// Returns `s` unchanged when it has at most `n` chars; otherwise the
/// first `n` chars followed by a `…` ellipsis.
fn truncate(s: &str, n: usize) -> String {
    // char_indices gives the byte offset of the (n+1)-th char, so the
    // slice never lands mid-codepoint on multi-byte input.
    match s.char_indices().nth(n) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}…", &s[..cut]),
    }
}
1152
/// Builds a deterministic signal id of the form `sig_<kind>_<id>`.
///
/// All leading `vf_` prefixes are stripped from the finding id
/// (`trim_start_matches` removes the prefix repeatedly).
fn signal_id(kind: &str, finding_id: &str) -> String {
    let bare_id = finding_id.trim_start_matches("vf_");
    format!("sig_{kind}_{bare_id}")
}
1156
/// Returns true when `text` mentions any term suggesting the claim is
/// sensitive to experimental conditions (species, delivery route,
/// clinical context, etc.).
///
/// Matching is an ASCII case-insensitive substring search.
fn contains_condition_sensitive_claim(text: &str) -> bool {
    const TERMS: [&str; 12] = [
        "delivery",
        "efficacy",
        "therapeutic",
        "clinical",
        "human",
        "mouse",
        "mice",
        "assay",
        "endpoint",
        "payload",
        "exposure",
        "translation",
    ];
    let haystack = text.to_ascii_lowercase();
    TERMS.iter().any(|term| haystack.contains(term))
}
1176
#[cfg(test)]
mod tests {
    use crate::bundle::{
        Assertion, Conditions, Confidence, Evidence, FindingBundle, Flags, Provenance,
    };

    use super::*;

    /// Builds a minimal finding fixture that the test below expects
    /// `analyze` to flag on three fronts: an empty provenance title with
    /// no DOI/PMID/URL identifiers, an empty `evidence_spans` list, and
    /// `contested: true` combined with 0.9 confidence.
    fn minimal_finding(id: &str) -> FindingBundle {
        let assertion = Assertion {
            text: "LRP1 transport is altered in Alzheimer models.".to_string(),
            assertion_type: "mechanism".to_string(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };
        // Deliberately sparse provenance: no identifiers and an empty
        // title, so the weak_provenance signal should fire.
        let provenance = Provenance {
            source_type: "published_paper".to_string(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: String::new(),
            authors: vec![],
            year: Some(2020),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Default::default(),
            review: None,
            citation_count: None,
        };
        FindingBundle {
            id: id.to_string(),
            version: 1,
            previous_version: None,
            assertion,
            // No evidence spans: missing_evidence_span should fire.
            evidence: Evidence {
                evidence_type: "experimental".to_string(),
                model_system: "mouse".to_string(),
                species: Some("Mus musculus".to_string()),
                method: "test".to_string(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: true,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            // High confidence (0.9) paired with contested: true below,
            // so contested_high_confidence should fire.
            confidence: Confidence::raw(0.9, "test".to_string(), 0.9),
            provenance,
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: true,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: "2026-01-01T00:00:00Z".to_string(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    /// End-to-end check: a single sparse, contested finding produces the
    /// three expected signal kinds and exactly one review-queue entry
    /// (all signals target the same finding, so they group together).
    #[test]
    fn weak_and_contested_findings_emit_review_signals() {
        let frontier = project::assemble("test", vec![minimal_finding("vf_abc")], 1, 0, "test");
        let report = analyze(&frontier, &[]);
        assert!(report.signals.iter().any(|s| s.kind == "weak_provenance"));
        assert!(
            report
                .signals
                .iter()
                .any(|s| s.kind == "missing_evidence_span")
        );
        assert!(
            report
                .signals
                .iter()
                .any(|s| s.kind == "contested_high_confidence")
        );
        assert_eq!(report.review_queue.len(), 1);
    }
}