// vela_protocol/signals.rs
1//! Derived frontier signals.
2//!
3//! Signals are read-only projections over frontier state. They are not a second
4//! source of truth and are intentionally safe to recompute from the frontier,
5//! diagnostics, proof traces, or benchmark output.
6
7#![allow(clippy::module_name_repetitions)]
8
9use std::collections::{BTreeMap, BTreeSet};
10
11use serde::{Deserialize, Serialize};
12use serde_json::{Value, json};
13
14use crate::project::{self, Project};
15use crate::proposals;
16use crate::sources;
17
/// Entity a signal points at.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignalTarget {
    /// Target kind; the analyzer emits "finding", "source", or "frontier".
    pub r#type: String,
    /// Identifier of the targeted entity (finding id, source id, or the
    /// project name when the target is the frontier itself).
    pub id: String,
}
23
/// A single derived signal: a read-only observation projected from
/// frontier state, diagnostics, or source/evidence records.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SignalItem {
    /// Stable signal identifier (built via the `signal_id` helper, or a
    /// `sig_diagnostic_{n}` counter for diagnostic-derived signals).
    pub id: String,
    /// Machine-readable signal kind, e.g. "missing_evidence_locator",
    /// "condition_loss_risk", "provenance_drift".
    pub kind: String,
    /// Severity label; the analyzer emits "info", "warning", or "error".
    pub severity: String,
    /// Entity the signal is about.
    pub target: SignalTarget,
    /// Human-readable explanation of why the signal fired.
    pub reason: String,
    /// Suggested remediation for a reviewer or operator.
    pub recommended_action: String,
    /// Gates this signal blocks; the analyzer uses "strict_check" and
    /// "proof_ready". Empty when the signal is advisory only.
    pub blocks: Vec<String>,
    /// Epistemic caveats bounding how the signal should be read.
    pub caveats: Vec<String>,
}
35
/// A review-queue entry grouping one or more signals that point at the
/// same target. (Construction happens outside this chunk.)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReviewQueueItem {
    /// Stable queue-item identifier.
    pub id: String,
    /// Human-readable priority label.
    pub priority: String,
    /// Numeric priority used for ordering — presumably higher means more
    /// urgent; confirm against the queue builder.
    pub priority_score: u32,
    /// Entity the queue item refers to.
    pub target: SignalTarget,
    /// IDs of the `SignalItem`s folded into this entry.
    pub signal_ids: Vec<String>,
    /// Reasons collected from the contributing signals.
    pub reasons: Vec<String>,
    /// Single recommended next action for the reviewer.
    pub recommended_action: String,
}
46
/// Aggregate verdict on whether the frontier is ready for proof export.
/// (Computed outside this chunk.)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProofReadiness {
    /// Overall readiness status label.
    pub status: String,
    /// Count of blocking signals.
    pub blockers: usize,
    /// Count of warning-level signals.
    pub warnings: usize,
    /// Caveats carried into the readiness summary.
    pub caveats: Vec<String>,
}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct SignalReport {
57    pub schema: String,
58    pub frontier: String,
59    pub signals: Vec<SignalItem>,
60    pub review_queue: Vec<ReviewQueueItem>,
61    pub proof_readiness: ProofReadiness,
62}
63
64pub fn analyze(frontier: &Project, diagnostics: &[Value]) -> SignalReport {
65    let mut signals = Vec::new();
66
67    for diagnostic in diagnostics {
68        let severity = diagnostic
69            .get("severity")
70            .and_then(Value::as_str)
71            .unwrap_or("info");
72        let rule_id = diagnostic
73            .get("rule_id")
74            .and_then(Value::as_str)
75            .unwrap_or("check_error");
76        if severity == "error"
77            || matches!(
78                rule_id,
79                "missing_source_record"
80                    | "missing_evidence_atom"
81                    | "missing_evidence_locator"
82                    | "condition_record_missing"
83            )
84        {
85            let id = format!("sig_diagnostic_{}", signals.len() + 1);
86            signals.push(SignalItem {
87                id,
88                kind: match rule_id {
89                    "event_replay" => "event_replay_conflict",
90                    "missing_source_record" => "missing_source_record",
91                    "missing_evidence_atom" => "missing_evidence_atom",
92                    "missing_evidence_locator" => "missing_evidence_locator",
93                    "condition_record_missing" => "condition_record_missing",
94                    "reviewer_identity_missing" => "reviewer_identity_missing",
95                    _ => "check_error",
96                }
97                .to_string(),
98                severity: severity.to_string(),
99                target: SignalTarget {
100                    r#type: diagnostic
101                        .get("finding_id")
102                        .and_then(Value::as_str)
103                        .map_or("frontier", |_| "finding")
104                        .to_string(),
105                    id: diagnostic
106                        .get("finding_id")
107                        .and_then(Value::as_str)
108                        .unwrap_or(&frontier.project.name)
109                        .to_string(),
110                },
111                reason: diagnostic
112                    .get("message")
113                    .and_then(Value::as_str)
114                    .unwrap_or("Frontier validation error.")
115                    .to_string(),
116                recommended_action: diagnostic
117                    .get("suggestion")
118                    .and_then(Value::as_str)
119                    .unwrap_or("Inspect and correct the referenced frontier field.")
120                    .to_string(),
121                blocks: if rule_id == "missing_evidence_locator" {
122                    vec!["proof_ready".to_string()]
123                } else {
124                    vec!["strict_check".to_string(), "proof_ready".to_string()]
125                },
126                caveats: vec![],
127            });
128        }
129    }
130
131    let projection = sources::derive_projection(frontier);
132    let source_by_id = projection
133        .sources
134        .iter()
135        .map(|source| (source.id.as_str(), source))
136        .collect::<BTreeMap<_, _>>();
137    let reviewed_finding_ids = frontier
138        .events
139        .iter()
140        .filter(|event| {
141            event.target.r#type == "finding"
142                && event.actor.id.starts_with("reviewer:")
143                && matches!(
144                    event.kind.as_str(),
145                    "finding.asserted" | "finding.reviewed" | "finding.caveated"
146                )
147        })
148        .map(|event| event.target.id.as_str())
149        .collect::<BTreeSet<_>>();
150
151    for source in &projection.sources {
152        if source.content_hash.is_none()
153            && matches!(
154                source.source_type.as_str(),
155                "pdf"
156                    | "jats"
157                    | "csv"
158                    | "text"
159                    | "note"
160                    | "agent_trace"
161                    | "benchmark_output"
162                    | "notebook_entry"
163                    | "experiment_log"
164                    | "synthetic_report"
165            )
166        {
167            signals.push(SignalItem {
168                id: signal_id("source_hash_missing", &source.id),
169                kind: "source_hash_missing".to_string(),
170                severity: "info".to_string(),
171                target: SignalTarget {
172                    r#type: "source".to_string(),
173                    id: source.id.clone(),
174                },
175                reason: "Source record has no content hash for a local or generated artifact."
176                    .to_string(),
177                recommended_action:
178                    "Recompile from the local corpus or add a source content hash before relying on this source."
179                        .to_string(),
180                blocks: vec![],
181                caveats: vec!["Source identity and scientific confidence are separate.".to_string()],
182            });
183        }
184
185        if source.source_type == "agent_trace" {
186            signals.push(SignalItem {
187                id: signal_id("agent_trace_unverified", &source.id),
188                kind: "agent_trace_unverified".to_string(),
189                severity: "warning".to_string(),
190                target: SignalTarget {
191                    r#type: "source".to_string(),
192                    id: source.id.clone(),
193                },
194                reason: "Agent trace source requires review before it can support active frontier state."
195                    .to_string(),
196                recommended_action:
197                    "Verify the trace against primary evidence and add review before proof use."
198                        .to_string(),
199                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
200                caveats: vec!["Agent traces are source artifacts, not scientific truth.".to_string()],
201            });
202        }
203
204        if source.source_type == "synthetic_report"
205            && !source
206                .finding_ids
207                .iter()
208                .any(|finding_id| reviewed_finding_ids.contains(finding_id.as_str()))
209        {
210            signals.push(SignalItem {
211                id: signal_id("synthetic_source_requires_review", &source.id),
212                kind: "synthetic_source_requires_review".to_string(),
213                severity: "warning".to_string(),
214                target: SignalTarget {
215                    r#type: "source".to_string(),
216                    id: source.id.clone(),
217                },
218                reason: "Synthetic report source requires human review and primary-source grounding."
219                    .to_string(),
220                recommended_action:
221                    "Use synthetic reports as review leads unless evidence atoms trace back to primary sources."
222                        .to_string(),
223                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
224                caveats: vec!["Synthetic sources should not silently become evidence.".to_string()],
225            });
226        }
227    }
228
229    for atom in &projection.evidence_atoms {
230        if atom.locator.is_none() {
231            signals.push(SignalItem {
232                id: signal_id("missing_evidence_locator", &atom.id),
233                kind: "missing_evidence_locator".to_string(),
234                severity: "warning".to_string(),
235                target: SignalTarget {
236                    r#type: "finding".to_string(),
237                    id: atom.finding_id.clone(),
238                },
239                reason:
240                    "Evidence atom lacks a span, table row, page, section, run, or metric locator."
241                        .to_string(),
242                recommended_action:
243                    "Verify the exact source location or keep this as a weak review lead."
244                        .to_string(),
245                blocks: vec!["proof_ready".to_string()],
246                caveats: vec![
247                    "A source citation is weaker than a located evidence atom.".to_string(),
248                ],
249            });
250        }
251
252        if !atom.human_verified
253            && source_by_id
254                .get(atom.source_id.as_str())
255                .is_some_and(|source| sources::is_synthetic_source(source))
256            && !reviewed_finding_ids.contains(atom.finding_id.as_str())
257        {
258            signals.push(SignalItem {
259                id: signal_id("synthetic_source_requires_review", &atom.id),
260                kind: "synthetic_source_requires_review".to_string(),
261                severity: "warning".to_string(),
262                target: SignalTarget {
263                    r#type: "finding".to_string(),
264                    id: atom.finding_id.clone(),
265                },
266                reason: "Evidence atom is linked to an unverified synthetic or agent source."
267                    .to_string(),
268                recommended_action:
269                    "Attach primary evidence or review the atom before proof export.".to_string(),
270                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
271                caveats: vec![
272                    "Generated traces can guide review but are not trusted evidence.".to_string(),
273                ],
274            });
275        }
276    }
277
278    for record in &projection.condition_records {
279        if record.text.trim().is_empty() {
280            signals.push(SignalItem {
281                id: signal_id("missing_conditions", &record.id),
282                kind: "missing_conditions".to_string(),
283                severity: "warning".to_string(),
284                target: SignalTarget {
285                    r#type: "finding".to_string(),
286                    id: record.finding_id.clone(),
287                },
288                reason: "Finding has no declared condition boundary.".to_string(),
289                recommended_action:
290                    "Add the species, model system, assay, comparator, endpoint, or scope that bounds the finding."
291                        .to_string(),
292                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
293                caveats: vec!["A finding without conditions is incomplete frontier state.".to_string()],
294            });
295        }
296
297        if record.comparator_status == "missing_or_unclear"
298            && (record.exposure_or_efficacy == "efficacy" || record.exposure_or_efficacy == "both")
299        {
300            signals.push(SignalItem {
301                id: signal_id("missing_comparator", &record.id),
302                kind: "missing_comparator".to_string(),
303                severity: "info".to_string(),
304                target: SignalTarget {
305                    r#type: "finding".to_string(),
306                    id: record.finding_id.clone(),
307                },
308                reason: "Condition record does not declare a comparator or baseline.".to_string(),
309                recommended_action:
310                    "Review whether the evidence supports the asserted direction without a declared comparator."
311                        .to_string(),
312                blocks: vec![],
313                caveats: vec![
314                    "Comparator absence is a review signal, not automatic disproof.".to_string(),
315                ],
316            });
317        }
318
319        if record.exposure_or_efficacy == "both" {
320            signals.push(SignalItem {
321                id: signal_id("exposure_efficacy_overgeneralization", &record.id),
322                kind: "condition_loss_risk".to_string(),
323                severity: "info".to_string(),
324                target: SignalTarget {
325                    r#type: "finding".to_string(),
326                    id: record.finding_id.clone(),
327                },
328                reason: "Exposure and efficacy language appear in the same condition boundary."
329                    .to_string(),
330                recommended_action:
331                    "Keep exposure, functional delivery, and therapeutic efficacy separate unless the source directly supports the broader claim."
332                        .to_string(),
333                blocks: vec![],
334                caveats: vec![
335                    "Vela flags possible overgeneralization; reviewers decide the final scope."
336                        .to_string(),
337                ],
338            });
339        }
340
341        if record.translation_scope == "animal_model"
342            && record
343                .caveats
344                .iter()
345                .any(|caveat| caveat.contains("human translation"))
346        {
347            signals.push(SignalItem {
348                id: signal_id("mouse_human_translation_risk", &record.id),
349                kind: "condition_loss_risk".to_string(),
350                severity: "info".to_string(),
351                target: SignalTarget {
352                    r#type: "finding".to_string(),
353                    id: record.finding_id.clone(),
354                },
355                reason: "Animal-model evidence is adjacent to human translation language."
356                    .to_string(),
357                recommended_action:
358                    "Preserve the animal-model scope unless human data are explicitly attached."
359                        .to_string(),
360                blocks: vec![],
361                caveats: vec![
362                    "Mouse or animal evidence should not silently become a human claim."
363                        .to_string(),
364                ],
365            });
366        }
367    }
368
369    // Build a set of finding IDs that have at least one evidence atom
370    // attached. Used by the source-grounding doctrine invariant below.
371    let evidence_grounded: BTreeSet<&str> = projection
372        .evidence_atoms
373        .iter()
374        .map(|atom| atom.finding_id.as_str())
375        .collect();
376
377    for finding in &frontier.findings {
378        if finding.provenance.doi.is_none()
379            && finding.provenance.pmid.is_none()
380            && finding.provenance.title.trim().is_empty()
381        {
382            signals.push(SignalItem {
383                id: signal_id("weak_provenance", &finding.id),
384                kind: "weak_provenance".to_string(),
385                severity: "warning".to_string(),
386                target: SignalTarget {
387                    r#type: "finding".to_string(),
388                    id: finding.id.clone(),
389                },
390                reason: "Finding lacks DOI, PMID, and source title fallback.".to_string(),
391                recommended_action:
392                    "Add source metadata or mark the finding as unresolved before proof export."
393                        .to_string(),
394                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
395                caveats: vec!["Provenance is separate from confidence.".to_string()],
396            });
397        }
398
399        // Doctrine line 3: a finding without conditions is incomplete.
400        // Strict check blocker when both conditions.text is empty AND no
401        // scope flag is set, AND the finding is not theoretical (theoretical
402        // findings can be scope-free by nature).
403        let scope_declared = finding.conditions.in_vivo
404            || finding.conditions.in_vitro
405            || finding.conditions.human_data
406            || finding.conditions.clinical_trial;
407        if finding.conditions.text.trim().is_empty()
408            && !scope_declared
409            && finding.assertion.assertion_type != "theoretical"
410            && !finding.flags.retracted
411        {
412            signals.push(SignalItem {
413                id: signal_id("conditions_undeclared", &finding.id),
414                kind: "conditions_undeclared".to_string(),
415                severity: "error".to_string(),
416                target: SignalTarget {
417                    r#type: "finding".to_string(),
418                    id: finding.id.clone(),
419                },
420                reason:
421                    "Finding has no condition text and no scope flag (in_vivo/in_vitro/human_data/clinical_trial)."
422                        .to_string(),
423                recommended_action:
424                    "Declare at least one scope flag and condition text, or mark the finding theoretical."
425                        .to_string(),
426                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
427                caveats: vec![
428                    "A finding without conditions is doctrinally incomplete state."
429                        .to_string(),
430                ],
431            });
432        }
433
434        // Doctrine line 4: a result without provenance is not evidence.
435        // Strict-check blocker when an active finding has no evidence atom.
436        if !finding.flags.retracted && !evidence_grounded.contains(finding.id.as_str()) {
437            signals.push(SignalItem {
438                id: signal_id("evidence_atom_missing", &finding.id),
439                kind: "evidence_atom_missing".to_string(),
440                severity: "error".to_string(),
441                target: SignalTarget {
442                    r#type: "finding".to_string(),
443                    id: finding.id.clone(),
444                },
445                reason:
446                    "Active finding has no materialized evidence atom in the source-evidence map."
447                        .to_string(),
448                recommended_action:
449                    "Run `vela normalize` to materialize evidence atoms, or attach explicit evidence spans."
450                        .to_string(),
451                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
452                caveats: vec![
453                    "A citation alone is not evidence in the v0.3 substrate.".to_string(),
454                ],
455            });
456        }
457
458        // Doctrine line 5: an agent trace is not truth.
459        // Strict-check blocker when source_type implies the claim came from
460        // a non-peer-reviewed source (model_output, expert_assertion,
461        // agent_trace) AND the finding has not been reviewed.
462        let agent_typed = matches!(
463            finding.provenance.source_type.as_str(),
464            "model_output" | "expert_assertion" | "agent_trace"
465        );
466        let has_review = finding
467            .provenance
468            .review
469            .as_ref()
470            .is_some_and(|r| r.reviewed)
471            || finding.flags.review_state.is_some()
472            || reviewed_finding_ids.contains(finding.id.as_str());
473        if agent_typed && !has_review && !finding.flags.gap && !finding.flags.retracted {
474            signals.push(SignalItem {
475                id: signal_id("agent_typed_unreviewed", &finding.id),
476                kind: "agent_typed_unreviewed".to_string(),
477                severity: "warning".to_string(),
478                target: SignalTarget {
479                    r#type: "finding".to_string(),
480                    id: finding.id.clone(),
481                },
482                reason: format!(
483                    "Source type '{}' requires explicit review before strict acceptance.",
484                    finding.provenance.source_type
485                ),
486                recommended_action:
487                    "Run `vela review --apply` against this finding or flag it as gap before strict use."
488                        .to_string(),
489                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
490                caveats: vec![
491                    "Agent traces, expert assertions, and model outputs are sources, not truth."
492                        .to_string(),
493                ],
494            });
495        }
496
497        if finding.evidence.evidence_spans.is_empty() {
498            signals.push(SignalItem {
499                id: signal_id("missing_evidence_span", &finding.id),
500                kind: "missing_evidence_span".to_string(),
501                severity: "warning".to_string(),
502                target: SignalTarget {
503                    r#type: "finding".to_string(),
504                    id: finding.id.clone(),
505                },
506                reason: "Finding has no verified evidence span attached.".to_string(),
507                recommended_action:
508                    "Verify the assertion against source text and add evidence spans where possible."
509                        .to_string(),
510                blocks: vec!["proof_ready".to_string()],
511                caveats: vec!["Missing spans do not imply the assertion is false.".to_string()],
512            });
513        }
514
515        if finding.conditions.text.trim().is_empty() {
516            signals.push(SignalItem {
517                id: signal_id("missing_conditions", &finding.id),
518                kind: "missing_conditions".to_string(),
519                severity: "warning".to_string(),
520                target: SignalTarget {
521                    r#type: "finding".to_string(),
522                    id: finding.id.clone(),
523                },
524                reason: "Finding has no explicit condition boundary.".to_string(),
525                recommended_action:
526                    "Add species, model system, assay, regimen, population, or scope conditions."
527                        .to_string(),
528                blocks: vec!["proof_ready".to_string()],
529                caveats: vec![
530                    "Condition loss is a common source of overgeneralized scientific claims."
531                        .to_string(),
532                ],
533            });
534        }
535
536        if finding.conditions.text.trim().is_empty()
537            && contains_condition_sensitive_claim(&finding.assertion.text)
538        {
539            signals.push(SignalItem {
540                id: signal_id("condition_loss_risk", &finding.id),
541                kind: "condition_loss_risk".to_string(),
542                severity: "warning".to_string(),
543                target: SignalTarget {
544                    r#type: "finding".to_string(),
545                    id: finding.id.clone(),
546                },
547                reason: "Finding uses condition-sensitive language without explicit condition boundaries."
548                    .to_string(),
549                recommended_action:
550                    "Separate exposure, efficacy, species, assay, payload, endpoint, and translation scope."
551                        .to_string(),
552                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
553                caveats: vec![
554                    "Vela should flag overgeneralization rather than smooth it into a summary."
555                        .to_string(),
556                ],
557            });
558        }
559
560        if finding
561            .assertion
562            .entities
563            .iter()
564            .any(|entity| entity.needs_review)
565        {
566            signals.push(SignalItem {
567                id: signal_id("needs_human_review", &finding.id),
568                kind: "needs_human_review".to_string(),
569                severity: "warning".to_string(),
570                target: SignalTarget {
571                    r#type: "finding".to_string(),
572                    id: finding.id.clone(),
573                },
574                reason: "Finding contains unresolved or low-confidence entity resolution."
575                    .to_string(),
576                recommended_action:
577                    "Review entity names, types, identifiers, and source grounding before proof use."
578                        .to_string(),
579                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
580                caveats: vec!["Entity review status is separate from assertion confidence.".to_string()],
581            });
582        }
583
584        if finding.provenance.extraction.method.contains("fallback")
585            || finding.provenance.extraction.method.contains("rough")
586            || finding.provenance.extraction.method.contains("abstract")
587        {
588            signals.push(SignalItem {
589                id: signal_id("rough_source_extraction", &finding.id),
590                kind: "rough_source_extraction".to_string(),
591                severity: "warning".to_string(),
592                target: SignalTarget {
593                    r#type: "finding".to_string(),
594                    id: finding.id.clone(),
595                },
596                reason: format!(
597                    "Finding was produced by extraction mode '{}'.",
598                    finding.provenance.extraction.method
599                ),
600                recommended_action:
601                    "Inspect the source text and mark caveats or review status before treating this as durable state."
602                        .to_string(),
603                blocks: vec!["proof_ready".to_string()],
604                caveats: vec![
605                    "Rough extraction can be useful as a review lead, not as a scientific conclusion."
606                        .to_string(),
607                ],
608            });
609        }
610
611        if matches!(
612            finding.provenance.source_type.as_str(),
613            "model_output" | "summary" | "synthesis"
614        ) && !reviewed_finding_ids.contains(finding.id.as_str())
615        {
616            signals.push(SignalItem {
617                id: signal_id("synthesis_used_as_source", &finding.id),
618                kind: "synthesis_used_as_source".to_string(),
619                severity: "warning".to_string(),
620                target: SignalTarget {
621                    r#type: "finding".to_string(),
622                    id: finding.id.clone(),
623                },
624                reason: "Finding provenance indicates synthesized text or model output as source."
625                    .to_string(),
626                recommended_action:
627                    "Trace this finding back to primary source evidence or mark it as a review lead."
628                        .to_string(),
629                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
630                caveats: vec![
631                    "Derived synthesis should not silently become primary evidence.".to_string(),
632                ],
633            });
634        }
635
636        if finding.flags.contested && finding.confidence.score >= 0.8 {
637            signals.push(SignalItem {
638                id: signal_id("contested_high_confidence", &finding.id),
639                kind: "contested_high_confidence".to_string(),
640                severity: "warning".to_string(),
641                target: SignalTarget {
642                    r#type: "finding".to_string(),
643                    id: finding.id.clone(),
644                },
645                reason: "Finding is contested while carrying high confidence.".to_string(),
646                recommended_action:
647                    "Review contradiction links, provenance, and confidence components."
648                        .to_string(),
649                blocks: vec!["proof_ready".to_string()],
650                caveats: vec![
651                    "Candidate tensions are review surfaces, not definitive contradictions."
652                        .to_string(),
653                ],
654            });
655        }
656    }
657
658    // Phase N (v0.4): provenance authority. `Project.sources` is
659    // canonical; `FindingBundle.provenance` is a denormalized cache.
660    // Drift between the two is a strict-mode failure — the source
661    // record wins, and the finding must be rewritten via
662    // `vela normalize --resync-provenance`.
663    let mut by_doi: BTreeMap<String, &crate::sources::SourceRecord> = BTreeMap::new();
664    let mut by_pmid: BTreeMap<String, &crate::sources::SourceRecord> = BTreeMap::new();
665    let mut duplicate_dois: BTreeSet<String> = BTreeSet::new();
666    let mut duplicate_pmids: BTreeSet<String> = BTreeSet::new();
667    for source in &frontier.sources {
668        if let Some(doi) = source.doi.as_deref() {
669            let key = doi.to_lowercase();
670            if by_doi.insert(key.clone(), source).is_some() {
671                duplicate_dois.insert(key);
672            }
673        }
674        if let Some(pmid) = source.pmid.as_deref() {
675            let key = pmid.to_string();
676            if by_pmid.insert(key.clone(), source).is_some() {
677                duplicate_pmids.insert(key);
678            }
679        }
680    }
681    for key in &duplicate_dois {
682        by_doi.remove(key);
683    }
684    for key in &duplicate_pmids {
685        by_pmid.remove(key);
686    }
687    for finding in &frontier.findings {
688        if finding.flags.retracted {
689            continue;
690        }
691        let source = finding
692            .provenance
693            .doi
694            .as_deref()
695            .map(str::to_lowercase)
696            .and_then(|k| by_doi.get(&k).copied())
697            .or_else(|| {
698                finding
699                    .provenance
700                    .pmid
701                    .as_deref()
702                    .and_then(|k| by_pmid.get(k).copied())
703            });
704        let Some(source) = source else { continue };
705
706        let mut diffs: Vec<String> = Vec::new();
707        if !source.title.is_empty() && source.title != finding.provenance.title {
708            diffs.push(format!(
709                "title differs (source='{}', cached='{}')",
710                truncate(&source.title, 60),
711                truncate(&finding.provenance.title, 60)
712            ));
713        }
714        if source.year.is_some() && source.year != finding.provenance.year {
715            diffs.push(format!(
716                "year differs (source={:?}, cached={:?})",
717                source.year, finding.provenance.year
718            ));
719        }
720        if !diffs.is_empty() {
721            signals.push(SignalItem {
722                id: signal_id("provenance_drift", &finding.id),
723                kind: "provenance_drift".to_string(),
724                severity: "error".to_string(),
725                target: SignalTarget {
726                    r#type: "finding".to_string(),
727                    id: finding.id.clone(),
728                },
729                reason: format!(
730                    "Cached finding.provenance disagrees with canonical source: {}",
731                    diffs.join("; ")
732                ),
733                recommended_action:
734                    "Run `vela normalize --resync-provenance --write` to regenerate finding.provenance from the canonical SourceRecord."
735                        .to_string(),
736                blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
737                caveats: vec![
738                    "sources is the authority; provenance is the cache."
739                        .to_string(),
740                ],
741            });
742        }
743    }
744
745    // Phase M (v0.4): registered actors must sign their canonical
746    // events. Once an actor.id appears in `frontier.actors`, every
747    // canonical event referencing that actor.id MUST carry a signature
748    // that verifies against the registered public key. Unregistered
749    // actor.ids fall back to the legacy placeholder-rejection rule.
750    if !frontier.actors.is_empty() {
751        let registry: BTreeMap<&str, &crate::sign::ActorRecord> = frontier
752            .actors
753            .iter()
754            .map(|actor| (actor.id.as_str(), actor))
755            .collect();
756        for event in &frontier.events {
757            if event.actor.r#type != "human" {
758                continue;
759            }
760            let Some(actor_record) = registry.get(event.actor.id.as_str()) else {
761                continue;
762            };
763            // v0.127: A7 mitigation. If the actor's key is revoked at
764            // or before this event's timestamp, the signature is
765            // rejected regardless of whether it would otherwise
766            // verify. Historical signatures (events with timestamp
767            // strictly before revoked_at) remain valid: the
768            // substrate does not retroactively invalidate canonical
769            // history.
770            if actor_record.is_revoked_at(event.timestamp.as_str()) {
771                signals.push(SignalItem {
772                    id: signal_id("post_revocation_signature", &event.id),
773                    kind: "post_revocation_signature".to_string(),
774                    severity: "error".to_string(),
775                    target: SignalTarget {
776                        r#type: "event".to_string(),
777                        id: event.id.clone(),
778                    },
779                    reason: format!(
780                        "Event {} carries a signature from actor '{}' whose key was revoked at {} (event timestamp {}).",
781                        event.id,
782                        event.actor.id,
783                        actor_record.revoked_at.as_deref().unwrap_or("?"),
784                        event.timestamp
785                    ),
786                    recommended_action:
787                        "Reject this event. The signing key was revoked at-or-before the event timestamp; verify the rotation chain and re-sign under the current actor key."
788                            .to_string(),
789                    blocks: vec!["strict_check".to_string()],
790                    caveats: Vec::new(),
791                });
792                continue;
793            }
794            let pubkey = actor_record.public_key.as_str();
795            let invalid = match event.signature.as_deref() {
796                None => Some("missing".to_string()),
797                Some(_) => match crate::sign::verify_event_signature(event, pubkey) {
798                    Ok(true) => None,
799                    Ok(false) => Some("does not verify".to_string()),
800                    Err(err) => Some(err),
801                },
802            };
803            if let Some(reason) = invalid {
804                signals.push(SignalItem {
805                    id: signal_id("unsigned_registered_actor", &event.id),
806                    kind: "unsigned_registered_actor".to_string(),
807                    severity: "error".to_string(),
808                    target: SignalTarget {
809                        r#type: "event".to_string(),
810                        id: event.id.clone(),
811                    },
812                    reason: format!(
813                        "Event {} from registered actor '{}' has invalid signature: {reason}.",
814                        event.id, event.actor.id
815                    ),
816                    recommended_action:
817                        "Sign the event with the registered Ed25519 key before strict acceptance."
818                            .to_string(),
819                    blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
820                    caveats: vec![
821                        "Registered actors are bound to their public key; unsigned writes break that binding."
822                            .to_string(),
823                    ],
824                });
825            }
826        }
827    }
828
829    let proposal_summary = proposals::summary(frontier);
830    for duplicate in &proposal_summary.duplicate_ids {
831        signals.push(SignalItem {
832            id: signal_id("proposal_conflict", duplicate),
833            kind: "proposal_conflict".to_string(),
834            severity: "error".to_string(),
835            target: SignalTarget {
836                r#type: "frontier".to_string(),
837                id: frontier.project.name.clone(),
838            },
839            reason: format!("Duplicate proposal id detected: {duplicate}."),
840            recommended_action: "Remove or rename the duplicate proposal before applying writes."
841                .to_string(),
842            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
843            caveats: vec![],
844        });
845    }
846    for target in &proposal_summary.invalid_targets {
847        signals.push(SignalItem {
848            id: signal_id("proposal_conflict", target),
849            kind: "proposal_conflict".to_string(),
850            severity: "error".to_string(),
851            target: SignalTarget {
852                r#type: "finding".to_string(),
853                id: target.clone(),
854            },
855            reason: format!("Proposal target does not exist in frontier state: {target}."),
856            recommended_action:
857                "Fix the proposal target or remove the orphan proposal before applying writes."
858                    .to_string(),
859            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
860            caveats: vec![],
861        });
862    }
863    for proposal in frontier
864        .proposals
865        .iter()
866        .filter(|proposal| proposal.status == "pending_review")
867    {
868        signals.push(SignalItem {
869            id: signal_id("pending_proposal_review", &proposal.id),
870            kind: "pending_proposal_review".to_string(),
871            severity: "warning".to_string(),
872            target: SignalTarget {
873                r#type: proposal.target.r#type.clone(),
874                id: proposal.target.id.clone(),
875            },
876            reason: format!(
877                "Pending {} proposal requires review before frontier truth changes.",
878                proposal.kind
879            ),
880            recommended_action:
881                "Review the proposal and accept or reject it before strict proof use.".to_string(),
882            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
883            caveats: vec!["Pending proposals are not active frontier state.".to_string()],
884        });
885    }
886    for proposal in frontier
887        .proposals
888        .iter()
889        .filter(|proposal| proposal.status == "applied")
890    {
891        signals.push(SignalItem {
892            id: signal_id("proposal_applied", &proposal.id),
893            kind: "proposal_applied".to_string(),
894            severity: "info".to_string(),
895            target: SignalTarget {
896                r#type: proposal.target.r#type.clone(),
897                id: proposal.target.id.clone(),
898            },
899            reason: format!("Applied proposal {} changed frontier state.", proposal.id),
900            recommended_action:
901                "Re-export proof artifacts if this proposal materially changes what reviewers should inspect."
902                    .to_string(),
903            blocks: vec![],
904            caveats: vec![],
905        });
906    }
907    for proposal in frontier.proposals.iter().filter(|proposal| {
908        matches!(proposal.status.as_str(), "accepted" | "applied")
909            && proposal
910                .reviewed_by
911                .as_deref()
912                .is_none_or(proposals::is_placeholder_reviewer)
913    }) {
914        signals.push(SignalItem {
915            id: signal_id("reviewer_identity_missing", &proposal.id),
916            kind: "reviewer_identity_missing".to_string(),
917            severity: "error".to_string(),
918            target: SignalTarget {
919                r#type: proposal.target.r#type.clone(),
920                id: proposal.target.id.clone(),
921            },
922            reason: format!(
923                "Accepted or applied proposal {} lacks a stable named reviewer identity.",
924                proposal.id
925            ),
926            recommended_action:
927                "Re-accept the proposal with a stable named reviewer id before strict proof use."
928                    .to_string(),
929            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
930            caveats: vec![
931                "Placeholder reviewer ids do not satisfy the v0 trust boundary.".to_string(),
932            ],
933        });
934    }
935    if frontier.proof_state.latest_packet.status == "stale" {
936        signals.push(SignalItem {
937            id: signal_id("stale_proof_packet", &frontier.project.name),
938            kind: "stale_proof_packet".to_string(),
939            severity: "warning".to_string(),
940            target: SignalTarget {
941                r#type: "frontier".to_string(),
942                id: frontier.project.name.clone(),
943            },
944            reason: frontier
945                .proof_state
946                .stale_reason
947                .clone()
948                .unwrap_or_else(|| "Proof packet is stale relative to current frontier state.".to_string()),
949            recommended_action:
950                "Run `vela proof` again to export a packet that matches the current frontier snapshot."
951                    .to_string(),
952            blocks: vec!["strict_check".to_string(), "proof_ready".to_string()],
953            caveats: vec!["Packet validation can still pass for stale but internally consistent packets.".to_string()],
954        });
955    }
956
957    let review_queue = build_review_queue(frontier, &signals);
958    let proof_readiness = build_proof_readiness(&signals);
959
960    SignalReport {
961        schema: "vela.signals.v0".to_string(),
962        frontier: frontier.project.name.clone(),
963        signals,
964        review_queue,
965        proof_readiness,
966    }
967}
968
969pub fn quality_table(frontier: &Project, report: &SignalReport) -> Value {
970    let mut by_kind = BTreeMap::<String, usize>::new();
971    let mut by_severity = BTreeMap::<String, usize>::new();
972    let proposal_summary = proposals::summary(frontier);
973    for signal in &report.signals {
974        *by_kind.entry(signal.kind.clone()).or_default() += 1;
975        *by_severity.entry(signal.severity.clone()).or_default() += 1;
976    }
977
978    json!({
979        "schema": "vela.quality-table.v0",
980        "frontier": frontier.project.name,
981        "stats": frontier.stats,
982        "event_log": {
983            "events": frontier.events.len(),
984            "review_events_projection": frontier.review_events.len(),
985            "confidence_updates_projection": frontier.confidence_updates.len(),
986        },
987        "signals": {
988            "total": report.signals.len(),
989            "by_kind": by_kind,
990            "by_severity": by_severity,
991        },
992        "review_queue": {
993            "items": report.review_queue.len(),
994            "high_priority": report.review_queue.iter().filter(|item| item.priority == "high").count(),
995        },
996        "proposals": proposal_summary,
997        "proof_state": frontier.proof_state,
998        "proof_readiness": report.proof_readiness,
999        "caveats": [
1000            "Signals are derived from frontier state and should be recomputed after edits.",
1001            "Candidate gaps, bridges, and tensions require human review.",
1002            "A clean quality table is not proof of scientific truth."
1003        ],
1004    })
1005}
1006
1007pub fn ro_crate_metadata(frontier: &Project, files: &[String]) -> Value {
1008    let graph_files: Vec<Value> = files
1009        .iter()
1010        .map(|path| {
1011            json!({
1012                "@id": path,
1013                "@type": "File",
1014                "name": path,
1015            })
1016        })
1017        .collect();
1018
1019    let mut graph = vec![
1020        json!({
1021            "@id": "ro-crate-metadata.jsonld",
1022            "@type": "CreativeWork",
1023            "about": {"@id": "./"}
1024        }),
1025        json!({
1026            "@id": "./",
1027            "@type": "Dataset",
1028            "name": format!("{} proof packet", frontier.project.name),
1029            "description": frontier.project.description,
1030            "dateCreated": frontier.project.compiled_at,
1031            "conformsTo": {"@id": project::VELA_SCHEMA_URL},
1032            "hasPart": files.iter().map(|path| json!({"@id": path})).collect::<Vec<_>>()
1033        }),
1034    ];
1035    graph.extend(graph_files);
1036    graph.extend(frontier.artifacts.iter().map(|artifact| {
1037        json!({
1038            "@id": artifact.id,
1039            "@type": "CreativeWork",
1040            "name": artifact.name,
1041            "encodingFormat": artifact.media_type,
1042            "sha256": artifact.content_hash,
1043            "url": artifact.source_url.as_ref().or(artifact.locator.as_ref()),
1044            "license": artifact.license,
1045        })
1046    }));
1047
1048    json!({
1049        "@context": "https://w3id.org/ro/crate/1.2/context",
1050        "@graph": graph,
1051    })
1052}
1053
1054fn build_review_queue(frontier: &Project, signals: &[SignalItem]) -> Vec<ReviewQueueItem> {
1055    let link_counts = frontier
1056        .findings
1057        .iter()
1058        .map(|finding| {
1059            let outgoing = finding.links.len() as u32;
1060            let incoming = frontier
1061                .findings
1062                .iter()
1063                .flat_map(|other| &other.links)
1064                .filter(|link| link.target == finding.id)
1065                .count() as u32;
1066            (finding.id.clone(), outgoing + incoming)
1067        })
1068        .collect::<BTreeMap<_, _>>();
1069
1070    let mut by_target = BTreeMap::<String, Vec<&SignalItem>>::new();
1071    for signal in signals {
1072        if signal.target.r#type == "finding" {
1073            by_target
1074                .entry(signal.target.id.clone())
1075                .or_default()
1076                .push(signal);
1077        }
1078    }
1079
1080    let mut queue = by_target
1081        .into_iter()
1082        .map(|(target_id, grouped)| {
1083            let signal_score = grouped
1084                .iter()
1085                .map(|signal| signal_weight(signal))
1086                .sum::<u32>();
1087            let centrality_score = link_counts.get(&target_id).copied().unwrap_or(0).min(25);
1088            let priority_score = signal_score + centrality_score;
1089            let priority = if grouped
1090                .iter()
1091                .any(|signal| signal.blocks.iter().any(|block| block == "strict_check"))
1092            {
1093                "high"
1094            } else if grouped
1095                .iter()
1096                .any(|signal| signal.blocks.iter().any(|block| block == "proof_ready"))
1097            {
1098                "medium"
1099            } else {
1100                "low"
1101            };
1102            ReviewQueueItem {
1103                id: format!("rq_{}", target_id.trim_start_matches("vf_")),
1104                priority: priority.to_string(),
1105                priority_score,
1106                target: SignalTarget {
1107                    r#type: "finding".to_string(),
1108                    id: target_id,
1109                },
1110                signal_ids: grouped.iter().map(|signal| signal.id.clone()).collect(),
1111                reasons: grouped.iter().map(|signal| signal.reason.clone()).collect(),
1112                recommended_action: grouped
1113                    .first()
1114                    .map(|signal| signal.recommended_action.clone())
1115                    .unwrap_or_else(|| "Review finding state.".to_string()),
1116            }
1117        })
1118        .collect::<Vec<_>>();
1119    queue.sort_by(|a, b| {
1120        b.priority_score
1121            .cmp(&a.priority_score)
1122            .then_with(|| a.target.id.cmp(&b.target.id))
1123    });
1124    queue
1125}
1126
1127fn signal_weight(signal: &SignalItem) -> u32 {
1128    let severity = match signal.severity.as_str() {
1129        "error" => 100,
1130        "warning" => 30,
1131        _ => 10,
1132    };
1133    let kind = match signal.kind.as_str() {
1134        "check_error" => 100,
1135        "contested_high_confidence" => 70,
1136        "proposal_conflict" => 80,
1137        "pending_proposal_review" => 50,
1138        "weak_provenance" => 45,
1139        "missing_evidence_span" => 35,
1140        _ => 10,
1141    };
1142    let blocker = if signal.blocks.iter().any(|block| block == "strict_check") {
1143        30
1144    } else if signal.blocks.iter().any(|block| block == "proof_ready") {
1145        15
1146    } else {
1147        0
1148    };
1149    severity + kind + blocker
1150}
1151
1152fn build_proof_readiness(signals: &[SignalItem]) -> ProofReadiness {
1153    let blockers = signals
1154        .iter()
1155        .filter(|signal| signal.blocks.iter().any(|block| block == "proof_ready"))
1156        .count();
1157    let warnings = signals
1158        .iter()
1159        .filter(|signal| signal.severity == "warning")
1160        .count();
1161    ProofReadiness {
1162        status: if blockers == 0 {
1163            "ready".to_string()
1164        } else {
1165            "needs_review".to_string()
1166        },
1167        blockers,
1168        warnings,
1169        caveats: vec![
1170            "Proof readiness means packet state is reviewable, not scientifically settled."
1171                .to_string(),
1172        ],
1173    }
1174}
1175
/// Return `s` unchanged when it has at most `n` characters; otherwise
/// keep the first `n` characters and append a single `…` marker.
/// Cuts on `char` boundaries, so multi-byte text is never split.
fn truncate(s: &str, n: usize) -> String {
    match s.char_indices().nth(n) {
        // At most `n` chars: nothing to cut.
        None => s.to_string(),
        // `idx` is the byte offset of the (n+1)-th char — the cut point.
        Some((idx, _)) => format!("{}…", &s[..idx]),
    }
}
1184
/// Derive a stable signal id of the form `sig_<kind>_<id>`, with any
/// leading `vf_` prefixes (repeated ones included) stripped from the
/// finding id.
fn signal_id(kind: &str, finding_id: &str) -> String {
    let mut id = String::with_capacity(5 + kind.len() + finding_id.len());
    id.push_str("sig_");
    id.push_str(kind);
    id.push('_');
    id.push_str(finding_id.trim_start_matches("vf_"));
    id
}
1188
/// Heuristic: does `text` mention any term suggesting the claim is
/// sensitive to experimental conditions (delivery, species, clinical
/// context, …)? Matching is case-insensitive substring search, so
/// e.g. "humanized" also matches via "human".
fn contains_condition_sensitive_claim(text: &str) -> bool {
    const TERMS: [&str; 12] = [
        "delivery",
        "efficacy",
        "therapeutic",
        "clinical",
        "human",
        "mouse",
        "mice",
        "assay",
        "endpoint",
        "payload",
        "exposure",
        "translation",
    ];
    let haystack = text.to_ascii_lowercase();
    TERMS.iter().any(|term| haystack.contains(term))
}
1208
#[cfg(test)]
mod tests {
    use crate::bundle::{
        Assertion, Conditions, Confidence, Evidence, FindingBundle, Flags, Provenance,
    };

    use super::*;

    // Fixture: a single finding constructed so that, per the assertions in
    // the test below, the analyzer emits weak_provenance,
    // missing_evidence_span, and contested_high_confidence signals for it.
    // Note the deliberate choices: provenance has no doi/pmid/url and an
    // empty title, `evidence_spans` is empty, and the finding is flagged
    // `contested` while carrying a 0.9 confidence.
    fn minimal_finding(id: &str) -> FindingBundle {
        let assertion = Assertion {
            text: "LRP1 transport is altered in Alzheimer models.".to_string(),
            assertion_type: "mechanism".to_string(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };
        // No identifiers (doi/pmid/pmc/openalex/url) and an empty title:
        // the weakest provenance a finding can carry.
        let provenance = Provenance {
            source_type: "published_paper".to_string(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: String::new(),
            authors: vec![],
            year: Some(2020),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Default::default(),
            review: None,
            citation_count: None,
        };
        FindingBundle {
            id: id.to_string(),
            version: 1,
            previous_version: None,
            assertion,
            evidence: Evidence {
                evidence_type: "experimental".to_string(),
                model_system: "mouse".to_string(),
                species: Some("Mus musculus".to_string()),
                method: "test".to_string(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                // No evidence spans: the test expects this to be flagged.
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: true,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            // High raw confidence (0.9) combined with `contested: true`
            // below is the pairing the test expects to be flagged.
            confidence: Confidence::raw(0.9, "test".to_string(), 0.9),
            provenance,
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: true,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: "2026-01-01T00:00:00Z".to_string(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    // End-to-end smoke test over `analyze`: the deliberately weak fixture
    // must yield the three expected signal kinds, and because all three
    // target the same finding they collapse into one review-queue entry.
    #[test]
    fn weak_and_contested_findings_emit_review_signals() {
        let frontier = project::assemble("test", vec![minimal_finding("vf_abc")], 1, 0, "test");
        // No external diagnostics: every signal comes from frontier state.
        let report = analyze(&frontier, &[]);
        assert!(report.signals.iter().any(|s| s.kind == "weak_provenance"));
        assert!(
            report
                .signals
                .iter()
                .any(|s| s.kind == "missing_evidence_span")
        );
        assert!(
            report
                .signals
                .iter()
                .any(|s| s.kind == "contested_high_confidence")
        );
        assert_eq!(report.review_queue.len(), 1);
    }
}