vela_protocol/
bundle.rs

1//! Content-addressed finding bundles: the atomic object of the Vela protocol.
2
3use std::collections::BTreeMap;
4
5use chrono::Utc;
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use sha2::{Digest, Sha256};
9
/// Valid entity types per schema. Single source of truth shared by the validator
/// and the `vela finding add` CLI; do not duplicate.
///
/// v0.10 added domain-neutral entries — `particle`, `instrument`, `dataset`,
/// `quantity` — surfaced by the first non-bio frontier on the public hub
/// (Nakamura's dark-matter constraints). The biology-leaning entries remain
/// for back-compat; the additions widen expressiveness without churn.
///
/// All entries are lowercase snake_case strings. `other` is the catch-all for
/// entities that fit none of the named types.
///
/// NOTE(review): presumably the allowed values of `Entity::entity_type` —
/// confirm against the validator.
pub const VALID_ENTITY_TYPES: &[&str] = &[
    // bio (pre-v0.10)
    "gene",
    "protein",
    "compound",
    "disease",
    "cell_type",
    "organism",
    "pathway",
    "assay",
    "anatomical_structure",
    // domain-neutral (v0.10)
    "particle",
    "instrument",
    "dataset",
    "quantity",
    // escape valve: anything that fits none of the types above
    "other",
];
36
/// Valid assertion types per schema.
///
/// v0.10 added `measurement` and `exclusion` for measurement-heavy domains
/// (physics, chemistry, climate, materials) where the substance of a
/// finding is a numerical value or an exclusion limit at a confidence level.
///
/// v0.30 added the Notes Compiler entries at the end of the list; the inline
/// comment next to them records their intended semantics.
pub const VALID_ASSERTION_TYPES: &[&str] = &[
    "mechanism",
    "therapeutic",
    "diagnostic",
    "epidemiological",
    "observational",
    "review",
    "methodological",
    "computational",
    "theoretical",
    "negative",
    // v0.10
    "measurement",
    "exclusion",
    // v0.30: the Notes Compiler emits these for proposals derived from
    // researcher zettelkasten / Obsidian vaults. They become canonical
    // findings on accept; rejecting them at the validator would force a
    // post-hoc rewrite that breaks content-addressed ids. Semantic intent:
    //   - `tension`: a theoretical claim about a field-level contradiction
    //     (paired claims that don't reconcile);
    //   - `open_question`: an unresolved framing the agent surfaced;
    //   - `hypothesis`: a provisional candidate claim awaiting evidence.
    // The notes-compiler proposals doc covers how these are produced.
    // NOTE(review): `candidate_finding` is accepted below but is not
    // described here — presumably another Notes Compiler proposal type;
    // confirm and document its intent.
    "tension",
    "open_question",
    "hypothesis",
    "candidate_finding",
];
70
/// Artifact kinds accepted for the generic `Artifact` kernel object.
///
/// `Dataset` and `CodeArtifact` stay around as stronger, typed legacy objects;
/// `Artifact` is the shared substrate path for files and records that need
/// durable byte or pointer provenance before a domain-specific object exists.
/// `other` is the catch-all entry.
pub const VALID_ARTIFACT_KINDS: &[&str] = &[
    "dataset",
    "clinical_trial_record",
    "protocol",
    "supplement",
    "notebook",
    "code",
    "model_output",
    "table",
    "figure",
    "registry_record",
    "lab_file",
    "source_file",
    "other",
];

/// Reports whether `kind` is listed in [`VALID_ARTIFACT_KINDS`].
///
/// The comparison is an exact string match: it is case-sensitive and does
/// no trimming, so `"Dataset"` and `" dataset"` are both rejected.
pub fn valid_artifact_kind(kind: &str) -> bool {
    VALID_ARTIFACT_KINDS.iter().any(|known| *known == kind)
}
95
/// Valid evidence types per schema.
///
/// NOTE(review): presumably the allowed values of `Evidence::evidence_type` —
/// confirm against the validator.
pub const VALID_EVIDENCE_TYPES: &[&str] = &[
    "experimental",
    "observational",
    "computational",
    "theoretical",
    "meta_analysis",
    "systematic_review",
    "case_report",
    // v0.30: Notes Compiler — the evidence span lives in the researcher's
    // zettelkasten note rather than a primary literature passage.
    // Treated as an `expert_assertion`-shaped evidence kind.
    // NOTE(review): `expert_assertion` is a provenance source type
    // (`VALID_PROVENANCE_SOURCE_TYPES`), not an entry of this list — confirm
    // what "shaped" is meant to imply for downstream handling.
    "extracted_from_notes",
];
110
/// Valid provenance source types per schema.
///
/// v0.10 added `data_release` for instrument runs, observation campaigns,
/// and dataset versions that are themselves the substantive object — distinct
/// from the paper that reports them (XENONnT SR0, Planck data releases,
/// JWST observation runs, LHC analysis releases).
///
/// v0.30 added `researcher_notes` for Notes Compiler proposals; see the
/// inline comment for how it differs from `lab_notebook` and
/// `expert_assertion`.
pub const VALID_PROVENANCE_SOURCE_TYPES: &[&str] = &[
    "published_paper",
    "preprint",
    "clinical_trial",
    "lab_notebook",
    "model_output",
    "expert_assertion",
    "database_record",
    // v0.10
    "data_release",
    // v0.30: notes-compiler proposals cite the source markdown note
    // by filename. Distinct from `lab_notebook` (which implies a
    // dated lab workbook entry with primary observations) and
    // `expert_assertion` (which implies a named expert's claim).
    "researcher_notes",
];
133
/// Valid link types per protocol §5.
///
/// Each entry names a relation a link may carry; the strings are lowercase
/// snake_case.
///
/// NOTE(review): the link struct itself is not visible in this part of the
/// file — confirm which field these values are checked against.
pub const VALID_LINK_TYPES: &[&str] = &[
    "supports",
    "contradicts",
    "extends",
    "depends",
    "replicates",
    "supersedes",
    "synthesized_from",
];
144
/// A resolved identifier from a scientific database.
///
/// Used by `Entity` both as the primary `canonical_id` and as each entry of
/// the scored `candidates` list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResolvedId {
    /// The database source (mesh, uniprot, pubchem, chebi, go, ncbi_gene).
    pub source: String,
    /// The identifier value (e.g., "D000544", "Q6ZSS7", "24752728").
    pub id: String,
    /// Confidence in this resolution (0.0-1.0). The range is documented
    /// only; nothing in this struct enforces it.
    pub confidence: f64,
    /// The matched name in the source database. Omitted from the serialized
    /// form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_name: Option<String>,
}
158
/// How an entity was resolved to its canonical form (v0.2.0 schema).
///
/// Serialized in snake_case via `rename_all`: `exact_match`, `fuzzy_match`,
/// `llm_inference`, `manual`. The `Display` impl writes the same strings.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResolutionMethod {
    ExactMatch,
    FuzzyMatch,
    LlmInference,
    Manual,
}
168
169impl std::fmt::Display for ResolutionMethod {
170    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        match self {
172            ResolutionMethod::ExactMatch => write!(f, "exact_match"),
173            ResolutionMethod::FuzzyMatch => write!(f, "fuzzy_match"),
174            ResolutionMethod::LlmInference => write!(f, "llm_inference"),
175            ResolutionMethod::Manual => write!(f, "manual"),
176        }
177    }
178}
179
/// A named entity mentioned by a finding, together with whatever is known
/// about its resolution to database identifiers.
///
/// Every field except `name` and `type` has a serde default, so older JSON
/// that carries only those two keys still deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    /// Surface name of the entity as recorded.
    pub name: String,
    /// Entity type; serialized under the JSON key `type`.
    /// NOTE(review): presumably one of `VALID_ENTITY_TYPES` — confirm against
    /// the validator.
    #[serde(rename = "type")]
    pub entity_type: String,
    /// Deprecated: flat identifiers map. Retained for backward compatibility with
    /// older frontier JSON files. New code should use `canonical_id` and `candidates`.
    #[serde(default)]
    pub identifiers: serde_json::Map<String, serde_json::Value>,
    /// The primary resolved identifier (if resolved). Omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub canonical_id: Option<ResolvedId>,
    /// Alternative resolution candidates with scores.
    #[serde(default)]
    pub candidates: Vec<ResolvedId>,
    /// Known aliases for this entity (e.g., NLRP3 = cryopyrin = NALP3).
    #[serde(default)]
    pub aliases: Vec<String>,
    /// How this resolution was performed. Omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resolution_provenance: Option<String>,
    /// Confidence in the resolution. Deserializes to 1.0 (via `default_one`)
    /// when the key is absent.
    #[serde(default = "default_one")]
    pub resolution_confidence: f64,
    /// How the entity was resolved: exact_match, fuzzy_match, llm_inference, manual.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resolution_method: Option<ResolutionMethod>,
    /// Species context for orthologs (e.g., "Homo sapiens" vs "Mus musculus" for APP).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub species_context: Option<String>,
    /// True when resolution_confidence < 0.8 and the match needs human review.
    /// The flag is stored, not derived here; `Not::not` as the skip predicate
    /// means it is only written out when `true`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub needs_review: bool,
}
213
/// Serde default for `Entity::resolution_confidence`: a missing key
/// deserializes as full confidence (1.0).
fn default_one() -> f64 {
    1.0_f64
}
217
/// Structured description of the evidence behind a claim.
///
/// Embedded in `Replication` and `Resolution` in this file.
/// NOTE(review): presumably also embedded in `FindingBundle`, which is not
/// visible in this part of the file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// Evidence type; serialized under the JSON key `type`.
    /// NOTE(review): presumably one of `VALID_EVIDENCE_TYPES` — confirm
    /// against the validator.
    #[serde(rename = "type")]
    pub evidence_type: String,
    /// Model system the evidence comes from; empty string when absent.
    #[serde(default)]
    pub model_system: String,
    /// Species, when one applies.
    pub species: Option<String>,
    /// Method used; empty string when absent.
    #[serde(default)]
    pub method: String,
    /// Sample size, kept as free text rather than a parsed number.
    pub sample_size: Option<String>,
    /// Effect size, kept as free text rather than a parsed number.
    pub effect_size: Option<String>,
    /// p-value, kept as free text rather than a parsed number.
    pub p_value: Option<String>,
    /// Legacy scalar replication flag; `false` when absent. Superseded by
    /// the structured `Replication` objects introduced in v0.32.
    #[serde(default)]
    pub replicated: bool,
    /// Legacy scalar replication count; see `replicated`.
    pub replication_count: Option<u32>,
    /// Evidence spans as untyped JSON values; empty when absent.
    /// NOTE(review): the span schema is not defined in this part of the file.
    #[serde(default)]
    pub evidence_spans: Vec<serde_json::Value>,
}
236
/// Valid replication outcomes per v0.32 schema; the accepted values of
/// `Replication::outcome`.
///
/// - `replicated`: an independent attempt reproduced the finding within the
///   stated conditions.
/// - `failed`: the attempt did not reproduce.
/// - `partial`: some conditions matched, others didn't (e.g., effect size
///   present but smaller).
/// - `inconclusive`: methodology ambiguity prevents a clean outcome judgment.
pub const VALID_REPLICATION_OUTCOMES: &[&str] =
    &["replicated", "failed", "partial", "inconclusive"];
246
/// v0.32: Replication as a first-class kernel object.
///
/// Before v0.32, replication was encoded as `Evidence.replicated: bool`
/// + `Evidence.replication_count: u32` — a scalar property on the
/// finding. The kernel could not represent "lab A replicated this in
/// human iPSC; lab B failed to replicate in mouse OPCs" — those are
/// distinct epistemic facts, not a single count.
///
/// Each `Replication` is content-addressed (`vrep_<16hex>`) over its
/// target finding, the actor that attempted it, the canonical
/// conditions, and the outcome. This mirrors the `vf_<id>` pattern and
/// makes replication chains queryable, citeable, and propagable through
/// the link graph. Evidence, provenance, notes, and the creation time are
/// not part of the id.
///
/// The legacy `Evidence.replicated` and `Evidence.replication_count`
/// fields are preserved for backward compatibility; v0.32+ frontiers
/// derive them from the structured collection on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Replication {
    /// `vrep_<16hex>`, content-addressed; see `Replication::content_address`.
    pub id: String,
    /// `vf_<id>` of the finding being replicated.
    pub target_finding: String,
    /// Stable actor id of the lab / curator / agent that attempted the
    /// replication. Same shape as `FindingBundle.actor` references.
    pub attempted_by: String,
    /// One of `replicated`, `failed`, `partial`, `inconclusive`.
    /// Stored as a string for forward-compat with future outcome
    /// taxonomies; validated against `VALID_REPLICATION_OUTCOMES`.
    /// `Replication::new` itself does not perform that validation.
    pub outcome: String,
    /// Evidence collected from the replication attempt. Reuses the
    /// existing `Evidence` shape so confidence math stays consistent.
    pub evidence: Evidence,
    /// Conditions under which the replication was attempted (model
    /// system, species, in_vivo/vitro, etc.). The conditions field is
    /// what makes "replicated in mouse but failed in human" a
    /// representable fact. Only `conditions.text` feeds the id.
    pub conditions: Conditions,
    /// Provenance of the replicating paper / preprint / lab notebook.
    pub provenance: Provenance,
    /// Free-text reviewer note. Often the most important field for
    /// partial / inconclusive outcomes. Empty string when absent.
    #[serde(default)]
    pub notes: String,
    /// Original creation timestamp (RFC 3339).
    pub created: String,
    /// If this attempt extends or refines a previous one, the
    /// `vrep_<id>` of that earlier attempt. Allows replication chains
    /// (lab A → lab B refines → lab C generalizes). Omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub previous_attempt: Option<String>,
}
299
300impl Replication {
301    /// Compute the content-addressed ID per v0.32 spec:
302    /// `SHA-256(target_finding | attempted_by | normalize(conditions.text) | outcome)`.
303    /// Returns first 16 hex chars prefixed with "vrep_".
304    ///
305    /// `conditions.text` is normalized by the same lower/whitespace/punct
306    /// rules as `FindingBundle::normalize_text` so two replications with
307    /// trivially-different condition prose produce the same id only when
308    /// the substantive conditions match.
309    pub fn content_address(
310        target_finding: &str,
311        attempted_by: &str,
312        conditions: &Conditions,
313        outcome: &str,
314    ) -> String {
315        let norm_conditions = FindingBundle::normalize_text(&conditions.text);
316        let preimage = format!(
317            "{}|{}|{}|{}",
318            target_finding, attempted_by, norm_conditions, outcome
319        );
320        let hash = Sha256::digest(preimage.as_bytes());
321        format!("vrep_{}", &hex::encode(hash)[..16])
322    }
323
324    /// Construct a new Replication with a freshly-derived id and
325    /// `created` timestamp set to now.
326    pub fn new(
327        target_finding: impl Into<String>,
328        attempted_by: impl Into<String>,
329        outcome: impl Into<String>,
330        evidence: Evidence,
331        conditions: Conditions,
332        provenance: Provenance,
333        notes: impl Into<String>,
334    ) -> Self {
335        let target = target_finding.into();
336        let actor = attempted_by.into();
337        let oc = outcome.into();
338        let id = Self::content_address(&target, &actor, &conditions, &oc);
339        Self {
340            id,
341            target_finding: target,
342            attempted_by: actor,
343            outcome: oc,
344            evidence,
345            conditions,
346            provenance,
347            notes: notes.into(),
348            created: Utc::now().to_rfc3339(),
349            previous_attempt: None,
350        }
351    }
352}
353
/// v0.34: ExpectedOutcome — the structured shape of a Prediction's
/// expected resolution.
///
/// `Affirmed` / `Falsified` are the binary cases ("this claim will
/// hold" / "this claim will fail"). `Quantitative` carries a numeric
/// expectation with tolerance + units ("CDR-SB effect ≥ 0.4 SD ± 0.1").
/// `Categorical` carries an arbitrary label for outcomes that aren't
/// numeric ("FDA decision is one of: full approval, accelerated,
/// declined").
///
/// Serialized as an internally tagged enum: the variant name goes under
/// the `kind` key in snake_case (`affirmed`, `falsified`, `quantitative`,
/// `categorical`), alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ExpectedOutcome {
    /// The claim is expected to hold.
    Affirmed,
    /// The claim is expected to fail.
    Falsified,
    /// A numeric expectation.
    Quantitative {
        /// Expected value, in `units`.
        value: f64,
        /// Tolerance around `value`, in `units`.
        tolerance: f64,
        /// Unit label, free text.
        units: String,
    },
    /// A non-numeric expectation.
    Categorical {
        /// The expected label.
        value: String,
    },
}
377
378impl ExpectedOutcome {
379    /// Compact string representation used in the content-address
380    /// preimage and CLI rendering.
381    pub fn canonical(&self) -> String {
382        match self {
383            ExpectedOutcome::Affirmed => "affirmed".to_string(),
384            ExpectedOutcome::Falsified => "falsified".to_string(),
385            ExpectedOutcome::Quantitative {
386                value,
387                tolerance,
388                units,
389            } => format!("quant:{value}±{tolerance}{units}"),
390            ExpectedOutcome::Categorical { value } => format!("cat:{value}"),
391        }
392    }
393}
394
/// v0.34: Prediction as a first-class kernel object.
///
/// A `Prediction` is a falsifiable claim about a future observation,
/// scoped to one or more existing findings, made by a registered
/// actor at a known timestamp, with an explicit resolution
/// criterion and (typically) a deadline. Resolutions arrive later as
/// `Resolution` records that close out the prediction by recording
/// what actually happened.
///
/// Predictions are the kernel's epistemic accountability layer.
/// Other parts of the substrate describe what *is* believed today;
/// predictions describe what is *expected* and let the substrate
/// score, over time, how well each actor's beliefs track reality.
/// Calibration records (Brier, log score, hit rate) are derived
/// from the resolved subset.
///
/// `vpred_<id>` is content-addressed over `claim_text + made_by +
/// predicted_at + resolution_criterion + expected_outcome` (see
/// `Prediction::content_address`). Two predictions with the same prose
/// but different actors, criteria, or expected outcomes are distinct
/// kernel objects. `target_findings`, `resolves_by`, `confidence`, and
/// `conditions` do not feed the id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Prediction {
    /// `vpred_<16hex>`, content-addressed.
    pub id: String,
    /// The falsifiable prediction itself, in plain prose.
    pub claim_text: String,
    /// Existing `vf_*` findings whose truth this prediction depends
    /// on. May be empty for predictions that don't tie back to a
    /// specific frontier claim.
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// RFC 3339 timestamp of when the prediction was made. Goes into
    /// the content-address preimage so re-asserting the same prose
    /// at a later date produces a distinct record.
    pub predicted_at: String,
    /// RFC 3339 deadline for resolution. `None` means open-ended; a
    /// concrete date is strongly preferred for calibration scoring.
    pub resolves_by: Option<String>,
    /// Unambiguous prose that says "we'll know this resolved when X."
    /// Goes into the content-address preimage so the same prose with
    /// a different criterion is a distinct record.
    pub resolution_criterion: String,
    /// Structured expectation: affirmed / falsified / quantitative /
    /// categorical. The resolver checks this against the observed
    /// outcome at resolution time. Its canonical string is part of the
    /// content-address preimage.
    pub expected_outcome: ExpectedOutcome,
    /// Stable actor id of the predictor.
    pub made_by: String,
    /// Predictor's prior belief in the expected outcome, on [0, 1].
    /// Drives Brier scoring at resolution time. `Prediction::new` stores
    /// the value as given without range-checking it.
    pub confidence: f64,
    /// Conditions under which the prediction applies. Reuses the
    /// `Conditions` shape so model relevance, scope, etc., flow
    /// through.
    pub conditions: Conditions,
    /// v0.40.1: True once the calibration runtime has marked this
    /// prediction as expired without an explicit `Resolution`. Set by
    /// `calibration::expire_overdue_predictions` when `resolves_by`
    /// is in the past. Pre-v0.40.1 frontiers omit the field; loading
    /// is backward-compatible. An expired prediction does not become
    /// a resolved prediction — it is closed without contributing to
    /// Brier or log scoring (calibration tracks it as a separate
    /// `n_expired` count so the predictor still answers for the
    /// missing commitment). Only serialized when `true`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub expired_unresolved: bool,
}
462
463impl Prediction {
464    /// Compute the content-addressed ID per v0.34 spec:
465    /// `SHA-256(normalize(claim_text) | made_by | predicted_at | normalize(resolution_criterion) | expected_outcome.canonical())`.
466    /// Returns first 16 hex chars prefixed with "vpred_".
467    pub fn content_address(
468        claim_text: &str,
469        made_by: &str,
470        predicted_at: &str,
471        resolution_criterion: &str,
472        expected_outcome: &ExpectedOutcome,
473    ) -> String {
474        let preimage = format!(
475            "{}|{}|{}|{}|{}",
476            FindingBundle::normalize_text(claim_text),
477            made_by,
478            predicted_at,
479            FindingBundle::normalize_text(resolution_criterion),
480            expected_outcome.canonical(),
481        );
482        let hash = Sha256::digest(preimage.as_bytes());
483        format!("vpred_{}", &hex::encode(hash)[..16])
484    }
485
486    /// Construct a new Prediction. `predicted_at` defaults to "now"
487    /// in RFC 3339 if not supplied.
488    #[allow(clippy::too_many_arguments)]
489    pub fn new(
490        claim_text: impl Into<String>,
491        target_findings: Vec<String>,
492        predicted_at: Option<String>,
493        resolves_by: Option<String>,
494        resolution_criterion: impl Into<String>,
495        expected_outcome: ExpectedOutcome,
496        made_by: impl Into<String>,
497        confidence: f64,
498        conditions: Conditions,
499    ) -> Self {
500        let now = predicted_at.unwrap_or_else(|| Utc::now().to_rfc3339());
501        let claim = claim_text.into();
502        let crit = resolution_criterion.into();
503        let actor = made_by.into();
504        let id = Self::content_address(&claim, &actor, &now, &crit, &expected_outcome);
505        Self {
506            id,
507            claim_text: claim,
508            target_findings,
509            predicted_at: now,
510            resolves_by,
511            resolution_criterion: crit,
512            expected_outcome,
513            made_by: actor,
514            confidence,
515            conditions,
516            expired_unresolved: false,
517        }
518    }
519}
520
/// v0.34: Resolution closes out a Prediction.
///
/// A `Resolution` records what actually happened, who observed it,
/// when, with what evidence, and whether the actual outcome matched
/// the predicted one. Calibration scoring (Brier, log score, hit rate)
/// runs over the resolved subset of predictions per actor.
///
/// The id is derived from `prediction_id`, `actual_outcome`, `resolved_by`,
/// `resolved_at`, and `matched_expected` (see `Resolution::content_address`);
/// `evidence` and `confidence` do not feed it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Resolution {
    /// `vres_<16hex>`, content-addressed.
    pub id: String,
    /// `vpred_<id>` of the prediction this resolves.
    pub prediction_id: String,
    /// Free-text description of what actually happened. The
    /// `matched_expected` flag is the structured judgment.
    pub actual_outcome: String,
    /// True if the observed outcome matched the prediction's
    /// `expected_outcome`. Drives hit-rate and Brier scoring.
    pub matched_expected: bool,
    /// RFC 3339 timestamp of resolution. `Resolution::new` sets it to the
    /// current UTC time.
    pub resolved_at: String,
    /// Stable actor id of the resolver. May or may not be the same
    /// actor that made the prediction (independent resolution is
    /// stronger).
    pub resolved_by: String,
    /// Evidence supporting the resolution — typically the paper /
    /// trial readout / observation that closes out the bet.
    pub evidence: Evidence,
    /// Resolver's confidence in the match judgment, on [0, 1].
    /// Useful when the actual outcome is partial or ambiguous.
    /// `Resolution::new` stores the value as given without range-checking it.
    pub confidence: f64,
}
552
553impl Resolution {
554    /// Compute the content-addressed ID per v0.34 spec:
555    /// `SHA-256(prediction_id | normalize(actual_outcome) | resolved_by | resolved_at | matched)`.
556    /// Returns first 16 hex chars prefixed with "vres_".
557    pub fn content_address(
558        prediction_id: &str,
559        actual_outcome: &str,
560        resolved_by: &str,
561        resolved_at: &str,
562        matched_expected: bool,
563    ) -> String {
564        let preimage = format!(
565            "{}|{}|{}|{}|{}",
566            prediction_id,
567            FindingBundle::normalize_text(actual_outcome),
568            resolved_by,
569            resolved_at,
570            matched_expected,
571        );
572        let hash = Sha256::digest(preimage.as_bytes());
573        format!("vres_{}", &hex::encode(hash)[..16])
574    }
575
576    /// Construct a Resolution with a freshly-derived id and `resolved_at`
577    /// timestamp.
578    pub fn new(
579        prediction_id: impl Into<String>,
580        actual_outcome: impl Into<String>,
581        matched_expected: bool,
582        resolved_by: impl Into<String>,
583        evidence: Evidence,
584        confidence: f64,
585    ) -> Self {
586        let now = Utc::now().to_rfc3339();
587        let pid = prediction_id.into();
588        let outcome = actual_outcome.into();
589        let resolver = resolved_by.into();
590        let id = Self::content_address(&pid, &outcome, &resolver, &now, matched_expected);
591        Self {
592            id,
593            prediction_id: pid,
594            actual_outcome: outcome,
595            matched_expected,
596            resolved_at: now,
597            resolved_by: resolver,
598            evidence,
599            confidence,
600        }
601    }
602}
603
/// Kind-specific payload of a `NegativeResult`.
///
/// v0.49: NegativeResult as a first-class kernel object.
///
/// The essay-driven primitive: when an experiment or trial does not
/// support its hypothesis, the substrate has to be able to record what
/// was tried, in what context, with what observed outcome — without
/// silently flipping the corresponding finding's confidence. Two
/// shapes carry the depositor's intent:
///
/// - `RegisteredTrial`: pre-registered trial reads out negative on its
///   primary endpoint. Carries `power` and `effect_size_ci` so a
///   downstream reader can tell an *informative* null (CI tightly
///   bounded around zero, adequate power) from an *uninformative* one
///   (wide CI, low power) — the distinction the essay calls out:
///   "an underpowered null does not poison downstream confidence."
/// - `Exploratory`: wet-lab dead end. Most failures here cannot be
///   statistically bounded; the substrate's first job is capturing
///   the (reagent, condition, observed outcome) tuple so the next
///   chemist designing a similar synthesis sees the dead end before
///   she runs the experiment.
///
/// `vnr_<id>` is content-addressed over the canonical preimage of the
/// kind-specific fields plus `deposited_by` and `created`. NegativeResults
/// link to the findings they bear against via `target_findings`; review
/// and retraction follow the same proposal -> canonical event ->
/// reducer pipeline as findings.
///
/// Serialized as an internally tagged enum: the variant name goes under
/// the `kind` key in snake_case (`registered_trial`, `exploratory`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum NegativeResultKind {
    /// Pre-registered trial whose primary endpoint read out negative.
    RegisteredTrial {
        /// Pre-specified primary endpoint (e.g., "CDR-SB change at 18 months").
        endpoint: String,
        /// Intervention arm description (drug + dose, device + protocol, etc.).
        intervention: String,
        /// Comparator arm description (placebo, active control, standard of care).
        comparator: String,
        /// Population scope: indication, stage, age range, biomarker eligibility.
        population: String,
        /// Number of participants enrolled (any arm).
        n_enrolled: u32,
        /// Statistical power for the primary endpoint, on [0, 1]. Below
        /// 0.8 is the "underpowered null" the essay warns about; this
        /// field is what lets a downstream reader distinguish that case
        /// from an adequately-powered null.
        power: f64,
        /// Confidence interval for the observed primary effect size,
        /// `(lower, upper)` in the trial's reported units. A CI tightly
        /// bracketing zero with adequate power is an *informative* null;
        /// a wide CI under low power is not.
        effect_size_ci: (f64, f64),
        /// Pre-registered minimum effect size of interest, in the same
        /// units as `effect_size_ci`. When the CI excludes this
        /// threshold, the null is a positive epistemic claim about the
        /// absence of clinically meaningful effect. None when no
        /// pre-registered MCID was declared.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effect_size_threshold: Option<f64>,
        /// Trial registry id (e.g., "NCT04532333"). Strongly preferred;
        /// the registry id is the load-bearing audit trail.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        registry_id: Option<String>,
    },
    /// Exploratory wet-lab failure. Captures the (reagent, condition,
    /// observed outcome) tuple even when no statistical bound applies.
    Exploratory {
        /// Reagent, compound, vector, or perturbation tried.
        reagent: String,
        /// Free-text observed outcome (e.g., "no measurable expression",
        /// "yields plateau at 6%", "cytotoxicity at all tested doses").
        observation: String,
        /// Number of independent attempts whose outcome agreed.
        /// Single attempts are fine but should be honest about it.
        attempts: u32,
    },
}
679
/// v0.49: a recorded null — a trial or experiment that did not support its
/// hypothesis — stored as its own kernel object.
///
/// The kind-specific details live in `kind`; the remaining fields carry
/// who deposited the result, under what conditions, from what source, and
/// its review / retraction / access state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NegativeResult {
    /// `vnr_<16hex>`, content-addressed; see `NegativeResult::content_address`.
    pub id: String,
    /// The kind-specific payload. `NegativeResultKind` is itself internally
    /// tagged with a `kind` key, so the serialized form nests that tag
    /// inside this field's `kind` object.
    pub kind: NegativeResultKind,
    /// `vf_*` findings whose positive claim this null bears against.
    /// May be empty — exploratory dead ends don't always have a
    /// pre-existing claim to negate.
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// Stable actor id of the depositing lab / curator / agent.
    pub deposited_by: String,
    /// Conditions under which the null was observed. Reuses the
    /// `Conditions` shape so model relevance, scope, and translation
    /// boundaries flow through to downstream confidence math.
    pub conditions: Conditions,
    /// Provenance of the trial readout / paper / preprint / lab notebook.
    pub provenance: Provenance,
    /// RFC 3339 creation timestamp. Part of the content-address preimage.
    pub created: String,
    /// Free-text reviewer note. Often the most important field — the
    /// "why this null matters" or "why we ran this in the first place"
    /// context that licenses use as a dead-end signal. Empty string when
    /// absent.
    #[serde(default)]
    pub notes: String,
    /// Optional review verdict. Mirrors the `Flags.review_state`
    /// pattern on FindingBundle so reviewed/contested/needs-revision
    /// nulls are first-class state. Omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// True once a `negative_result.retracted` event has been applied.
    /// Only serialized when `true`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub retracted: bool,
    /// v0.51: Read-side access tier. See `FindingBundle.access_tier`
    /// for the doctrine. Defaults to `Public` and skips serialization
    /// when public so pre-v0.51 frontiers round-trip byte-identically.
    /// The skip predicate `is_public_tier` is defined outside this part
    /// of the file.
    #[serde(default, skip_serializing_if = "is_public_tier")]
    pub access_tier: crate::access_tier::AccessTier,
}
720
721impl NegativeResultKind {
722    /// Compact canonical representation used in the content-address
723    /// preimage. Stable across protocol versions — adding a new kind
724    /// must not change existing kinds' canonical strings.
725    pub fn canonical(&self) -> String {
726        match self {
727            NegativeResultKind::RegisteredTrial {
728                endpoint,
729                intervention,
730                comparator,
731                population,
732                n_enrolled,
733                power,
734                effect_size_ci,
735                effect_size_threshold,
736                registry_id,
737            } => format!(
738                "trial|{}|{}|{}|{}|{}|{:.4}|{:.6},{:.6}|{}|{}",
739                FindingBundle::normalize_text(endpoint),
740                FindingBundle::normalize_text(intervention),
741                FindingBundle::normalize_text(comparator),
742                FindingBundle::normalize_text(population),
743                n_enrolled,
744                power,
745                effect_size_ci.0,
746                effect_size_ci.1,
747                effect_size_threshold
748                    .map(|t| format!("{t:.6}"))
749                    .unwrap_or_default(),
750                registry_id.clone().unwrap_or_default(),
751            ),
752            NegativeResultKind::Exploratory {
753                reagent,
754                observation,
755                attempts,
756            } => format!(
757                "exploratory|{}|{}|{}",
758                FindingBundle::normalize_text(reagent),
759                FindingBundle::normalize_text(observation),
760                attempts,
761            ),
762        }
763    }
764}
765
766impl NegativeResult {
767    /// Compute the content-addressed ID per v0.49 spec:
768    /// `SHA-256(kind.canonical() | deposited_by | created | normalize(conditions.text))`.
769    /// Returns first 16 hex chars prefixed with "vnr_".
770    pub fn content_address(
771        kind: &NegativeResultKind,
772        deposited_by: &str,
773        created: &str,
774        conditions: &Conditions,
775    ) -> String {
776        let preimage = format!(
777            "{}|{}|{}|{}",
778            kind.canonical(),
779            deposited_by,
780            created,
781            FindingBundle::normalize_text(&conditions.text),
782        );
783        let hash = Sha256::digest(preimage.as_bytes());
784        format!("vnr_{}", &hex::encode(hash)[..16])
785    }
786
787    /// Construct a new NegativeResult with a freshly-derived id and
788    /// `created` timestamp set to now.
789    pub fn new(
790        kind: NegativeResultKind,
791        target_findings: Vec<String>,
792        deposited_by: impl Into<String>,
793        conditions: Conditions,
794        provenance: Provenance,
795        notes: impl Into<String>,
796    ) -> Self {
797        let depositor = deposited_by.into();
798        let created = Utc::now().to_rfc3339();
799        let id = Self::content_address(&kind, &depositor, &created, &conditions);
800        Self {
801            id,
802            kind,
803            target_findings,
804            deposited_by: depositor,
805            conditions,
806            provenance,
807            created,
808            notes: notes.into(),
809            review_state: None,
810            retracted: false,
811            access_tier: crate::access_tier::AccessTier::Public,
812        }
813    }
814
815    /// True when the null is informative under the registered-trial
816    /// criterion: adequate power AND CI excludes the pre-registered
817    /// MCID. Returns `None` for exploratory nulls or trials missing
818    /// the required fields. Used by downstream confidence math to
819    /// distinguish "absence of effect" from "absence of evidence."
820    pub fn is_informative_trial_null(&self) -> Option<bool> {
821        match &self.kind {
822            NegativeResultKind::RegisteredTrial {
823                power,
824                effect_size_ci,
825                effect_size_threshold,
826                ..
827            } => {
828                let threshold = (*effect_size_threshold)?;
829                Some(*power >= 0.8 && effect_size_ci.0 > -threshold && effect_size_ci.1 < threshold)
830            }
831            NegativeResultKind::Exploratory { .. } => None,
832        }
833    }
834}
835
/// v0.50: Trajectory as a first-class kernel object.
///
/// The eighth essay primitive: "the search path that produced the
/// finding, so the next agent does not re-derive what the last one
/// already ruled out, with the caveat that this primitive will be
/// deposited last and most thinly because labs have real reasons not
/// to expose dead ends."
///
/// A `Trajectory` records the ordered steps a researcher or agent
/// took on the way to a finding (or nowhere): hypotheses considered,
/// branches tried, branches ruled out and why. Steps are append-only
/// in the canonical event log via `trajectory.step_appended` events;
/// the materialized `steps` collection is reproduced by replay from
/// genesis.
///
/// `vtr_<id>` is content-addressed over `target_findings + deposited_by
/// + created` — fixed at creation, so appending steps doesn't mint a
/// new id. Idempotent on duplicate `vtr_id` at create time, idempotent
/// on duplicate step content-addresses at append time.
///
/// This enum itself is the closed set of kinds a `TrajectoryStep` can
/// carry. Variants (de)serialize in `snake_case` (e.g. `ruled_out`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum TrajectoryStepKind {
    /// A candidate explanation or direction the researcher considered.
    Hypothesis,
    /// A specific experiment, analysis, or path the researcher tried.
    Tried,
    /// A branch the researcher excluded, with reason. The most
    /// information-dense step kind for downstream agents.
    RuledOut,
    /// A neutral observation that constrained the search without
    /// confirming or ruling out a branch.
    Observed,
    /// A refinement of an earlier hypothesis or condition.
    Refined,
}
871
872impl TrajectoryStepKind {
873    pub fn canonical(&self) -> &'static str {
874        match self {
875            TrajectoryStepKind::Hypothesis => "hypothesis",
876            TrajectoryStepKind::Tried => "tried",
877            TrajectoryStepKind::RuledOut => "ruled_out",
878            TrajectoryStepKind::Observed => "observed",
879            TrajectoryStepKind::Refined => "refined",
880        }
881    }
882}
883
/// One step in a trajectory's search path.
///
/// The id covers the parent trajectory id, `kind`, `description`,
/// `at`, and `actor` (see `TrajectoryStep::content_address`);
/// `references` is not part of the id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectoryStep {
    /// `vts_<16hex>`, content-addressed over the parent trajectory id
    /// + step kind + description + at + actor.
    pub id: String,
    /// What kind of step this is.
    pub kind: TrajectoryStepKind,
    /// Free-text description. For RuledOut, prose should name the
    /// reason for exclusion — that's the load-bearing field for the
    /// next agent reading the search.
    pub description: String,
    /// RFC 3339 timestamp the step happened.
    pub at: String,
    /// Stable actor id of who took the step. May differ from the
    /// trajectory's `deposited_by` when an agent appends to a
    /// trajectory another actor opened.
    pub actor: String,
    /// Optional referenced kernel objects (`vf_*`, `vnr_*`, `vrep_*`,
    /// `vpred_*`, `vd_*`, `vc_*`). Lets a step cite the negative
    /// result it produced, the dataset it ran against, etc., without
    /// duplicating their content. Defaults to empty when absent on
    /// deserialize.
    #[serde(default)]
    pub references: Vec<String>,
}
908
909impl TrajectoryStep {
910    /// Compute the content-addressed step id per v0.50 spec:
911    /// `SHA-256(trajectory_id | kind.canonical() | normalize(description) | at | actor)`.
912    /// Returns first 16 hex chars prefixed with "vts_".
913    pub fn content_address(
914        trajectory_id: &str,
915        kind: &TrajectoryStepKind,
916        description: &str,
917        at: &str,
918        actor: &str,
919    ) -> String {
920        let preimage = format!(
921            "{}|{}|{}|{}|{}",
922            trajectory_id,
923            kind.canonical(),
924            FindingBundle::normalize_text(description),
925            at,
926            actor,
927        );
928        let hash = Sha256::digest(preimage.as_bytes());
929        format!("vts_{}", &hex::encode(hash)[..16])
930    }
931
932    /// Construct a new TrajectoryStep with a freshly-derived id and
933    /// `at` timestamp set to now if not supplied.
934    pub fn new(
935        trajectory_id: &str,
936        kind: TrajectoryStepKind,
937        description: impl Into<String>,
938        actor: impl Into<String>,
939        at: Option<String>,
940        references: Vec<String>,
941    ) -> Self {
942        let at = at.unwrap_or_else(|| Utc::now().to_rfc3339());
943        let actor = actor.into();
944        let description = description.into();
945        let id = Self::content_address(trajectory_id, &kind, &description, &at, &actor);
946        Self {
947            id,
948            kind,
949            description,
950            at,
951            actor,
952            references,
953        }
954    }
955}
956
/// The recorded search path toward zero or more findings: an ordered,
/// append-only list of `TrajectoryStep`s plus depositor metadata.
///
/// The id covers only `target_findings`, `deposited_by`, and
/// `created` (see `Trajectory::content_address`); `steps`, `notes`,
/// and the review/retraction/access fields are outside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Trajectory {
    /// `vtr_<16hex>`, content-addressed at creation.
    pub id: String,
    /// `vf_*` findings this trajectory describes the search for. May
    /// be empty when the trajectory leads nowhere yet — the search
    /// can be deposited before its target finding exists.
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// Stable actor id of the depositor (the lab / curator / agent
    /// that opens the trajectory).
    pub deposited_by: String,
    /// RFC 3339 creation timestamp (also folded into the id).
    pub created: String,
    /// Append-only ordered list of steps. Reproduced by replay from
    /// `trajectory.step_appended` events.
    #[serde(default)]
    pub steps: Vec<TrajectoryStep>,
    /// Free-text reviewer note on the trajectory as a whole.
    #[serde(default)]
    pub notes: String,
    /// Optional review verdict — same `ReviewState` enum the rest of
    /// the kernel uses. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// True once a `trajectory.retracted` event has been applied.
    /// Omitted from serialized output while `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub retracted: bool,
    /// v0.51: Read-side access tier. See `FindingBundle.access_tier`
    /// for the doctrine. Search paths can be especially sensitive
    /// when they document protocols that capability-gate downstream;
    /// the substrate accommodates this through the same tier
    /// machinery as findings and nulls.
    #[serde(default, skip_serializing_if = "is_public_tier")]
    pub access_tier: crate::access_tier::AccessTier,
}
993
994impl Trajectory {
995    /// Compute the content-addressed trajectory id per v0.50 spec:
996    /// `SHA-256(target_findings.join(",") | deposited_by | created)`.
997    /// Returns first 16 hex chars prefixed with "vtr_".
998    ///
999    /// Note: `target_findings` is sorted before concatenation so the
1000    /// id is stable under permutations of an unordered input set.
1001    /// Steps are NOT folded into the id — they're append-only and
1002    /// would otherwise force a new id every append.
1003    pub fn content_address(
1004        target_findings: &[String],
1005        deposited_by: &str,
1006        created: &str,
1007    ) -> String {
1008        let mut sorted: Vec<&str> = target_findings.iter().map(String::as_str).collect();
1009        sorted.sort();
1010        let preimage = format!("{}|{}|{}", sorted.join(","), deposited_by, created);
1011        let hash = Sha256::digest(preimage.as_bytes());
1012        format!("vtr_{}", &hex::encode(hash)[..16])
1013    }
1014
1015    /// Construct a new Trajectory with a freshly-derived id and
1016    /// empty steps. Steps are appended via
1017    /// `trajectory.step_appended` events through the reducer.
1018    pub fn new(
1019        target_findings: Vec<String>,
1020        deposited_by: impl Into<String>,
1021        notes: impl Into<String>,
1022    ) -> Self {
1023        let depositor = deposited_by.into();
1024        let created = Utc::now().to_rfc3339();
1025        let id = Self::content_address(&target_findings, &depositor, &created);
1026        Self {
1027            id,
1028            target_findings,
1029            deposited_by: depositor,
1030            created,
1031            steps: Vec::new(),
1032            notes: notes.into(),
1033            review_state: None,
1034            retracted: false,
1035            access_tier: crate::access_tier::AccessTier::Public,
1036        }
1037    }
1038}
1039
/// v0.33: Dataset as a first-class kernel object.
///
/// A `Dataset` is a versioned, content-addressed reference to data
/// that anchors empirical claims. Before v0.33, datasets were strings
/// in `Provenance.title` or entity-typed mentions in assertions —
/// a claim could say "we used ADNI" without anchoring which release
/// of ADNI the analysis ran against, and re-running the same code on
/// a refreshed cohort silently produced a "different" claim.
///
/// `vd_<id>` is content-addressed over `name + version + content_hash
/// + url`. Two dataset records with the same name but different
/// versions get distinct ids; two records pointing at the same
/// snapshot collapse to the same id. Claims can reference the exact
/// bytes they rest on, not only a dataset name in prose.
///
/// `schema`, `row_count`, `license`, `provenance`, and `created` are
/// descriptive only: they are not inputs to the id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// `vd_<16hex>`, content-addressed; see `Dataset::content_address`.
    pub id: String,
    /// Human-readable name (e.g. "ADNI", "TRAILBLAZER-ALZ", "MIMIC-IV").
    pub name: String,
    /// Semantic version or release tag (e.g. "ADNI-3", "v2.2", "SR0").
    /// Two entries differing only in version are distinct kernel objects.
    pub version: Option<String>,
    /// Optional column-level schema as `(name, type)` pairs. For
    /// non-tabular datasets, leave empty.
    #[serde(default)]
    pub schema: Vec<(String, String)>,
    /// Number of rows / observations / records, when known.
    pub row_count: Option<u64>,
    /// SHA-256 of the canonical contents, when computable. For
    /// large datasets stored remotely, this is the publisher's
    /// declared content hash; integrity verification is the puller's
    /// job (same pattern as `vfr_*` snapshots).
    pub content_hash: String,
    /// Where the dataset is reachable (https URL, file://, s3://, etc.).
    pub url: Option<String>,
    /// License identifier or URL (e.g. "CC-BY-4.0", a Crossref license).
    pub license: Option<String>,
    /// Provenance of the dataset itself — typically the paper or release
    /// that publishes it. Reuses `Provenance` for shape parity with
    /// findings.
    pub provenance: Provenance,
    /// RFC 3339 creation timestamp. Not folded into the id, so the
    /// same snapshot deposited at different times keeps one `vd_*`.
    pub created: String,
}
1085
1086impl Dataset {
1087    /// Compute the content-addressed ID per v0.33 spec:
1088    /// `SHA-256(name | version | content_hash | url)`.
1089    /// Returns first 16 hex chars prefixed with "vd_".
1090    pub fn content_address(
1091        name: &str,
1092        version: Option<&str>,
1093        content_hash: &str,
1094        url: Option<&str>,
1095    ) -> String {
1096        let preimage = format!(
1097            "{}|{}|{}|{}",
1098            name,
1099            version.unwrap_or(""),
1100            content_hash,
1101            url.unwrap_or("")
1102        );
1103        let hash = Sha256::digest(preimage.as_bytes());
1104        format!("vd_{}", &hex::encode(hash)[..16])
1105    }
1106
1107    /// Construct a new Dataset with a freshly-derived id and `created`
1108    /// timestamp set to now.
1109    pub fn new(
1110        name: impl Into<String>,
1111        version: Option<String>,
1112        content_hash: impl Into<String>,
1113        url: Option<String>,
1114        license: Option<String>,
1115        provenance: Provenance,
1116    ) -> Self {
1117        let n = name.into();
1118        let h = content_hash.into();
1119        let id = Self::content_address(&n, version.as_deref(), &h, url.as_deref());
1120        Self {
1121            id,
1122            name: n,
1123            version,
1124            schema: Vec::new(),
1125            row_count: None,
1126            content_hash: h,
1127            url,
1128            license,
1129            provenance,
1130            created: Utc::now().to_rfc3339(),
1131        }
1132    }
1133}
1134
/// v0.33: CodeArtifact as a first-class kernel object.
///
/// A `CodeArtifact` is a content-addressed pointer at a specific
/// region of source code (a function, a notebook cell, a script, a
/// pipeline step) at a specific git commit. Before v0.33, code was
/// captured as a string in `Evidence.method` — "we ran a logistic
/// regression" — with no way for a reader to verify which code
/// produced the result, or to re-run it.
///
/// `vc_<id>` is content-addressed over `repo_url + git_commit + path
/// + line_range + content_hash`. The same code at two commits gets
/// two records (the relevant historical fact); the same code in two
/// paths in the same repo also gets two records (location matters
/// for re-execution).
///
/// `language`, `entry_point`, and `created` are not inputs to the id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeArtifact {
    /// `vc_<16hex>`, content-addressed; see `CodeArtifact::content_address`.
    pub id: String,
    /// Source language: `python` / `r` / `julia` / `rust` / `bash`,
    /// etc. Not validated against a closed allow-list — code provenance
    /// should accept whatever language the analysis was actually in.
    pub language: String,
    /// Repository URL (e.g. `https://github.com/vela-science/vela`).
    pub repo_url: Option<String>,
    /// Specific git commit (40-char SHA preferred). Required for
    /// reproducibility; `None` means "unpinned" and weakens the
    /// substrate claim.
    pub git_commit: Option<String>,
    /// Path within the repository (e.g. `crates/vela-scientist/src/notes.rs`).
    pub path: String,
    /// Optional line range as `(start, end)`, both inclusive. Rendered
    /// as `start-end` inside the id preimage.
    pub line_range: Option<(u32, u32)>,
    /// SHA-256 of the snippet body. Decouples the artifact from the
    /// repository's external state — even if a repo is deleted, the
    /// content_hash remains anchored.
    pub content_hash: String,
    /// Optional entry point: function name, notebook cell id, or
    /// `__main__`. Used by re-execution tooling.
    pub entry_point: Option<String>,
    /// RFC 3339 creation timestamp.
    pub created: String,
}
1177
1178impl CodeArtifact {
1179    /// Compute the content-addressed ID per v0.33 spec:
1180    /// `SHA-256(repo_url | git_commit | path | line_range | content_hash)`.
1181    /// Returns first 16 hex chars prefixed with "vc_".
1182    pub fn content_address(
1183        repo_url: Option<&str>,
1184        git_commit: Option<&str>,
1185        path: &str,
1186        line_range: Option<(u32, u32)>,
1187        content_hash: &str,
1188    ) -> String {
1189        let lr = line_range
1190            .map(|(a, b)| format!("{a}-{b}"))
1191            .unwrap_or_default();
1192        let preimage = format!(
1193            "{}|{}|{}|{}|{}",
1194            repo_url.unwrap_or(""),
1195            git_commit.unwrap_or(""),
1196            path,
1197            lr,
1198            content_hash
1199        );
1200        let hash = Sha256::digest(preimage.as_bytes());
1201        format!("vc_{}", &hex::encode(hash)[..16])
1202    }
1203
1204    /// Construct a new CodeArtifact with a freshly-derived id and
1205    /// `created` timestamp.
1206    pub fn new(
1207        language: impl Into<String>,
1208        repo_url: Option<String>,
1209        git_commit: Option<String>,
1210        path: impl Into<String>,
1211        line_range: Option<(u32, u32)>,
1212        content_hash: impl Into<String>,
1213        entry_point: Option<String>,
1214    ) -> Self {
1215        let p = path.into();
1216        let h = content_hash.into();
1217        let id = Self::content_address(
1218            repo_url.as_deref(),
1219            git_commit.as_deref(),
1220            &p,
1221            line_range,
1222            &h,
1223        );
1224        Self {
1225            id,
1226            language: language.into(),
1227            repo_url,
1228            git_commit,
1229            path: p,
1230            line_range,
1231            content_hash: h,
1232            entry_point,
1233            created: Utc::now().to_rfc3339(),
1234        }
1235    }
1236}
1237
/// Generic content-addressed artifact.
///
/// This is the common substrate object for records and files that are not
/// only papers: trial registry snapshots, protocols, supplements, notebooks,
/// tables, figures, model outputs, lab files, and dataset manifests. Typed
/// objects such as `Dataset` and `CodeArtifact` still exist because they
/// carry stronger domain-specific fields. `Artifact` gives every byte or
/// pointer the same minimum durability contract.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Artifact {
    /// `va_<16hex>`, content-addressed over kind, name, hash, source, and
    /// locator.
    pub id: String,
    /// One of `VALID_ARTIFACT_KINDS`.
    pub kind: String,
    /// Human-readable label.
    pub name: String,
    /// SHA-256 commitment. Convention: `sha256:<64hex>`.
    pub content_hash: String,
    /// Byte count when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size_bytes: Option<u64>,
    /// MIME type or close equivalent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub media_type: Option<String>,
    /// `local_blob`, `local_file`, `remote`, or `pointer`.
    pub storage_mode: String,
    /// Local relative path, file path, HTTPS URL, S3 URL, or registry locator.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locator: Option<String>,
    /// Original upstream URL or accession, distinct from a mirrored blob path.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// License identifier, URL, or access terms note.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Findings this artifact directly bears on.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub target_findings: Vec<String>,
    /// Pointer to the source record that described this artifact, if one
    /// already exists in `sources`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_id: Option<String>,
    /// Artifact-level provenance. The source record may be a registry,
    /// repository, dataset portal, protocol page, or paper.
    pub provenance: Provenance,
    /// Structured adapter metadata such as NCT id, outcomes, accession ids,
    /// version tags, or retrieval timestamps.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub metadata: BTreeMap<String, Value>,
    /// Review lifecycle for the artifact itself.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// Retraction flag. Defaults to `false` when absent on deserialize.
    /// Unlike the same-named field on `Trajectory`, this one has no
    /// `skip_serializing_if`, so it is always written out.
    #[serde(default)]
    pub retracted: bool,
    /// Read-side access tier. Falls back to the tier type's `Default`
    /// when absent on deserialize; always written out (no
    /// `skip_serializing_if` here).
    #[serde(default)]
    pub access_tier: crate::access_tier::AccessTier,
    /// RFC 3339 creation timestamp.
    pub created: String,
}
1298
1299impl Artifact {
1300    pub fn content_address(
1301        kind: &str,
1302        name: &str,
1303        content_hash: &str,
1304        source_url: Option<&str>,
1305        locator: Option<&str>,
1306    ) -> String {
1307        let preimage = format!(
1308            "{}|{}|{}|{}|{}",
1309            kind,
1310            name,
1311            content_hash,
1312            source_url.unwrap_or(""),
1313            locator.unwrap_or("")
1314        );
1315        let hash = Sha256::digest(preimage.as_bytes());
1316        format!("va_{}", &hex::encode(hash)[..16])
1317    }
1318
1319    #[allow(clippy::too_many_arguments)]
1320    pub fn new(
1321        kind: impl Into<String>,
1322        name: impl Into<String>,
1323        content_hash: impl Into<String>,
1324        size_bytes: Option<u64>,
1325        media_type: Option<String>,
1326        storage_mode: impl Into<String>,
1327        locator: Option<String>,
1328        source_url: Option<String>,
1329        license: Option<String>,
1330        target_findings: Vec<String>,
1331        provenance: Provenance,
1332        metadata: BTreeMap<String, Value>,
1333        access_tier: crate::access_tier::AccessTier,
1334    ) -> Result<Self, String> {
1335        let kind = kind.into();
1336        if !valid_artifact_kind(&kind) {
1337            return Err(format!(
1338                "artifact kind '{kind}' is not supported; valid: {}",
1339                VALID_ARTIFACT_KINDS.join(", ")
1340            ));
1341        }
1342        let name = name.into();
1343        if name.trim().is_empty() {
1344            return Err("artifact name must be non-empty".to_string());
1345        }
1346        let content_hash = normalize_sha256(content_hash.into())?;
1347        let storage_mode = storage_mode.into();
1348        if !matches!(
1349            storage_mode.as_str(),
1350            "local_blob" | "local_file" | "remote" | "pointer"
1351        ) {
1352            return Err(format!(
1353                "artifact storage_mode '{storage_mode}' is not supported; valid: local_blob, local_file, remote, pointer"
1354            ));
1355        }
1356        let id = Self::content_address(
1357            &kind,
1358            &name,
1359            &content_hash,
1360            source_url.as_deref(),
1361            locator.as_deref(),
1362        );
1363        Ok(Self {
1364            id,
1365            kind,
1366            name,
1367            content_hash,
1368            size_bytes,
1369            media_type,
1370            storage_mode,
1371            locator,
1372            source_url,
1373            license,
1374            target_findings,
1375            source_id: None,
1376            provenance,
1377            metadata,
1378            review_state: None,
1379            retracted: false,
1380            access_tier,
1381            created: Utc::now().to_rfc3339(),
1382        })
1383    }
1384}
1385
/// Normalize a SHA-256 commitment to the `sha256:<64 lowercase hex>` form.
///
/// Surrounding whitespace is trimmed and an optional (lowercase)
/// `sha256:` prefix is accepted. What remains must be exactly 64 ASCII
/// hex digits in either case.
///
/// # Errors
///
/// Returns a message quoting the trimmed input when it is not a
/// 64-digit hex string.
fn normalize_sha256(value: String) -> Result<String, String> {
    let candidate = value.trim();
    let digest = match candidate.strip_prefix("sha256:") {
        Some(rest) => rest,
        None => candidate,
    };
    // Byte-wise check: any non-ASCII byte fails `is_ascii_hexdigit`.
    let well_formed = digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit());
    if well_formed {
        Ok(format!("sha256:{}", digest.to_ascii_lowercase()))
    } else {
        Err(format!(
            "content_hash must be sha256:<64hex> or 64 hex chars, got {candidate:?}"
        ))
    }
}
1396
/// Conditions under which a result was obtained.
///
/// Every non-`Option` field defaults (empty string / empty vec /
/// `false`) when absent on deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Conditions {
    /// Free-text description of the conditions. This is the only
    /// field `NegativeResult::content_address` reads from
    /// `Conditions`; it is normalized before hashing.
    #[serde(default)]
    pub text: String,
    /// Species names, split by verification status.
    /// NOTE(review): who performs the verification is not visible
    /// from this file — confirm.
    #[serde(default)]
    pub species_verified: Vec<String>,
    #[serde(default)]
    pub species_unverified: Vec<String>,
    /// Model-system flags. Per the doc on
    /// `ConfidenceComponents::model_relevance`, `human_data`,
    /// `in_vivo`, and `in_vitro` select that component's value.
    #[serde(default)]
    pub in_vitro: bool,
    #[serde(default)]
    pub in_vivo: bool,
    #[serde(default)]
    pub human_data: bool,
    #[serde(default)]
    pub clinical_trial: bool,
    /// Optional free-text qualifiers; no format is enforced here.
    pub concentration_range: Option<String>,
    pub duration: Option<String>,
    pub age_group: Option<String>,
    pub cell_type: Option<String>,
}
1418
/// Structured breakdown of frontier epistemic confidence (v0.2.0).
///
/// Several fields carry a `#[serde(alias = ...)]` so records written
/// under the older field names still deserialize; serialization
/// always uses the current names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceComponents {
    /// Derived from evidence.type (meta_analysis=0.95, systematic_review=0.90,
    /// experimental=0.80, observational=0.65, computational=0.55, case_report=0.40,
    /// theoretical=0.30).
    #[serde(alias = "evidence_grade")]
    pub evidence_strength: f64,
    /// 1.0 if replicated with high count, 0.7 if not replicated.
    /// When replicated: min(1.0, 0.7 + 0.1 * replication_count).
    #[serde(alias = "replication_factor")]
    pub replication_strength: f64,
    /// Derived from sample_size: >1000 -> 1.0, >100 -> 0.9, >30 -> 0.8,
    /// >10 -> 0.7, <=10 or null -> 0.6.
    pub sample_strength: f64,
    /// human_data=1.0, in_vivo=0.8, in_vitro=0.6, else=0.5.
    #[serde(alias = "species_relevance")]
    pub model_relevance: f64,
    /// Reduces score when finding is contested. 0.15 if contested, else 0.0.
    #[serde(alias = "contradiction_penalty")]
    pub review_penalty: f64,
    /// Additive calibration signal layered on top of the deterministic support score.
    /// Defaults to `0.0` when absent on deserialize.
    #[serde(default)]
    pub calibration_adjustment: f64,
    /// v0.38.1: causal-claim × evidence-grade compatibility multiplier.
    /// Defaults to 1.0 — neutral — when either field is `None` (the
    /// pre-v0.38 case). RCT bumps any claim slightly; an observational-
    /// grade *intervention* claim gets a meaningful penalty (the
    /// design doesn't actually support the claim being made).
    #[serde(default = "default_causal_consistency")]
    pub causal_consistency: f64,
    /// Confidence formula version stamp. v0.3 introduced this; v0.4
    /// bumps it to "v0.4" for the same scoring formula recomputed
    /// against substrate-level changes (genesis events, signed actors,
    /// canonical/derived split — none of which alter scoring math).
    /// v0.38.1 bumps to "v0.7" for the addition of `causal_consistency`.
    /// A second implementation may refuse to interpret components
    /// computed with an unknown formula version.
    ///
    /// NOTE(review): `default_formula_version` returns "v0.8", but the
    /// history above stops at "v0.7" — record what the "v0.8" bump
    /// was for, or confirm the default is intended.
    #[serde(default = "default_formula_version")]
    pub formula_version: String,
}
1460
/// Serde default for `ConfidenceComponents::causal_consistency`:
/// the neutral multiplier, used when the field is absent.
fn default_causal_consistency() -> f64 {
    const NEUTRAL_MULTIPLIER: f64 = 1.0;
    NEUTRAL_MULTIPLIER
}
1464
/// Serde default for `ConfidenceComponents::formula_version`, used
/// when the field is absent.
fn default_formula_version() -> String {
    String::from("v0.8")
}
1468
/// Confidence method: how the score was determined.
///
/// Variants (de)serialize in `snake_case`. The `Default` is
/// `LlmInitial`, which is what `Confidence::method` falls back to
/// when the field is absent on deserialize.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum ConfidenceMethod {
    /// Computed from structured frontier support components (v0.2.0).
    Computed,
    /// A human expert assigned it.
    ExpertJudgment,
    /// Legacy import path for confidence seeded before component breakdown existed.
    #[default]
    LlmInitial,
}
1482
/// Semantic category of the confidence score stored on the frontier.
///
/// Currently a single-variant enum; `FrontierEpistemic` is also the
/// `Default`, so a missing `kind` field deserializes to it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ConfidenceKind {
    /// Bounded epistemic support for the finding as currently represented in frontier state.
    #[default]
    FrontierEpistemic,
}
1491
/// A confidence score together with how it was produced.
///
/// `score` and `basis` are required on deserialize; every other field
/// has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Confidence {
    /// Semantic meaning of `score`. v0 emits `frontier_epistemic`.
    #[serde(default)]
    pub kind: ConfidenceKind,
    /// The aggregate score.
    /// NOTE(review): presumably bounded to [0, 1]; nothing in this
    /// struct enforces a range — confirm against the scoring code.
    pub score: f64,
    /// Free-text statement of what the score rests on.
    pub basis: String,
    /// How this score was determined.
    #[serde(default)]
    pub method: ConfidenceMethod,
    /// Structured component breakdown required by the current schema.
    /// Modeled as `Option` and omitted from serialized output when
    /// `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub components: Option<ConfidenceComponents>,
    /// Confidence in the extraction itself (separate from scientific confidence).
    /// Falls back to `default_extraction_conf()` when absent.
    #[serde(default = "default_extraction_conf")]
    pub extraction_confidence: f64,
}
1509
/// Serde default for `Confidence::extraction_confidence`, used when
/// the field is absent.
fn default_extraction_conf() -> f64 {
    const DEFAULT_EXTRACTION_CONFIDENCE: f64 = 0.85;
    DEFAULT_EXTRACTION_CONFIDENCE
}
1513
1514impl Confidence {
1515    /// Construct a `Confidence` with a raw score and basis string but
1516    /// without the structured `components` breakdown. The agent layer
1517    /// uses this when an LLM produces a single confidence value; the
1518    /// computed deterministic components arrive later via
1519    /// `compute_confidence`.
1520    ///
1521    /// Renamed from `legacy()` in v0.36; the previous name was a
1522    /// historical accident (the constructor was never actually
1523    /// deprecated, just misnamed when the structured components shipped
1524    /// alongside it).
1525    pub fn raw(score: f64, basis: impl Into<String>, extraction_confidence: f64) -> Self {
1526        Self {
1527            kind: ConfidenceKind::FrontierEpistemic,
1528            score,
1529            basis: basis.into(),
1530            method: ConfidenceMethod::LlmInitial,
1531            components: None,
1532            extraction_confidence,
1533        }
1534    }
1535}
1536
/// Parse a sample_size string into a numeric value for scoring.
///
/// Scans the string for runs of ASCII digits and returns the largest number
/// found, or `None` when the string contains no parseable number. Handles
/// formats like "n=30", "n = 120", "3 cohorts of 20", "500",
/// "n=24 per group", and comma-grouped thousands such as "10,000" or
/// "n=1,234,567".
///
/// A comma is treated as a thousands separator only when the leading run
/// has one to three digits and the comma is followed by exactly three
/// digits (not four or more). Anything else — "30, 40", "1,2345",
/// "1234,567" — is read as separate numbers. Without this, "10,000" was
/// read as `max(10, 0) = 10` and landed in the lowest sample-size tier.
///
/// Numbers that do not fit in a `u64` are skipped rather than reported.
fn parse_sample_size(s: &str) -> Option<u64> {
    let bytes = s.as_bytes();
    let mut largest: Option<u64> = None;
    let mut pos = 0;

    while pos < bytes.len() {
        if !bytes[pos].is_ascii_digit() {
            pos += 1;
            continue;
        }

        // Consume the leading run of digits. Slicing is safe: both ends sit
        // next to ASCII bytes, which are always char boundaries.
        let start = pos;
        while pos < bytes.len() && bytes[pos].is_ascii_digit() {
            pos += 1;
        }
        let mut digits = s[start..pos].to_string();

        // Fold in any well-formed ",ddd" groups that follow.
        if digits.len() <= 3 {
            while is_thousands_group(bytes, pos) {
                digits.push_str(&s[pos + 1..pos + 4]);
                pos += 4;
            }
        }

        if let Ok(n) = digits.parse::<u64>() {
            largest = Some(largest.map_or(n, |prev| prev.max(n)));
        }
    }

    largest
}

/// True when `bytes[at..]` starts with a comma followed by exactly three
/// ASCII digits (i.e. the three digits are not followed by a fourth).
fn is_thousands_group(bytes: &[u8], at: usize) -> bool {
    let has_separator = bytes.get(at) == Some(&b',');
    let has_three_digits = bytes
        .get(at + 1..at + 4)
        .map_or(false, |group| group.iter().all(u8::is_ascii_digit));
    let runs_longer = bytes.get(at + 4).map_or(false, u8::is_ascii_digit);
    has_separator && has_three_digits && !runs_longer
}
1548
1549/// Compute frontier epistemic confidence from evidence and condition fields.
1550/// Returns a fully populated Confidence with components and aggregate score,
1551/// using a deterministic, auditable support computation.
1552///
1553/// Back-compat wrapper: derives `n_replicated` from the legacy
1554/// `Evidence.replicated` / `Evidence.replication_count` scalars, with
1555/// `n_failed` and `n_partial` defaulting to zero. Use
1556/// `Project::compute_confidence_for` when the v0.32 `Replication`
1557/// collection is available — that's the authoritative path.
1558pub fn compute_confidence(
1559    evidence: &Evidence,
1560    conditions: &Conditions,
1561    contested: bool,
1562) -> Confidence {
1563    let n_replicated = if evidence.replicated {
1564        evidence.replication_count.unwrap_or(1)
1565    } else {
1566        0
1567    };
1568    compute_confidence_from_components(
1569        evidence,
1570        conditions,
1571        contested,
1572        n_replicated,
1573        0,
1574        0,
1575        None,
1576        None,
1577    )
1578}
1579
1580/// v0.38.1: causal-claim × evidence-grade compatibility multiplier.
1581/// An RCT supports any claim slightly better than baseline; an
1582/// observational study weakly supports correlation; an *intervention*
1583/// claim from observational data gets a meaningful penalty (the
1584/// design doesn't actually identify the causal effect being claimed).
1585/// Returns `1.0` when either field is `None` — the pre-v0.38 case is
1586/// neutral.
1587#[must_use]
1588pub fn causal_consistency_multiplier(
1589    claim: Option<CausalClaim>,
1590    grade: Option<CausalEvidenceGrade>,
1591) -> f64 {
1592    use CausalClaim::*;
1593    use CausalEvidenceGrade::*;
1594    let (Some(c), Some(g)) = (claim, grade) else {
1595        return 1.0;
1596    };
1597    match (c, g) {
1598        // RCT: gold standard. Slight bump for any claim it supports.
1599        (_, Rct) => 1.10,
1600        // Correlation: any reasonable design supports it.
1601        (Correlation, _) => 1.0,
1602        // Mediation: needs design that handles confounders.
1603        (Mediation, QuasiExperimental) => 1.05,
1604        (Mediation, Observational) => 0.85,
1605        (Mediation, Theoretical) => 0.90,
1606        // Intervention: the strongest claim. Without RCT or strong
1607        // QE, the design under-supports the assertion.
1608        (Intervention, QuasiExperimental) => 0.90,
1609        (Intervention, Observational) => 0.65,
1610        (Intervention, Theoretical) => 0.75,
1611    }
1612}
1613
1614/// Pure-math kernel for the frontier-epistemic confidence formula. Takes
1615/// replication counts and (v0.38.1) the optional causal typing as inputs
1616/// so the same math drives both the legacy scalar path
1617/// (`compute_confidence`) and the v0.32+ Project-aware path
1618/// (`Project::compute_confidence_for`).
1619///
1620/// Replication strength schedule:
1621/// `clamp(0.7 + 0.1 * n_replicated + 0.05 * n_partial - 0.10 * n_failed, 0.4, 1.0)`
1622///
1623/// Floor at 0.4 keeps a single failed replication from zeroing out the
1624/// computation; ceiling at 1.0 caps the bonus from accumulated successes.
1625/// `inconclusive` outcomes do not move the score (deliberate — they
1626/// represent methodological ambiguity, not evidence).
1627///
1628/// v0.38.1: a `causal_consistency` factor multiplies the support
1629/// product. `None` for either field is neutral (pre-v0.38 frontiers
1630/// behave identically). See `causal_consistency_multiplier`.
1631#[must_use]
1632pub fn compute_confidence_from_components(
1633    evidence: &Evidence,
1634    conditions: &Conditions,
1635    contested: bool,
1636    n_replicated: u32,
1637    n_failed: u32,
1638    n_partial: u32,
1639    causal_claim: Option<CausalClaim>,
1640    causal_evidence_grade: Option<CausalEvidenceGrade>,
1641) -> Confidence {
1642    let evidence_strength = match evidence.evidence_type.as_str() {
1643        "meta_analysis" => 0.95,
1644        "systematic_review" => 0.90,
1645        "experimental" => 0.80,
1646        "observational" => 0.65,
1647        "computational" => 0.55,
1648        "case_report" => 0.40,
1649        "theoretical" => 0.30,
1650        _ => 0.50,
1651    };
1652
1653    let replication_strength = (0.7 + 0.1 * f64::from(n_replicated) + 0.05 * f64::from(n_partial)
1654        - 0.10 * f64::from(n_failed))
1655    .clamp(0.4, 1.0);
1656
1657    let sample_strength = match evidence.sample_size.as_deref().and_then(parse_sample_size) {
1658        Some(n) if n > 1000 => 1.0,
1659        Some(n) if n > 100 => 0.9,
1660        Some(n) if n > 30 => 0.8,
1661        Some(n) if n > 10 => 0.7,
1662        Some(_) => 0.6,
1663        None => 0.6,
1664    };
1665
1666    let model_relevance = if conditions.human_data {
1667        1.0
1668    } else if conditions.in_vivo {
1669        0.8
1670    } else if conditions.in_vitro {
1671        0.6
1672    } else {
1673        0.5
1674    };
1675
1676    let review_penalty = if contested { 0.15 } else { 0.0 };
1677    let calibration_adjustment = 0.0;
1678    let causal_consistency = causal_consistency_multiplier(causal_claim, causal_evidence_grade);
1679
1680    let raw = evidence_strength
1681        * replication_strength
1682        * model_relevance
1683        * sample_strength
1684        * causal_consistency
1685        - review_penalty
1686        + calibration_adjustment;
1687    let score = raw.clamp(0.0, 1.0);
1688    let score = (score * 1000.0).round() / 1000.0;
1689
1690    let components = ConfidenceComponents {
1691        evidence_strength,
1692        replication_strength,
1693        sample_strength,
1694        model_relevance,
1695        review_penalty,
1696        calibration_adjustment,
1697        causal_consistency,
1698        formula_version: "v0.7".to_string(),
1699    };
1700
1701    let basis = format!(
1702        "frontier_epistemic: evidence={:.2} * replication={:.2} * model={:.2} * sample={:.2} * causal={:.2} - review_penalty={:.2} + calibration={:.2} = {:.3}",
1703        evidence_strength,
1704        replication_strength,
1705        model_relevance,
1706        sample_strength,
1707        causal_consistency,
1708        review_penalty,
1709        calibration_adjustment,
1710        score,
1711    );
1712
1713    Confidence {
1714        kind: ConfidenceKind::FrontierEpistemic,
1715        score,
1716        basis,
1717        method: ConfidenceMethod::Computed,
1718        components: Some(components),
1719        extraction_confidence: default_extraction_conf(),
1720    }
1721}
1722
1723/// Count v0.32 replication outcomes targeting a given finding id.
1724/// Returns `(n_replicated, n_failed, n_partial)`. Inconclusive outcomes
1725/// are deliberately excluded — they represent methodological ambiguity
1726/// and don't move the confidence score.
1727#[must_use]
1728pub fn count_replication_outcomes(
1729    replications: &[Replication],
1730    target_finding: &str,
1731) -> (u32, u32, u32) {
1732    let mut n_replicated = 0u32;
1733    let mut n_failed = 0u32;
1734    let mut n_partial = 0u32;
1735    for r in replications {
1736        if r.target_finding != target_finding {
1737            continue;
1738        }
1739        match r.outcome.as_str() {
1740            "replicated" => n_replicated += 1,
1741            "failed" => n_failed += 1,
1742            "partial" => n_partial += 1,
1743            _ => {}
1744        }
1745    }
1746    (n_replicated, n_failed, n_partial)
1747}
1748
1749/// Recompute confidence scores for all findings in a slice using the
1750/// v0.32 `Replication` collection as the source of truth. Returns the
1751/// number of findings whose score changed by more than 0.001.
1752///
1753/// When `replications` is empty (e.g., legacy frontiers pre-v0.32), the
1754/// math falls back through `compute_confidence_from_components` with
1755/// counts derived from the scalar `Evidence.replicated` /
1756/// `Evidence.replication_count` fields, preserving prior behavior.
1757pub fn recompute_all_confidence(
1758    findings: &mut [FindingBundle],
1759    replications: &[Replication],
1760) -> usize {
1761    let mut changed = 0;
1762    for bundle in findings.iter_mut() {
1763        let old_score = bundle.confidence.score;
1764        let extraction_conf = bundle.confidence.extraction_confidence;
1765        let (n_repl, n_failed, n_partial) = count_replication_outcomes(replications, &bundle.id);
1766        // If the v0.32 collection has nothing for this finding, fall back
1767        // to the legacy scalar so unmigrated frontiers keep their prior
1768        // computed confidence.
1769        let (n_repl, n_failed, n_partial) = if n_repl + n_failed + n_partial == 0 {
1770            let legacy = if bundle.evidence.replicated {
1771                bundle.evidence.replication_count.unwrap_or(1)
1772            } else {
1773                0
1774            };
1775            (legacy, 0, 0)
1776        } else {
1777            (n_repl, n_failed, n_partial)
1778        };
1779        let mut new_conf = compute_confidence_from_components(
1780            &bundle.evidence,
1781            &bundle.conditions,
1782            bundle.flags.contested,
1783            n_repl,
1784            n_failed,
1785            n_partial,
1786            bundle.assertion.causal_claim,
1787            bundle.assertion.causal_evidence_grade,
1788        );
1789        // Preserve the extraction confidence from the original extraction.
1790        new_conf.extraction_confidence = extraction_conf;
1791        if (new_conf.score - old_score).abs() > 0.001 {
1792            changed += 1;
1793        }
1794        bundle.confidence = new_conf;
1795    }
1796    changed
1797}
1798
/// How a finding was extracted from its source: the extraction method, the
/// model involved (if any), and the extractor tool version.
///
/// NOTE(review): the derived `Default` yields empty strings for `method` and
/// `extractor_version`, whereas deserializing an `Extraction` object that
/// merely omits those keys yields `"llm_extraction"` and `"vela/0.2.0"` via
/// the per-field serde defaults. A container field marked `#[serde(default)]`
/// (such as `Provenance::extraction`) takes the derived-`Default` path when
/// the whole object is absent — confirm that asymmetry is intended.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Extraction {
    /// Extraction method label; `"llm_extraction"` when the key is missing.
    #[serde(default = "default_extraction_method")]
    pub method: String,
    /// Model identifier, when one was used. Required key, may be null.
    pub model: Option<String>,
    /// Version of `model`, when known.
    pub model_version: Option<String>,
    /// When the extraction ran; empty string when missing. The format is not
    /// enforced here (presumably RFC 3339 — TODO confirm).
    #[serde(default)]
    pub extracted_at: String,
    /// Version tag of the extractor; `"vela/0.2.0"` when the key is missing.
    #[serde(default = "default_extractor_version")]
    pub extractor_version: String,
}
1810
/// Serde default for `Extraction::method` when the key is absent.
fn default_extraction_method() -> String {
    String::from("llm_extraction")
}
/// Serde default for `Extraction::extractor_version` when the key is absent.
fn default_extractor_version() -> String {
    String::from("vela/0.2.0")
}
1817
/// Review record carried inside a finding's provenance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Review {
    /// Whether a review has taken place; `false` when the key is missing.
    #[serde(default)]
    pub reviewed: bool,
    /// Identifier of the reviewer, if recorded.
    pub reviewer: Option<String>,
    /// When the review happened, if recorded. Stored as an opaque string.
    pub reviewed_at: Option<String>,
    /// Free-form correction payloads; empty when the key is missing. The
    /// shape of each JSON value is not constrained by this type.
    #[serde(default)]
    pub corrections: Vec<serde_json::Value>,
}
1827
/// One author of a source document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
    /// Author name as given by the source.
    pub name: String,
    /// ORCID identifier, when known.
    pub orcid: Option<String>,
}
1833
/// Where a finding came from: the source document's identifiers and
/// bibliographic metadata, plus how the finding was extracted and reviewed.
///
/// Several optional fields are skipped on serialization when empty so that
/// frontiers written before those fields existed round-trip byte-identically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
    /// Kind of source; `"published_paper"` when the key is missing.
    #[serde(default = "default_source_type")]
    pub source_type: String,
    /// DOI of the source, if any.
    pub doi: Option<String>,
    /// PubMed identifier of the source, if any.
    pub pmid: Option<String>,
    /// PubMed Central identifier of the source, if any.
    pub pmc: Option<String>,
    /// OpenAlex identifier of the source, if any.
    pub openalex_id: Option<String>,
    /// v0.11: generic source URL when none of the structured identifiers
    /// fit (preprint server URL, dataset landing page, talk recording, etc.).
    /// Skipped when None so pre-v0.11 frontiers serialise byte-identically.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
    /// Title of the source; empty string when the key is missing.
    #[serde(default)]
    pub title: String,
    /// Authors of the source; empty when the key is missing.
    #[serde(default)]
    pub authors: Vec<Author>,
    /// Publication year, if known.
    pub year: Option<i32>,
    /// Journal or venue name, if known.
    pub journal: Option<String>,
    /// License URL (e.g., Creative Commons), typically from Crossref.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Publisher name, typically from Crossref.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub publisher: Option<String>,
    /// Funding sources, typically from Crossref.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub funders: Vec<String>,
    /// Extraction metadata. When the key is missing this is
    /// `Extraction::default()`, not the per-field serde defaults.
    #[serde(default)]
    pub extraction: Extraction,
    /// Review record, if the finding's source extraction has been reviewed.
    pub review: Option<Review>,
    /// Citation count of the source paper (from OpenAlex).
    #[serde(default)]
    pub citation_count: Option<u64>,
}
1869
/// Serde default for `Provenance::source_type` when the key is absent.
fn default_source_type() -> String {
    String::from("published_paper")
}
1873
/// Typed review state. Replaces the v0.2 `flags.contested: bool` collapse
/// of three semantically distinct review judgments. Doctrine line 6:
/// "scientific disagreement should remain live state."
///
/// Serialized in snake_case: `accepted`, `contested`, `needs_revision`,
/// `rejected`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReviewState {
    /// Review verdict was "accepted" or "approved" — finding stands.
    Accepted,
    /// Review verdict was "contested" — disagreement preserved as live state.
    Contested,
    /// Review verdict was "needs_revision" — finding stays but flagged for
    /// confidence revision or condition refinement.
    NeedsRevision,
    /// Review verdict was "rejected" — finding kept for replay history but
    /// not treated as active state.
    Rejected,
}
1891
1892impl ReviewState {
1893    /// Whether `flags.contested` should be true given this review_state.
1894    /// Backwards-compat shim: contested is the v0.2 derived bit.
1895    #[must_use]
1896    pub fn implies_contested(&self) -> bool {
1897        matches!(
1898            self,
1899            ReviewState::Contested | ReviewState::NeedsRevision | ReviewState::Rejected
1900        )
1901    }
1902}
1903
/// Status flags and review metadata attached to a finding.
///
/// Every field has a serde default (`false` / `None`), so an empty object
/// deserializes successfully. Fields added after the initial release are
/// skipped on serialization while at their default value, which keeps older
/// frontiers byte-identical on round-trip.
///
/// NOTE(review): the meanings of `gap`, `negative_space`, `retracted`,
/// `declining` and `gravity_well` are defined by the code that sets them and
/// are not derivable from this definition.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Flags {
    #[serde(default)]
    pub gap: bool,
    #[serde(default)]
    pub negative_space: bool,
    /// Derived from `review_state` for backward compatibility. Code that
    /// reads `flags.contested` still works; new code should read
    /// `review_state` for the typed verdict.
    #[serde(default)]
    pub contested: bool,
    #[serde(default)]
    pub retracted: bool,
    #[serde(default)]
    pub declining: bool,
    #[serde(default)]
    pub gravity_well: bool,
    /// Typed review verdict (v0.3+). When set, drives `flags.contested`
    /// for backward compatibility. `None` means no review verdict has
    /// been recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// v0.14: true once a newer content-addressed finding supersedes
    /// this one via the `finding.supersede` proposal kind. The newer
    /// finding carries a `supersedes` link back to this finding's id.
    /// Skipped when false so pre-v0.14 frontiers serialize byte-identically.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub superseded: bool,
    /// v0.37: minimum number of unique valid signatures required for
    /// this finding to qualify as `jointly_accepted`. `None` (the
    /// default) preserves single-sig semantics — any one valid
    /// signature is accepted. When `Some(k)`, the finding only counts
    /// as joint-accepted once `k` distinct registered actors have
    /// each contributed a valid Ed25519 signature over the canonical
    /// finding bytes. Pre-v0.37 frontiers omit the field; loading is
    /// backward-compatible.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature_threshold: Option<u32>,
    /// v0.37: true once at least `signature_threshold` unique actors
    /// have signed this finding. Set by the verify pass; not written
    /// directly by any other code path. Skipped when false so pre-v0.37
    /// frontiers serialize byte-identically.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub jointly_accepted: bool,
}
1949
/// v0.38: Pearlian causal typing for an assertion. The kernel's
/// pre-v0.38 record carried only `direction: Some("positive" |
/// "negative")` — enough to know that "X covaries with Y" but not
/// whether the speaker meant correlation, mediation, or intervention.
/// In real review work those are different epistemic claims with
/// different evidence requirements; conflating them produced silent
/// over-claiming.
///
/// This release lands the schema layer. The reasoning surface
/// (do-calculus, identifiability, derived bridges that propagate
/// causal vs correlational claims separately) ships in a follow-up.
/// The same staging used v0.32 (Replication as object) → v0.36.1
/// (Project.replications becomes the source of truth for confidence).
///
/// Serialized in snake_case: `correlation`, `mediation`, `intervention`.
/// The type is `Copy`, so it is passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CausalClaim {
    /// "X covaries with Y" — no claim about generative direction.
    Correlation,
    /// "X mediates Y → Z" — pathway claim, weaker than intervention.
    Mediation,
    /// "Setting X=x changes Y" — Pearl's `do(X=x)`.
    Intervention,
}
1973
/// v0.38: study-design grade backing a causal claim.
/// The grade is what makes the difference between "the data is
/// consistent with X causing Y" (Observational) and "X causes Y"
/// (Rct). The kernel carries the design label so reviewers can
/// re-grade without re-extracting.
///
/// Serialized in snake_case: `rct`, `quasi_experimental`, `observational`,
/// `theoretical`. The type is `Copy`, so it is passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CausalEvidenceGrade {
    /// Randomized controlled trial. Strongest grade for intervention claims.
    Rct,
    /// Mendelian randomization, instrumental variables, regression
    /// discontinuity, natural experiments, etc.
    QuasiExperimental,
    /// Cohort, case-control, cross-sectional. Identifies association
    /// only without further design assumptions.
    Observational,
    /// Computational simulation, theoretical model, mathematical proof.
    Theoretical,
}
1993
/// Valid string forms for serialized `CausalClaim`. The kernel
/// validates against this on load.
///
/// These are the snake_case serde names of the `CausalClaim` variants; the
/// list is maintained by hand, so update it whenever a variant is added.
pub const VALID_CAUSAL_CLAIMS: &[&str] = &["correlation", "mediation", "intervention"];
1997
/// Valid string forms for serialized `CausalEvidenceGrade`.
///
/// These are the snake_case serde names of the `CausalEvidenceGrade`
/// variants; the list is maintained by hand, so update it whenever a
/// variant is added.
pub const VALID_CAUSAL_EVIDENCE_GRADES: &[&str] =
    &["rct", "quasi_experimental", "observational", "theoretical"];
2001
/// The claim a finding makes: its free text, a type label, the entities it
/// mentions, and optional relation / direction / causal typing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Assertion {
    /// The assertion as prose.
    pub text: String,
    /// Assertion type label, serialized under the key `type`.
    #[serde(rename = "type")]
    pub assertion_type: String,
    /// Entities referenced by the assertion; empty when the key is missing.
    #[serde(default)]
    pub entities: Vec<Entity>,
    /// Relation between the entities, if stated. Free-form string.
    pub relation: Option<String>,
    /// Direction of the effect, if stated (e.g. "positive" / "negative").
    pub direction: Option<String>,
    /// v0.38: the kind of causal claim this assertion makes. `None`
    /// means the kernel hasn't been told yet — the legacy default for
    /// pre-v0.38 findings. `Some(Correlation)` is the safe minimum
    /// claim; `Some(Intervention)` is the strongest.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_claim: Option<CausalClaim>,
    /// v0.38: study-design grade backing the causal claim. Drives the
    /// reasoning layer's identifiability checks (deferred). Pre-v0.38
    /// findings omit the field; loading is backward-compatible.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_evidence_grade: Option<CausalEvidenceGrade>,
}
2023
/// A typed edge from the owning finding to another finding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Link {
    /// Wire-format reference to the target finding. Kept as an opaque
    /// string here; parse it with `LinkRef::parse` to get a typed reference.
    pub target: String,
    /// Edge type label, serialized under the key `type`.
    #[serde(rename = "type")]
    pub link_type: String,
    /// Optional free-text note; empty string when the key is missing.
    #[serde(default)]
    pub note: String,
    /// Who or what created the link; `"compiler"` when the key is missing.
    #[serde(default = "default_compiler")]
    pub inferred_by: String,
    /// When this link was created (immutable timestamp). Uses serde default for backward compat.
    #[serde(default)]
    pub created_at: String,
    /// v0.45: optional structural causal mechanism on a `depends` /
    /// `supports` edge. When present, the edge participates in
    /// counterfactual (Pearl level 3) queries via twin-network
    /// construction. Edges without a mechanism still participate in
    /// level 2 (back-door / front-door identification); they simply
    /// can't answer twin-network counterfactuals.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mechanism: Option<Mechanism>,
}
2045
/// Serde default for `Link::inferred_by` when the key is absent.
fn default_compiler() -> String {
    String::from("compiler")
}
2049
/// v0.45: structural causal mechanism on a directed edge.
///
/// A `Mechanism` captures *how* a parent finding determines a child's
/// value, not just that a dependency exists. With mechanisms in place,
/// the kernel can answer counterfactual (Pearl level 3) queries: "given
/// that we observed X under parent=p, what would X have been under
/// parent=p'?" via twin-network construction.
///
/// Doctrine: mechanisms are deliberately coarse. Science rarely warrants
/// precise functional forms; what we need is enough algebraic structure
/// to propagate counterfactual perturbations sign-and-magnitude. Five
/// shapes cover the empirical distribution of biology / clinical claims:
///
/// - `Linear { sign, slope }`: dY = slope * dX (with sign packing the
///   direction; slope is a unitless effect-size on the [0,1] confidence
///   scale).
/// - `Monotonic { sign }`: dY agrees with sign(dX) but magnitude is
///   ungraded (used when direction is known but effect-size isn't).
/// - `Threshold { sign, threshold }`: parent must cross `threshold` for
///   any child response (binary above/below).
/// - `Saturating { sign, half_max }`: hyperbolic / Hill-style; large dX
///   above `half_max` produces vanishing dY.
/// - `Unknown`: explicitly annotated as causally connected but
///   mechanism unspecified. Twin-network treats this as opaque (the
///   counterfactual is reported as `MechanismUnspecified`).
///
/// Wire format: internally tagged on `kind` with snake_case variant names,
/// e.g. `{"kind": "linear", "sign": "positive", "slope": 0.5}` or
/// `{"kind": "unknown"}`. Numeric parameters are not range-checked by this
/// type.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Mechanism {
    /// Proportional response: child change is `sign * slope * delta_x`.
    Linear {
        sign: MechanismSign,
        /// Effect-size on [0, 1] confidence scale.
        slope: f64,
    },
    /// Direction known, magnitude ungraded.
    Monotonic {
        sign: MechanismSign,
    },
    /// Step response once the parent perturbation reaches `threshold`.
    Threshold {
        sign: MechanismSign,
        /// Perturbation magnitude the parent must reach.
        threshold: f64,
    },
    /// Hill-style saturating response.
    Saturating {
        sign: MechanismSign,
        /// Perturbation magnitude at which the response is half of its maximum.
        half_max: f64,
    },
    /// Causally connected, functional form unspecified.
    Unknown,
}
2096
/// v0.45: causal direction on a `Mechanism`.
///
/// `Positive`: parent confidence ↑ ⇒ child confidence ↑.
/// `Negative`: parent confidence ↑ ⇒ child confidence ↓.
///
/// Serialized in lowercase: `positive` / `negative`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum MechanismSign {
    /// Child moves in the same direction as the parent.
    Positive,
    /// Child moves in the opposite direction to the parent.
    Negative,
}
2107
2108impl MechanismSign {
2109    #[must_use]
2110    pub fn as_f64(self) -> f64 {
2111        match self {
2112            Self::Positive => 1.0,
2113            Self::Negative => -1.0,
2114        }
2115    }
2116}
2117
2118impl Mechanism {
2119    /// Apply this mechanism to a parent perturbation `delta_x`,
2120    /// returning the implied child perturbation `delta_y` on the
2121    /// confidence scale. Returns `None` for `Unknown`.
2122    #[must_use]
2123    pub fn apply(&self, delta_x: f64) -> Option<f64> {
2124        match *self {
2125            Self::Linear { sign, slope } => Some(sign.as_f64() * slope * delta_x),
2126            Self::Monotonic { sign } => {
2127                Some(sign.as_f64() * delta_x.signum() * delta_x.abs().min(1.0))
2128            }
2129            Self::Threshold { sign, threshold } => {
2130                if delta_x.abs() >= threshold {
2131                    Some(sign.as_f64() * delta_x.signum())
2132                } else {
2133                    Some(0.0)
2134                }
2135            }
2136            Self::Saturating { sign, half_max } => {
2137                // Hill-style: delta_y = sign * dx / (|dx| + half_max), bounded to [-1,1]
2138                let denom = delta_x.abs() + half_max.max(1e-9);
2139                Some(sign.as_f64() * delta_x / denom)
2140            }
2141            Self::Unknown => None,
2142        }
2143    }
2144}
2145
/// v0.8: typed reference resolved from `Link.target`.
///
/// Targets stay opaque `String` on the wire (canonical-JSON stable). At
/// validation/render time callers parse via `LinkRef::parse`. The
/// `Local` variant is the v0–v0.7 shape; `Cross` is new in v0.8 and
/// requires the dependent frontier to declare a matching `vfr_id` in
/// `frontier.dependencies`.
///
/// As produced by `LinkRef::parse`, `vf_id` and `vfr_id` hold the full
/// identifiers *including* their `vf_` / `vfr_` prefixes, so `format` can
/// reassemble the wire string without re-adding them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkRef {
    /// `vf_<16hex>` — the target finding lives in this same frontier.
    Local { vf_id: String },
    /// `vf_<16hex>@vfr_<16hex>` — the target finding lives in a
    /// different frontier. Strict validation requires the `vfr_id` to
    /// appear in `Project.frontier.dependencies`.
    Cross { vf_id: String, vfr_id: String },
}
2162
/// Reasons `LinkRef::parse` can reject a link target string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkParseError {
    /// The input string was empty.
    Empty,
    /// The part before `@` (or the whole string) does not start with `vf_`.
    BadVfPrefix,
    /// The part after `@` does not start with `vfr_`.
    BadVfrPrefix,
    /// Nothing follows the `vf_` prefix.
    EmptyVfId,
    /// Nothing follows the `vfr_` prefix.
    EmptyVfrId,
    /// The input contains more than one `@`.
    TooManyAtSigns,
}
2172
2173impl std::fmt::Display for LinkParseError {
2174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2175        match self {
2176            LinkParseError::Empty => write!(f, "empty link target"),
2177            LinkParseError::BadVfPrefix => write!(f, "link target must start with 'vf_'"),
2178            LinkParseError::BadVfrPrefix => {
2179                write!(f, "cross-frontier suffix must start with 'vfr_'")
2180            }
2181            LinkParseError::EmptyVfId => write!(f, "link target's vf_ id is empty"),
2182            LinkParseError::EmptyVfrId => write!(f, "cross-frontier vfr_ id is empty"),
2183            LinkParseError::TooManyAtSigns => {
2184                write!(f, "link target has more than one '@' separator")
2185            }
2186        }
2187    }
2188}
2189
// Empty impl: the trait's default methods suffice, and the message comes
// from the `Display` impl. This makes `LinkParseError` usable wherever a
// `std::error::Error` is expected.
impl std::error::Error for LinkParseError {}
2191
2192impl LinkRef {
2193    /// Parse `vf_<id>` or `vf_<id>@vfr_<id>` into a typed reference.
2194    /// Treats inputs as opaque hex-ish blobs — does not validate hex
2195    /// length or character set, since the substrate's content-address
2196    /// derivation already handles that.
2197    pub fn parse(s: &str) -> Result<Self, LinkParseError> {
2198        if s.is_empty() {
2199            return Err(LinkParseError::Empty);
2200        }
2201        let mut parts = s.split('@');
2202        let local = parts.next().ok_or(LinkParseError::Empty)?;
2203        let remote = parts.next();
2204        if parts.next().is_some() {
2205            return Err(LinkParseError::TooManyAtSigns);
2206        }
2207        let vf_id = local
2208            .strip_prefix("vf_")
2209            .ok_or(LinkParseError::BadVfPrefix)?;
2210        if vf_id.is_empty() {
2211            return Err(LinkParseError::EmptyVfId);
2212        }
2213        match remote {
2214            None => Ok(LinkRef::Local {
2215                vf_id: local.to_string(),
2216            }),
2217            Some(r) => {
2218                let vfr_id = r.strip_prefix("vfr_").ok_or(LinkParseError::BadVfrPrefix)?;
2219                if vfr_id.is_empty() {
2220                    return Err(LinkParseError::EmptyVfrId);
2221                }
2222                Ok(LinkRef::Cross {
2223                    vf_id: local.to_string(),
2224                    vfr_id: r.to_string(),
2225                })
2226            }
2227        }
2228    }
2229
2230    /// Round-trip: format back to the canonical wire string.
2231    pub fn format(&self) -> String {
2232        match self {
2233            LinkRef::Local { vf_id } => vf_id.clone(),
2234            LinkRef::Cross { vf_id, vfr_id } => format!("{vf_id}@{vfr_id}"),
2235        }
2236    }
2237
2238    /// True if this reference points outside the current frontier.
2239    pub fn is_cross_frontier(&self) -> bool {
2240        matches!(self, LinkRef::Cross { .. })
2241    }
2242}
2243
// Unit tests for `LinkRef::parse` / `format` / `is_cross_frontier`: the two
// accepted shapes, one test per `LinkParseError` variant, and a round-trip
// over realistic ids.
#[cfg(test)]
mod link_ref_tests {
    use super::*;

    // A bare `vf_` id parses as `Local`, keeping the prefix in `vf_id`.
    #[test]
    fn parses_local_vf_id() {
        let r = LinkRef::parse("vf_abc123").unwrap();
        assert_eq!(
            r,
            LinkRef::Local {
                vf_id: "vf_abc123".into()
            }
        );
        assert_eq!(r.format(), "vf_abc123");
        assert!(!r.is_cross_frontier());
    }

    // `vf_…@vfr_…` parses as `Cross`, keeping both prefixes.
    #[test]
    fn parses_cross_frontier_target() {
        let r = LinkRef::parse("vf_abc@vfr_def").unwrap();
        assert_eq!(
            r,
            LinkRef::Cross {
                vf_id: "vf_abc".into(),
                vfr_id: "vfr_def".into(),
            }
        );
        assert_eq!(r.format(), "vf_abc@vfr_def");
        assert!(r.is_cross_frontier());
    }

    // The empty string is rejected before any prefix check.
    #[test]
    fn rejects_empty() {
        assert_eq!(LinkRef::parse(""), Err(LinkParseError::Empty));
    }

    // The local part must start with `vf_`.
    #[test]
    fn rejects_missing_vf_prefix() {
        assert_eq!(LinkRef::parse("xx_abc"), Err(LinkParseError::BadVfPrefix));
    }

    // A bare `vf_` prefix with no id is rejected.
    #[test]
    fn rejects_empty_vf_id() {
        assert_eq!(LinkRef::parse("vf_"), Err(LinkParseError::EmptyVfId));
    }

    // The part after `@` must start with `vfr_`.
    #[test]
    fn rejects_missing_vfr_prefix_after_at() {
        assert_eq!(
            LinkRef::parse("vf_abc@xxx_def"),
            Err(LinkParseError::BadVfrPrefix)
        );
    }

    // A bare `vfr_` prefix with no id is rejected.
    #[test]
    fn rejects_empty_vfr_id() {
        assert_eq!(
            LinkRef::parse("vf_abc@vfr_"),
            Err(LinkParseError::EmptyVfrId)
        );
    }

    // At most one `@` separator is allowed.
    #[test]
    fn rejects_double_at() {
        assert_eq!(
            LinkRef::parse("vf_abc@vfr_def@x"),
            Err(LinkParseError::TooManyAtSigns)
        );
    }

    // parse → format is the identity on well-formed 16-hex ids.
    #[test]
    fn round_trips_real_ids() {
        for s in [
            "vf_d0a962d3251133dd",
            "vf_d0a962d3251133dd@vfr_7344e96c0f2669d5",
        ] {
            assert_eq!(LinkRef::parse(s).unwrap().format(), s);
        }
    }
}
2324
/// A lightweight annotation on a finding — like a comment on a line of code.
///
/// All fields except `provenance` are required on deserialization;
/// `provenance` is omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Annotation {
    /// Content-addressed ID (ann_{hash}).
    pub id: String,
    /// The annotation text.
    pub text: String,
    /// Who wrote it (ORCID preferred).
    pub author: String,
    /// When it was created (RFC 3339).
    pub timestamp: String,
    /// Phase β (v0.6): structured provenance for the annotation.
    /// Optional. When present, encodes which paper / preprint / extract
    /// span produced this note. Reviewers query by these fields:
    /// "show every annotation from PMID 25378646" works because the
    /// identifier is structure, not prose.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provenance: Option<ProvenanceRef>,
}
2344
2345/// Phase β (v0.6): structured provenance reference attached to an
2346/// annotation (or any future note-shaped object). At least one
2347/// identifying field (`doi`, `pmid`, `title`) must be set when the
2348/// provenance is present; an all-empty `ProvenanceRef` is rejected by
2349/// `validate_event_payload`.
2350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2351pub struct ProvenanceRef {
2352    #[serde(default, skip_serializing_if = "Option::is_none")]
2353    pub doi: Option<String>,
2354    #[serde(default, skip_serializing_if = "Option::is_none")]
2355    pub pmid: Option<String>,
2356    #[serde(default, skip_serializing_if = "Option::is_none")]
2357    pub title: Option<String>,
2358    /// Verbatim quote / extraction span from the source.
2359    #[serde(default, skip_serializing_if = "Option::is_none")]
2360    pub span: Option<String>,
2361}
2362
2363impl ProvenanceRef {
2364    /// True iff at least one identifying field is set. Used by
2365    /// `validate_event_payload` to reject all-empty `provenance: {}` objects.
2366    #[must_use]
2367    pub fn has_identifier(&self) -> bool {
2368        self.doi.is_some() || self.pmid.is_some() || self.title.is_some()
2369    }
2370}
2371
2372/// A file attached to a finding (dataset, figure, supplementary material).
2373#[derive(Debug, Clone, Serialize, Deserialize)]
2374pub struct Attachment {
2375    pub filename: String,
2376    pub label: Option<String>,
2377    pub path: String,
2378    pub size_bytes: u64,
2379    pub mime_type: Option<String>,
2380    pub attached_at: String,
2381    pub attached_by: Option<String>,
2382}
2383
2384// ── REVIEW layer: content-addressed review events ──────────────────────────
2385
2386/// A review event is a content-addressed record of human judgment on a finding.
2387/// Like a Git commit, it records who, when, what changed, and why.
2388#[derive(Debug, Clone, Serialize, Deserialize)]
2389pub struct ReviewEvent {
2390    /// Content-addressed ID of this review event.
2391    pub id: String,
2392    /// Optional workspace-relative origin for repo-scoped reviews.
2393    #[serde(default, skip_serializing_if = "Option::is_none")]
2394    pub workspace: Option<String>,
2395    /// ID of the finding being reviewed.
2396    pub finding_id: String,
2397    /// The reviewer (ORCID preferred).
2398    pub reviewer: String,
2399    /// When the review happened (RFC 3339).
2400    pub reviewed_at: String,
2401    /// Optional review scope for richer curation workflows.
2402    #[serde(default, skip_serializing_if = "Option::is_none")]
2403    pub scope: Option<String>,
2404    /// Optional status for the review event (for example: accepted).
2405    #[serde(default, skip_serializing_if = "Option::is_none")]
2406    pub status: Option<String>,
2407    /// What action was taken.
2408    pub action: ReviewAction,
2409    /// Human-readable reason.
2410    #[serde(default)]
2411    pub reason: String,
2412    /// Supporting findings or artifacts considered during review.
2413    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2414    pub evidence_considered: Vec<ReviewEvidence>,
2415    /// Optional structured interpretation update payload.
2416    #[serde(default, skip_serializing_if = "Option::is_none")]
2417    pub state_change: Option<serde_json::Value>,
2418}
2419
2420#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
2421pub struct ReviewEvidence {
2422    pub finding_id: String,
2423    #[serde(default, skip_serializing_if = "Option::is_none")]
2424    pub role: Option<String>,
2425    #[serde(default, skip_serializing_if = "Option::is_none")]
2426    pub note: Option<String>,
2427}
2428
2429/// The action taken in a review event.
2430#[derive(Debug, Clone, Serialize, Deserialize)]
2431#[serde(tag = "type", rename_all = "snake_case")]
2432pub enum ReviewAction {
2433    /// Finding approved as correct.
2434    Approved,
2435    /// Finding interpretation was qualified to narrow or constrain the claim.
2436    Qualified { target: String },
2437    /// Finding corrected — a specific field was changed.
2438    Corrected {
2439        field: String,
2440        original: String,
2441        corrected: String,
2442    },
2443    /// Finding flagged with a specific flag type.
2444    Flagged { flag_type: String },
2445    /// Finding disputed — reviewer disagrees with the claim.
2446    Disputed {
2447        counter_evidence: String,
2448        #[serde(default, skip_serializing_if = "Option::is_none")]
2449        counter_doi: Option<String>,
2450    },
2451}
2452
2453// ── Interpretation layer: mutable confidence updates ───────────────────────
2454
2455/// A confidence update is a mutable interpretation layer event.
2456/// The finding's evidence is immutable; the confidence assessment can evolve.
2457#[derive(Debug, Clone, Serialize, Deserialize)]
2458pub struct ConfidenceUpdate {
2459    pub finding_id: String,
2460    pub previous_score: f64,
2461    pub new_score: f64,
2462    pub basis: String,
2463    /// Who or what produced this update (e.g., "grounding_pass", "reviewer:0000-0001-2345-6789").
2464    pub updated_by: String,
2465    /// When this update was produced (RFC 3339).
2466    pub updated_at: String,
2467}
2468
2469#[derive(Debug, Clone, Serialize, Deserialize)]
2470pub struct FindingBundle {
2471    pub id: String,
2472    #[serde(default = "default_version")]
2473    pub version: u32,
2474    pub previous_version: Option<String>,
2475    pub assertion: Assertion,
2476    pub evidence: Evidence,
2477    pub conditions: Conditions,
2478    pub confidence: Confidence,
2479    pub provenance: Provenance,
2480    pub flags: Flags,
2481    #[serde(default)]
2482    pub links: Vec<Link>,
2483    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2484    pub annotations: Vec<Annotation>,
2485    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2486    pub attachments: Vec<Attachment>,
2487    pub created: String,
2488    pub updated: Option<String>,
2489    /// v0.51: Read-side access tier. Default `Public` — pre-v0.51
2490    /// findings load with `Public` and serialize byte-identically
2491    /// (skip-if-public). Mutated through `tier.set` events; gated in
2492    /// MCP/HTTP read paths via `access_tier::redact_for_actor`. NOT
2493    /// part of the content-address preimage — re-classifying a
2494    /// finding does not mint a new id.
2495    #[serde(default, skip_serializing_if = "is_public_tier")]
2496    pub access_tier: crate::access_tier::AccessTier,
2497}
2498
2499fn is_public_tier(tier: &crate::access_tier::AccessTier) -> bool {
2500    matches!(tier, crate::access_tier::AccessTier::Public)
2501}
2502
/// Serde default for `FindingBundle::version`: a bundle whose input carries
/// no `version` field is treated as version 1.
fn default_version() -> u32 {
    const INITIAL_VERSION: u32 = 1;
    INITIAL_VERSION
}
2506
2507impl FindingBundle {
2508    /// Create a new finding bundle with a content-addressed ID.
2509    /// Normalize text for content-addressing: lowercase, collapse whitespace,
2510    /// strip trailing punctuation. Matches the v0.2.0 schema specification.
2511    /// Public since v0.32 so `Replication::content_address` can reuse the
2512    /// same canonicalization rule for its conditions preimage.
2513    pub fn normalize_text(s: &str) -> String {
2514        let lower = s.to_lowercase();
2515        // Collapse all runs of whitespace into a single space
2516        let collapsed: String = lower.split_whitespace().collect::<Vec<_>>().join(" ");
2517        // Strip trailing punctuation (., ;, :, !, ?)
2518        collapsed
2519            .trim_end_matches(['.', ';', ':', '!', '?'])
2520            .to_string()
2521    }
2522
2523    /// Compute the content-addressed ID per v0.2.0 spec:
2524    /// SHA-256(normalize(assertion.text) + "|" + assertion.type + "|" + (provenance.doi || provenance.pmid || provenance.title))
2525    /// Returns first 16 hex chars prefixed with "vf_".
2526    pub fn content_address(assertion: &Assertion, provenance: &Provenance) -> String {
2527        let norm_text = Self::normalize_text(&assertion.text);
2528        let prov_id = provenance
2529            .doi
2530            .as_deref()
2531            .or(provenance.pmid.as_deref())
2532            .unwrap_or(&provenance.title);
2533        let preimage = format!("{}|{}|{}", norm_text, assertion.assertion_type, prov_id);
2534        let hash = Sha256::digest(preimage.as_bytes());
2535        format!("vf_{}", &hex::encode(hash)[..16])
2536    }
2537
2538    pub fn new(
2539        assertion: Assertion,
2540        evidence: Evidence,
2541        conditions: Conditions,
2542        confidence: Confidence,
2543        provenance: Provenance,
2544        flags: Flags,
2545    ) -> Self {
2546        let now = Utc::now().to_rfc3339();
2547        let id = Self::content_address(&assertion, &provenance);
2548
2549        Self {
2550            id,
2551            version: 1,
2552            previous_version: None,
2553            assertion,
2554            evidence,
2555            conditions,
2556            confidence,
2557            provenance,
2558            flags,
2559            links: Vec::new(),
2560            annotations: Vec::new(),
2561            attachments: Vec::new(),
2562            created: now,
2563            updated: None,
2564            access_tier: crate::access_tier::AccessTier::Public,
2565        }
2566    }
2567
2568    pub fn add_link(&mut self, target_id: &str, link_type: &str, note: &str) {
2569        self.links.push(Link {
2570            target: target_id.to_string(),
2571            link_type: link_type.to_string(),
2572            note: note.to_string(),
2573            inferred_by: "compiler".to_string(),
2574            created_at: Utc::now().to_rfc3339(),
2575            mechanism: None,
2576        });
2577    }
2578
2579    pub fn add_link_with_source(
2580        &mut self,
2581        target_id: &str,
2582        link_type: &str,
2583        note: &str,
2584        inferred_by: &str,
2585    ) {
2586        self.links.push(Link {
2587            target: target_id.to_string(),
2588            link_type: link_type.to_string(),
2589            note: note.to_string(),
2590            inferred_by: inferred_by.to_string(),
2591            created_at: Utc::now().to_rfc3339(),
2592            mechanism: None,
2593        });
2594    }
2595}
2596
2597#[cfg(test)]
2598mod tests {
2599    use super::*;
2600
2601    fn sample_assertion() -> Assertion {
2602        Assertion {
2603            text: "NLRP3 activates IL-1B".into(),
2604            assertion_type: "mechanism".into(),
2605            entities: vec![Entity {
2606                name: "NLRP3".into(),
2607                entity_type: "protein".into(),
2608                identifiers: serde_json::Map::new(),
2609                canonical_id: None,
2610                candidates: vec![],
2611                aliases: vec![],
2612                resolution_provenance: None,
2613                resolution_confidence: 1.0,
2614                resolution_method: None,
2615                species_context: None,
2616                needs_review: false,
2617            }],
2618            relation: Some("activates".into()),
2619            direction: Some("positive".into()),
2620            causal_claim: None,
2621            causal_evidence_grade: None,
2622        }
2623    }
2624
2625    fn sample_evidence() -> Evidence {
2626        Evidence {
2627            evidence_type: "experimental".into(),
2628            model_system: "mouse".into(),
2629            species: Some("Mus musculus".into()),
2630            method: "Western blot".into(),
2631            sample_size: Some("n=30".into()),
2632            effect_size: None,
2633            p_value: Some("p<0.05".into()),
2634            replicated: true,
2635            replication_count: Some(3),
2636            evidence_spans: vec![],
2637        }
2638    }
2639
2640    fn sample_conditions() -> Conditions {
2641        Conditions {
2642            text: "In vitro, mouse microglia".into(),
2643            species_verified: vec!["Mus musculus".into()],
2644            species_unverified: vec![],
2645            in_vitro: true,
2646            in_vivo: false,
2647            human_data: false,
2648            clinical_trial: false,
2649            concentration_range: None,
2650            duration: None,
2651            age_group: None,
2652            cell_type: Some("microglia".into()),
2653        }
2654    }
2655
2656    fn sample_confidence() -> Confidence {
2657        Confidence {
2658            kind: ConfidenceKind::FrontierEpistemic,
2659            score: 0.85,
2660            basis: "Experimental with replication".into(),
2661            method: ConfidenceMethod::LlmInitial,
2662            components: None,
2663            extraction_confidence: 0.9,
2664        }
2665    }
2666
2667    fn sample_provenance() -> Provenance {
2668        Provenance {
2669            source_type: "published_paper".into(),
2670            doi: Some("10.1234/test".into()),
2671            pmid: None,
2672            pmc: None,
2673            openalex_id: None,
2674            url: None,
2675            title: "Test Paper".into(),
2676            authors: vec![Author {
2677                name: "Smith J".into(),
2678                orcid: None,
2679            }],
2680            year: Some(2024),
2681            journal: Some("Nature".into()),
2682            license: None,
2683            publisher: None,
2684            funders: vec![],
2685            extraction: Extraction::default(),
2686            review: None,
2687            citation_count: Some(100),
2688        }
2689    }
2690
2691    fn sample_flags() -> Flags {
2692        Flags {
2693            gap: false,
2694            negative_space: false,
2695            contested: false,
2696            retracted: false,
2697            declining: false,
2698            gravity_well: false,
2699            review_state: None,
2700            superseded: false,
2701            signature_threshold: None,
2702            jointly_accepted: false,
2703        }
2704    }
2705
2706    // ── Content-addressed ID tests ───────────────────────────────────
2707
2708    #[test]
2709    fn same_content_same_id() {
2710        let b1 = FindingBundle::new(
2711            sample_assertion(),
2712            sample_evidence(),
2713            sample_conditions(),
2714            sample_confidence(),
2715            sample_provenance(),
2716            sample_flags(),
2717        );
2718        let b2 = FindingBundle::new(
2719            sample_assertion(),
2720            sample_evidence(),
2721            sample_conditions(),
2722            sample_confidence(),
2723            sample_provenance(),
2724            sample_flags(),
2725        );
2726        assert_eq!(b1.id, b2.id);
2727    }
2728
2729    #[test]
2730    fn different_content_different_id() {
2731        let b1 = FindingBundle::new(
2732            sample_assertion(),
2733            sample_evidence(),
2734            sample_conditions(),
2735            sample_confidence(),
2736            sample_provenance(),
2737            sample_flags(),
2738        );
2739        let mut different_assertion = sample_assertion();
2740        different_assertion.text = "Completely different claim".into();
2741        let b2 = FindingBundle::new(
2742            different_assertion,
2743            sample_evidence(),
2744            sample_conditions(),
2745            sample_confidence(),
2746            sample_provenance(),
2747            sample_flags(),
2748        );
2749        assert_ne!(b1.id, b2.id);
2750    }
2751
2752    #[test]
2753    fn id_starts_with_vf_prefix() {
2754        let b = FindingBundle::new(
2755            sample_assertion(),
2756            sample_evidence(),
2757            sample_conditions(),
2758            sample_confidence(),
2759            sample_provenance(),
2760            sample_flags(),
2761        );
2762        assert!(b.id.starts_with("vf_"));
2763        assert_eq!(b.id.len(), 3 + 16); // "vf_" + 16 hex chars
2764    }
2765
2766    #[test]
2767    fn new_bundle_version_is_one() {
2768        let b = FindingBundle::new(
2769            sample_assertion(),
2770            sample_evidence(),
2771            sample_conditions(),
2772            sample_confidence(),
2773            sample_provenance(),
2774            sample_flags(),
2775        );
2776        assert_eq!(b.version, 1);
2777        assert!(b.previous_version.is_none());
2778    }
2779
2780    #[test]
2781    fn new_bundle_has_no_links() {
2782        let b = FindingBundle::new(
2783            sample_assertion(),
2784            sample_evidence(),
2785            sample_conditions(),
2786            sample_confidence(),
2787            sample_provenance(),
2788            sample_flags(),
2789        );
2790        assert!(b.links.is_empty());
2791    }
2792
2793    #[test]
2794    fn new_bundle_has_created_timestamp() {
2795        let b = FindingBundle::new(
2796            sample_assertion(),
2797            sample_evidence(),
2798            sample_conditions(),
2799            sample_confidence(),
2800            sample_provenance(),
2801            sample_flags(),
2802        );
2803        assert!(!b.created.is_empty());
2804        assert!(b.updated.is_none());
2805    }
2806
2807    // ── add_link tests ───────────────────────────────────────────────
2808
2809    #[test]
2810    fn add_link_works() {
2811        let mut b = FindingBundle::new(
2812            sample_assertion(),
2813            sample_evidence(),
2814            sample_conditions(),
2815            sample_confidence(),
2816            sample_provenance(),
2817            sample_flags(),
2818        );
2819        b.add_link("target_id", "extends", "shared entity");
2820        assert_eq!(b.links.len(), 1);
2821        assert_eq!(b.links[0].target, "target_id");
2822        assert_eq!(b.links[0].link_type, "extends");
2823        assert_eq!(b.links[0].note, "shared entity");
2824        assert_eq!(b.links[0].inferred_by, "compiler");
2825    }
2826
2827    #[test]
2828    fn add_link_with_source_works() {
2829        let mut b = FindingBundle::new(
2830            sample_assertion(),
2831            sample_evidence(),
2832            sample_conditions(),
2833            sample_confidence(),
2834            sample_provenance(),
2835            sample_flags(),
2836        );
2837        b.add_link_with_source(
2838            "target_id",
2839            "contradicts",
2840            "opposite direction",
2841            "entity_overlap",
2842        );
2843        assert_eq!(b.links.len(), 1);
2844        assert_eq!(b.links[0].inferred_by, "entity_overlap");
2845    }
2846
2847    #[test]
2848    fn multiple_links_accumulate() {
2849        let mut b = FindingBundle::new(
2850            sample_assertion(),
2851            sample_evidence(),
2852            sample_conditions(),
2853            sample_confidence(),
2854            sample_provenance(),
2855            sample_flags(),
2856        );
2857        b.add_link("t1", "extends", "note1");
2858        b.add_link("t2", "contradicts", "note2");
2859        b.add_link("t3", "supports", "note3");
2860        assert_eq!(b.links.len(), 3);
2861    }
2862
2863    // ── ReviewEvent creation test ────────────────────────────────────
2864
2865    #[test]
2866    fn review_event_creation() {
2867        let event = ReviewEvent {
2868            id: "rev_abc123".into(),
2869            workspace: None,
2870            finding_id: "vf_abc".into(),
2871            reviewer: "0000-0001-2345-6789".into(),
2872            reviewed_at: "2024-01-01T00:00:00Z".into(),
2873            scope: None,
2874            status: None,
2875            action: ReviewAction::Approved,
2876            reason: "Looks correct".into(),
2877            evidence_considered: vec![],
2878            state_change: None,
2879        };
2880        assert_eq!(event.finding_id, "vf_abc");
2881        assert_eq!(event.reviewer, "0000-0001-2345-6789");
2882    }
2883
2884    #[test]
2885    fn review_action_corrected() {
2886        let action = ReviewAction::Corrected {
2887            field: "direction".into(),
2888            original: "positive".into(),
2889            corrected: "negative".into(),
2890        };
2891        if let ReviewAction::Corrected {
2892            field,
2893            original,
2894            corrected,
2895        } = action
2896        {
2897            assert_eq!(field, "direction");
2898            assert_eq!(original, "positive");
2899            assert_eq!(corrected, "negative");
2900        } else {
2901            panic!("Expected Corrected variant");
2902        }
2903    }
2904
2905    #[test]
2906    fn review_action_disputed() {
2907        let action = ReviewAction::Disputed {
2908            counter_evidence: "Later study contradicts".into(),
2909            counter_doi: Some("10.1234/counter".into()),
2910        };
2911        if let ReviewAction::Disputed {
2912            counter_evidence,
2913            counter_doi,
2914        } = action
2915        {
2916            assert_eq!(counter_evidence, "Later study contradicts");
2917            assert_eq!(counter_doi, Some("10.1234/counter".into()));
2918        } else {
2919            panic!("Expected Disputed variant");
2920        }
2921    }
2922
2923    // ── ConfidenceUpdate creation test ───────────────────────────────
2924
2925    #[test]
2926    fn confidence_update_creation() {
2927        let update = ConfidenceUpdate {
2928            finding_id: "vf_abc".into(),
2929            previous_score: 0.7,
2930            new_score: 0.85,
2931            basis: "grounded".into(),
2932            updated_by: "grounding_pass".into(),
2933            updated_at: "2024-01-01T00:00:00Z".into(),
2934        };
2935        assert_eq!(update.previous_score, 0.7);
2936        assert_eq!(update.new_score, 0.85);
2937        assert_eq!(update.updated_by, "grounding_pass");
2938    }
2939
2940    // ── Serialization round-trip test ────────────────────────────────
2941
2942    #[test]
2943    fn finding_serializes_and_deserializes() {
2944        let b = FindingBundle::new(
2945            sample_assertion(),
2946            sample_evidence(),
2947            sample_conditions(),
2948            sample_confidence(),
2949            sample_provenance(),
2950            sample_flags(),
2951        );
2952        let json = serde_json::to_string(&b).unwrap();
2953        let b2: FindingBundle = serde_json::from_str(&json).unwrap();
2954        assert_eq!(b.id, b2.id);
2955        assert_eq!(b.assertion.text, b2.assertion.text);
2956        assert_eq!(b.confidence.score, b2.confidence.score);
2957    }
2958
2959    #[test]
2960    fn valid_entity_types_list() {
2961        // Pre-v0.10 (bio) entries
2962        for t in ["gene", "protein", "compound", "other"] {
2963            assert!(VALID_ENTITY_TYPES.contains(&t), "missing {t}");
2964        }
2965        // v0.10 domain-neutral additions
2966        for t in ["particle", "instrument", "dataset", "quantity"] {
2967            assert!(VALID_ENTITY_TYPES.contains(&t), "missing {t}");
2968        }
2969        assert_eq!(VALID_ENTITY_TYPES.len(), 14);
2970    }
2971
2972    #[test]
2973    fn v0_10_assertion_and_source_extensions() {
2974        assert!(VALID_ASSERTION_TYPES.contains(&"measurement"));
2975        assert!(VALID_ASSERTION_TYPES.contains(&"exclusion"));
2976        assert!(VALID_PROVENANCE_SOURCE_TYPES.contains(&"data_release"));
2977    }
2978
    // ── Which fields do and do not affect the ID ─────────────────────
2980
2981    #[test]
2982    fn confidence_does_not_affect_id() {
2983        // v0.2.0: confidence is the mutable interpretation layer, not part of content address
2984        let b1 = FindingBundle::new(
2985            sample_assertion(),
2986            sample_evidence(),
2987            sample_conditions(),
2988            sample_confidence(),
2989            sample_provenance(),
2990            sample_flags(),
2991        );
2992        let mut conf2 = sample_confidence();
2993        conf2.score = 0.5;
2994        let b2 = FindingBundle::new(
2995            sample_assertion(),
2996            sample_evidence(),
2997            sample_conditions(),
2998            conf2,
2999            sample_provenance(),
3000            sample_flags(),
3001        );
3002        assert_eq!(b1.id, b2.id);
3003    }
3004
3005    #[test]
3006    fn flags_do_not_affect_id() {
3007        let b1 = FindingBundle::new(
3008            sample_assertion(),
3009            sample_evidence(),
3010            sample_conditions(),
3011            sample_confidence(),
3012            sample_provenance(),
3013            sample_flags(),
3014        );
3015        let mut flags2 = sample_flags();
3016        flags2.gap = true;
3017        flags2.contested = true;
3018        let b2 = FindingBundle::new(
3019            sample_assertion(),
3020            sample_evidence(),
3021            sample_conditions(),
3022            sample_confidence(),
3023            sample_provenance(),
3024            flags2,
3025        );
3026        // Flags are NOT in the content hash, so IDs should be the same
3027        assert_eq!(b1.id, b2.id);
3028    }
3029
3030    #[test]
3031    fn different_assertion_text_different_id() {
3032        let b1 = FindingBundle::new(
3033            sample_assertion(),
3034            sample_evidence(),
3035            sample_conditions(),
3036            sample_confidence(),
3037            sample_provenance(),
3038            sample_flags(),
3039        );
3040        let mut assertion2 = sample_assertion();
3041        assertion2.assertion_type = "therapeutic".into();
3042        let b2 = FindingBundle::new(
3043            assertion2,
3044            sample_evidence(),
3045            sample_conditions(),
3046            sample_confidence(),
3047            sample_provenance(),
3048            sample_flags(),
3049        );
3050        assert_ne!(b1.id, b2.id);
3051    }
3052
3053    #[test]
3054    fn different_doi_different_id() {
3055        let b1 = FindingBundle::new(
3056            sample_assertion(),
3057            sample_evidence(),
3058            sample_conditions(),
3059            sample_confidence(),
3060            sample_provenance(),
3061            sample_flags(),
3062        );
3063        let mut prov2 = sample_provenance();
3064        prov2.doi = Some("10.5678/other".into());
3065        let b2 = FindingBundle::new(
3066            sample_assertion(),
3067            sample_evidence(),
3068            sample_conditions(),
3069            sample_confidence(),
3070            prov2,
3071            sample_flags(),
3072        );
3073        assert_ne!(b1.id, b2.id);
3074    }
3075
3076    // ── v0.2.0 content-addressing determinism ───────────────────────
3077
3078    #[test]
3079    fn content_address_is_deterministic_across_runs() {
3080        // Two independent extraction runs with the same assertion text,
3081        // assertion type, and DOI must produce the same finding ID.
3082        let assertion1 = Assertion {
3083            text: "Mitochondrial dysfunction precedes amyloid plaque formation.".into(),
3084            assertion_type: "mechanism".into(),
3085            entities: vec![],
3086            relation: None,
3087            direction: None,
3088            causal_claim: None,
3089            causal_evidence_grade: None,
3090        };
3091        let prov1 = Provenance {
3092            source_type: "published_paper".into(),
3093            doi: Some("10.1038/s41586-023-06789-1".into()),
3094            pmid: None,
3095            pmc: None,
3096            openalex_id: None,
3097            url: None,
3098            title: "Mitochondria in AD".into(),
3099            authors: vec![],
3100            year: Some(2023),
3101            journal: None,
3102            license: None,
3103            publisher: None,
3104            funders: vec![],
3105            extraction: Extraction::default(),
3106            review: None,
3107            citation_count: None,
3108        };
3109
3110        // Different entities, evidence, conditions, confidence -- should NOT matter
3111        let assertion2 = Assertion {
3112            text: "Mitochondrial dysfunction precedes amyloid plaque formation.".into(),
3113            assertion_type: "mechanism".into(),
3114            entities: vec![Entity {
3115                name: "mitochondria".into(),
3116                entity_type: "anatomical_structure".into(),
3117                identifiers: serde_json::Map::new(),
3118                canonical_id: None,
3119                candidates: vec![],
3120                aliases: vec![],
3121                resolution_provenance: None,
3122                resolution_confidence: 1.0,
3123                resolution_method: None,
3124                species_context: None,
3125                needs_review: false,
3126            }],
3127            relation: Some("precedes".into()),
3128            direction: Some("positive".into()),
3129            causal_claim: None,
3130            causal_evidence_grade: None,
3131        };
3132        let prov2 = Provenance {
3133            source_type: "published_paper".into(),
3134            doi: Some("10.1038/s41586-023-06789-1".into()),
3135            pmid: Some("37654321".into()),
3136            pmc: None,
3137            openalex_id: None,
3138            url: None,
3139            title: "Different title".into(),
3140            authors: vec![Author {
3141                name: "Jones A".into(),
3142                orcid: None,
3143            }],
3144            year: Some(2023),
3145            journal: Some("Nature".into()),
3146            license: None,
3147            publisher: None,
3148            funders: vec![],
3149            extraction: Extraction::default(),
3150            review: None,
3151            citation_count: Some(50),
3152        };
3153
3154        let id1 = FindingBundle::content_address(&assertion1, &prov1);
3155        let id2 = FindingBundle::content_address(&assertion2, &prov2);
3156        assert_eq!(
3157            id1, id2,
3158            "Same assertion text + type + DOI must produce same ID"
3159        );
3160    }
3161
3162    #[test]
3163    fn content_address_normalizes_whitespace_and_punctuation() {
3164        let assertion1 = Assertion {
3165            text: "  NLRP3  activates   IL-1B.  ".into(),
3166            assertion_type: "mechanism".into(),
3167            entities: vec![],
3168            relation: None,
3169            direction: None,
3170            causal_claim: None,
3171            causal_evidence_grade: None,
3172        };
3173        let assertion2 = Assertion {
3174            text: "NLRP3 activates IL-1B".into(),
3175            assertion_type: "mechanism".into(),
3176            entities: vec![],
3177            relation: None,
3178            direction: None,
3179            causal_claim: None,
3180            causal_evidence_grade: None,
3181        };
3182        let prov = sample_provenance();
3183        let id1 = FindingBundle::content_address(&assertion1, &prov);
3184        let id2 = FindingBundle::content_address(&assertion2, &prov);
3185        assert_eq!(
3186            id1, id2,
3187            "Whitespace and trailing punctuation should be normalized away"
3188        );
3189    }
3190
/// With neither DOI nor PMID present, the address must still be well-formed
/// and must be reproducible from the title alone.
#[test]
fn content_address_falls_back_to_title_when_no_doi_or_pmid() {
    // Fresh provenance with both identifiers removed and a fixed title.
    let title_only = || {
        let mut provenance = sample_provenance();
        provenance.doi = None;
        provenance.pmid = None;
        provenance.title = "Fallback Title".into();
        provenance
    };
    let assertion = sample_assertion();

    let first = FindingBundle::content_address(&assertion, &title_only());
    assert!(first.starts_with("vf_"));
    assert_eq!(first.len(), 19); // "vf_" + 16 hex chars

    // Same title -> same ID
    let second = FindingBundle::content_address(&assertion, &title_only());
    assert_eq!(first, second);
}
3210
/// The same assertion addressed through DOI+PMID, PMID only, and title only
/// must yield three pairwise-distinct ids.
#[test]
fn content_address_prefers_doi_over_pmid_over_title() {
    // The title is identical in every variant; only the identifiers change.
    let provenance_with = |doi: Option<&str>, pmid: Option<&str>| {
        let mut provenance = sample_provenance();
        provenance.doi = doi.map(|value| value.into());
        provenance.pmid = pmid.map(|value| value.into());
        provenance.title = "Title".into();
        provenance
    };

    let assertion = sample_assertion();
    let with_doi = provenance_with(Some("10.1234/test"), Some("12345"));
    let with_pmid = provenance_with(None, Some("12345"));
    let with_title = provenance_with(None, None);

    let from_doi = FindingBundle::content_address(&assertion, &with_doi);
    let from_pmid = FindingBundle::content_address(&assertion, &with_pmid);
    let from_title = FindingBundle::content_address(&assertion, &with_title);

    // All three should be different since the provenance component differs
    assert_ne!(from_doi, from_pmid, "DOI vs PMID should differ");
    assert_ne!(from_pmid, from_title, "PMID vs title should differ");
    assert_ne!(from_doi, from_title, "DOI vs title should differ");
}
3239
3240    // ── compute_confidence tests ────────────────────────────────────
3241
/// Best-case inputs: a replicated (x5) meta-analysis over human data with
/// n=5000. The test expects evidence strength 0.95, every other multiplier at
/// 1.0, no penalty or calibration adjustment, and therefore a score of 0.95.
#[test]
fn compute_confidence_meta_analysis_human() {
    let evidence = Evidence {
        evidence_type: "meta_analysis".into(),
        model_system: "human cohorts".into(),
        species: Some("Homo sapiens".into()),
        method: "meta-analysis".into(),
        sample_size: Some("n=5000".into()),
        effect_size: None,
        p_value: None,
        replicated: true,
        replication_count: Some(5),
        evidence_spans: vec![],
    };
    let conditions = Conditions {
        text: String::new(),
        species_verified: vec![],
        species_unverified: vec![],
        in_vitro: false,
        in_vivo: false,
        human_data: true,
        clinical_trial: false,
        concentration_range: None,
        duration: None,
        age_group: None,
        cell_type: None,
    };
    let conf = compute_confidence(&evidence, &conditions, false);
    assert_eq!(conf.method, ConfidenceMethod::Computed);
    assert_eq!(conf.kind, ConfidenceKind::FrontierEpistemic);
    // Borrow the breakdown so `conf` stays whole for the score check below;
    // a missing breakdown fails here with a descriptive message.
    let c = conf
        .components
        .as_ref()
        .expect("computed confidence should expose its component breakdown");
    assert!((c.evidence_strength - 0.95).abs() < 0.001);
    assert!((c.replication_strength - 1.0).abs() < 0.001); // 0.7 + 0.1*5 = 1.2 -> clamped to 1.0
    assert!((c.sample_strength - 1.0).abs() < 0.001); // >1000
    assert!((c.model_relevance - 1.0).abs() < 0.001); // human_data
    assert!((c.review_penalty - 0.0).abs() < 0.001);
    assert!((c.calibration_adjustment - 0.0).abs() < 0.001);
    // 0.95 * 1.0 * 1.0 * 1.0 - 0.0 = 0.95
    assert!((conf.score - 0.95).abs() < 0.001);
}
3283
/// Weakest-case inputs: unreplicated theoretical evidence with no sample size
/// and no in-vitro / in-vivo / human / clinical flag set.
#[test]
fn compute_confidence_theoretical_no_replication() {
    let no_context = Conditions {
        text: String::new(),
        species_verified: Vec::new(),
        species_unverified: Vec::new(),
        in_vitro: false,
        in_vivo: false,
        human_data: false,
        clinical_trial: false,
        concentration_range: None,
        duration: None,
        age_group: None,
        cell_type: None,
    };
    let simulation_only = Evidence {
        evidence_type: "theoretical".into(),
        model_system: "computational".into(),
        species: None,
        method: "simulation".into(),
        sample_size: None,
        effect_size: None,
        p_value: None,
        replicated: false,
        replication_count: None,
        evidence_spans: Vec::new(),
    };

    let result = compute_confidence(&simulation_only, &no_context, false);
    let parts = result.components.unwrap();
    assert!((parts.evidence_strength - 0.30).abs() < 0.001);
    assert!((parts.replication_strength - 0.70).abs() < 0.001);
    assert!((parts.sample_strength - 0.60).abs() < 0.001);
    assert!((parts.model_relevance - 0.50).abs() < 0.001);
    // 0.30 * 0.70 * 0.50 * 0.60 = 0.063
    assert!((result.score - 0.063).abs() < 0.001);
}
3320
/// Identical evidence and conditions scored twice, differing only in the
/// contested flag: the contested score must be 0.15 lower.
#[test]
fn compute_confidence_contested_penalty() {
    let mouse_in_vivo = Conditions {
        text: String::new(),
        species_verified: Vec::new(),
        species_unverified: Vec::new(),
        in_vitro: false,
        in_vivo: true,
        human_data: false,
        clinical_trial: false,
        concentration_range: None,
        duration: None,
        age_group: None,
        cell_type: None,
    };
    let western_blot = Evidence {
        evidence_type: "experimental".into(),
        model_system: "mouse".into(),
        species: Some("Mus musculus".into()),
        method: "Western blot".into(),
        sample_size: Some("n=30".into()),
        effect_size: None,
        p_value: None,
        replicated: false,
        replication_count: None,
        evidence_spans: Vec::new(),
    };

    let baseline = compute_confidence(&western_blot, &mouse_in_vivo, false);
    let disputed = compute_confidence(&western_blot, &mouse_in_vivo, true);
    let expected = baseline.score - 0.15;
    assert!((disputed.score - expected).abs() < 0.001);
}
3352
/// Table of free-text sample-size strings and the count each must parse to.
#[test]
fn compute_confidence_sample_size_parsing() {
    let cases = [
        ("n=30", Some(30)),
        ("n = 120", Some(120)),
        ("3 cohorts of 20", Some(20)),
        ("500", Some(500)),
        ("", None),
    ];
    for (raw, expected) in cases {
        assert_eq!(parse_sample_size(raw), expected, "input {raw:?}");
    }
}
3361
/// An older confidence object that lacks `method`, `kind` and `components`
/// must still deserialize, with those fields taking their defaults.
#[test]
fn compute_confidence_v010_deserialize_compat() {
    // Simulate an older JSON confidence object (no method, no components).
    let legacy_payload = r#"{"score": 0.75, "basis": "legacy seeded confidence", "extraction_confidence": 0.85}"#;
    let parsed = serde_json::from_str::<Confidence>(legacy_payload).unwrap();

    assert!((parsed.score - 0.75).abs() < 0.001);
    assert_eq!(parsed.kind, ConfidenceKind::FrontierEpistemic);
    assert_eq!(parsed.method, ConfidenceMethod::LlmInitial); // default
    assert!(parsed.components.is_none());
}
3372
/// Components written under the legacy key names (`evidence_grade`,
/// `replication_factor`, `species_relevance`, `contradiction_penalty`) must
/// load into the current fields; the absent calibration adjustment reads as 0.
#[test]
fn compute_confidence_components_deserialize_legacy_names() {
    let legacy_payload = r#"{
            "score": 0.75,
            "basis": "legacy components",
            "method": "computed",
            "components": {
                "evidence_grade": 0.8,
                "replication_factor": 0.7,
                "sample_strength": 0.6,
                "species_relevance": 0.8,
                "contradiction_penalty": 0.15
            },
            "extraction_confidence": 0.85
        }"#;
    let parsed = serde_json::from_str::<Confidence>(legacy_payload).unwrap();
    let parts = parsed.components.unwrap();

    assert!((parts.evidence_strength - 0.8).abs() < 0.001);
    assert!((parts.replication_strength - 0.7).abs() < 0.001);
    assert!((parts.sample_strength - 0.6).abs() < 0.001);
    assert!((parts.model_relevance - 0.8).abs() < 0.001);
    assert!((parts.review_penalty - 0.15).abs() < 0.001);
    assert!((parts.calibration_adjustment - 0.0).abs() < 0.001);
}
3397
/// Serialized output must carry `kind` and the current component key names,
/// and must not emit any of the legacy key names.
#[test]
fn compute_confidence_serializes_new_component_names_and_kind() {
    let computed = compute_confidence(&sample_evidence(), &sample_conditions(), false);
    let serialized = serde_json::to_value(&computed).unwrap();
    assert_eq!(serialized["kind"], "frontier_epistemic");

    let components = &serialized["components"];
    for current_key in [
        "evidence_strength",
        "replication_strength",
        "model_relevance",
        "review_penalty",
        "calibration_adjustment",
    ] {
        assert!(components.get(current_key).is_some());
    }
    for legacy_key in [
        "evidence_grade",
        "replication_factor",
        "species_relevance",
        "contradiction_penalty",
    ] {
        assert!(components.get(legacy_key).is_none());
    }
}
3414
/// Recomputing over a single bundle replaces its seeded 0.85 score with the
/// computed one and reports exactly one changed finding.
#[test]
fn recompute_all_updates_findings() {
    let mut bundle = FindingBundle::new(
        sample_assertion(),
        sample_evidence(),
        sample_conditions(),
        sample_confidence(),
        sample_provenance(),
        sample_flags(),
    );
    // Original score is a seeded prior. The computed frontier support should differ.
    let seeded_score = bundle.confidence.score;
    assert!((seeded_score - 0.85).abs() < 0.001);

    let changed_count = recompute_all_confidence(std::slice::from_mut(&mut bundle), &[]);

    let recomputed = &bundle.confidence;
    assert_eq!(recomputed.method, ConfidenceMethod::Computed);
    assert!(recomputed.components.is_some());
    // experimental=0.80, replicated(3)=min(1.0,0.7+0.3)=1.0, in_vitro=0.6, sample=n=30 (not >30)->0.7
    // 0.80 * 1.0 * 0.6 * 0.7 = 0.336
    assert!((recomputed.score - 0.336).abs() < 0.001);
    assert_eq!(changed_count, 1);
}
3436
3437    // ── v0.38.1 causal-consistency tests ─────────────────────────────
3438
/// If the claim, the grade, or both are absent, the multiplier must be 1.0.
#[test]
fn causal_multiplier_neutral_when_either_field_none() {
    let incomplete_inputs = [
        (None, None),
        (Some(CausalClaim::Intervention), None),
        (None, Some(CausalEvidenceGrade::Rct)),
    ];
    for (claim, grade) in incomplete_inputs {
        let multiplier = causal_consistency_multiplier(claim, grade);
        assert!((multiplier - 1.0).abs() < 1e-12);
    }
}
3451
/// RCT-grade evidence must yield a 1.10 multiplier for correlation,
/// mediation and intervention claims alike.
#[test]
fn rct_grade_bumps_any_claim() {
    let claims = [
        CausalClaim::Correlation,
        CausalClaim::Mediation,
        CausalClaim::Intervention,
    ];
    for claim in claims {
        let multiplier =
            causal_consistency_multiplier(Some(claim), Some(CausalEvidenceGrade::Rct));
        let deviation = (multiplier - 1.10).abs();
        assert!(deviation < 1e-12, "RCT should bump claim {claim:?}");
    }
}
3467
/// An intervention claim backed only by observational evidence must be
/// scaled by 0.65.
#[test]
fn observational_intervention_gets_strong_penalty() {
    let claim = Some(CausalClaim::Intervention);
    let grade = Some(CausalEvidenceGrade::Observational);
    let multiplier = causal_consistency_multiplier(claim, grade);
    let deviation = (multiplier - 0.65).abs();
    assert!(
        deviation < 1e-12,
        "intervention from observational should be 0.65, got {multiplier}"
    );
}
3479
/// A correlation claim must keep a 1.0 multiplier under quasi-experimental,
/// observational and theoretical grades.
#[test]
fn correlation_neutral_under_any_grade() {
    // RCT is left out on purpose: `rct_grade_bumps_any_claim` expects 1.10
    // for a correlation claim under that grade.
    let non_rct_grades = [
        CausalEvidenceGrade::QuasiExperimental,
        CausalEvidenceGrade::Observational,
        CausalEvidenceGrade::Theoretical,
    ];
    for grade in non_rct_grades {
        let multiplier =
            causal_consistency_multiplier(Some(CausalClaim::Correlation), Some(grade));
        let deviation = (multiplier - 1.0).abs();
        assert!(
            deviation < 1e-12,
            "correlation should be neutral for grade {grade:?}, got {multiplier}"
        );
    }
}
3494
/// Backward compatibility: without causal fields, the component-based entry
/// point must reproduce the score of `compute_confidence` and report a
/// neutral causal-consistency multiplier.
#[test]
fn confidence_score_unchanged_for_pre_v0_38_findings() {
    // Backward compat: a finding with no causal fields produces
    // exactly the same score as before v0.38.1 (when n_replicated=0
    // legacy ⇔ replicated=false scalar).
    let evidence = Evidence {
        replicated: false,
        replication_count: None,
        ..sample_evidence()
    };
    let conditions = sample_conditions();

    let legacy_score = compute_confidence(&evidence, &conditions, false).score;
    let kernel_score =
        compute_confidence_from_components(&evidence, &conditions, false, 0, 0, 0, None, None)
            .score;
    assert!((legacy_score - kernel_score).abs() < 1e-12);

    // And the components carry the neutral 1.0 multiplier.
    let kernel_result =
        compute_confidence_from_components(&evidence, &conditions, false, 0, 0, 0, None, None);
    let causal_multiplier = kernel_result.components.unwrap().causal_consistency;
    assert!((causal_multiplier - 1.0).abs() < 1e-12);
}
3513
/// Scoring the same evidence and conditions with an intervention claim on
/// observational grade must lower the score by more than 0.05 compared with
/// the no-causal-fields reading.
#[test]
fn intervention_from_observational_drops_score_meaningfully() {
    // Same evidence + conditions, two readings: neutral vs
    // intervention-from-observational. The latter should drop.
    let evidence = sample_evidence();
    let conditions = sample_conditions();

    let neutral =
        compute_confidence_from_components(&evidence, &conditions, false, 0, 0, 0, None, None);
    let claim = Some(CausalClaim::Intervention);
    let grade = Some(CausalEvidenceGrade::Observational);
    let penalized =
        compute_confidence_from_components(&evidence, &conditions, false, 0, 0, 0, claim, grade);

    let score_drop = neutral.score - penalized.score;
    assert!(
        score_drop > 0.05,
        "observational-intervention should drop score noticeably; got {score_drop}"
    );
}
3537
/// Deserializes the legacy BBB correction fixture and checks the identifying
/// fields, the qualified action target, the evidence list and the retired
/// assumption recorded in `state_change`.
#[test]
fn parses_bbb_review_event_with_richer_schema() {
    let fixture = include_str!("../embedded/tests/fixtures/legacy/rev_001_bbb_correction.json");
    let event: ReviewEvent = serde_json::from_str(fixture).unwrap();

    assert_eq!(event.id, "rev_001_bbb_correction");
    assert_eq!(event.workspace.as_deref(), Some("projects/bbb-flagship"));
    assert_eq!(event.scope.as_deref(), Some("bbb_opening_trusted_subset"));
    assert_eq!(event.status.as_deref(), Some("accepted"));

    let qualifies_trusted_interpretation = matches!(
        event.action,
        ReviewAction::Qualified { ref target } if target == "trusted_interpretation"
    );
    assert!(qualifies_trusted_interpretation);

    assert_eq!(event.evidence_considered.len(), 3);
    let first_role = event.evidence_considered[0].role.as_deref();
    assert_eq!(first_role, Some("qualifier"));

    let retired_assumption = event
        .state_change
        .as_ref()
        .and_then(|change| change.get("assumption_retired"))
        .and_then(|entry| entry.as_str());
    assert_eq!(
        retired_assumption,
        Some("safe opening implies therapeutic efficacy")
    );
}
3565
/// Builds an artifact from a bare hex digest and checks that the id is
/// `va_`-prefixed, the stored hash gains a `sha256:` prefix, and the kind is
/// kept as given.
#[test]
fn artifact_requires_sha256_and_stable_kind() {
    let kind = "clinical_trial_record";
    let digest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

    let built = Artifact::new(
        kind,
        "AHEAD 3-45",
        digest,
        Some(42),
        Some("application/json".into()),
        "local_blob",
        Some(".vela/artifact-blobs/sha256/aaaaaaaa".into()),
        Some("https://clinicaltrials.gov/study/NCT04468659".into()),
        Some("ClinicalTrials.gov public record".into()),
        vec!["vf_demo".into()],
        sample_provenance(),
        BTreeMap::new(),
        crate::access_tier::AccessTier::Public,
    )
    .unwrap();

    assert!(built.id.starts_with("va_"));
    assert_eq!(built.content_hash, format!("sha256:{digest}").as_str());
    assert_eq!(built.kind, kind);
}
3592}
vela_protocol/bundle.rs

vela_protocol/
bundle.rs