Skip to main content

vela_protocol/
project.rs

1//! Stage 5: ASSEMBLE — build the project with stats and metadata.
2
3use std::collections::{HashMap, HashSet};
4
5use chrono::Utc;
6use serde::{Deserialize, Serialize};
7
8use crate::bundle::{ConfidenceUpdate, FindingBundle, ReviewEvent};
9use crate::events::StateEvent;
10use crate::proposals::{ProofState, StateProposal};
11use crate::sign::{ActorRecord, SignedEnvelope};
12use crate::sources::{ConditionRecord, EvidenceAtom, SourceRecord};
13
/// A dependency on another project (like a Cargo dependency for science).
///
/// v0.8 extends this with three optional fields that turn it into a
/// **cross-frontier dependency**: when `vfr_id` is set, the entry pins
/// a remote frontier by its content-addressed id and a snapshot hash.
/// `Link.target` values of the form `vf_<id>@vfr_<id>` resolve through
/// here. Without `vfr_id`, the entry behaves as a pre-v0.8 compile-time
/// dependency record.
///
/// The three v0.8 fields are omitted from serialized output when they are
/// `None`, so pre-v0.8 records serialize without them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProjectDependency {
    /// Name of the dependency.
    pub name: String,
    /// Origin of the dependency. The tests in this file use values such as
    /// `"local"` and `"vela.hub"`; the full set of accepted values is not
    /// defined here.
    pub source: String,
    /// Optional version string for the dependency.
    pub version: Option<String>,
    /// Optional pinned hash for the dependency record.
    /// NOTE(review): presumably a content hash of the dependency as
    /// compiled — confirm against the code that populates it.
    pub pinned_hash: Option<String>,
    /// v0.8: content-addressed id of the dependent frontier.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub vfr_id: Option<String>,
    /// v0.8: where to fetch the dependent frontier file from
    /// (typically an `https://…` URL pointing at raw JSON).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locator: Option<String>,
    /// v0.8: SHA-256 of the canonical snapshot the dependent commits
    /// to. Strict pull verifies the fetched dependency's actual
    /// `snapshot_hash` matches this value before satisfying any link.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pinned_snapshot_hash: Option<String>,
}
41
42impl ProjectDependency {
43    /// True if this entry declares a cross-frontier dependency
44    /// (`vfr_id` is set). Pre-v0.8 entries return `false`.
45    pub fn is_cross_frontier(&self) -> bool {
46        self.vfr_id.is_some()
47    }
48}
49
/// The assembled frontier document: metadata, derived statistics, the
/// findings themselves, and the append-only logs and kernel-object
/// collections that accompany them.
///
/// Most collections carry `#[serde(default)]` so documents that lack them
/// still deserialize; those that also carry `skip_serializing_if` are
/// omitted from output when empty.
#[derive(Debug, Serialize, Deserialize)]
pub struct Project {
    /// Protocol version string; `assemble()` sets it to `VELA_SCHEMA_VERSION`.
    pub vela_version: String,
    /// Schema URL; `assemble()` sets it to `VELA_SCHEMA_URL`.
    pub schema: String,
    /// Stable Vela-addressable frontier ID, derived from a `frontier.created`
    /// genesis event hash. Optional for backward compatibility with v0.2
    /// frontiers; new v0.3 frontiers populate it on `assemble()`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub frontier_id: Option<String>,
    /// Frontier metadata. Serialized under the key `frontier`, not `project`.
    #[serde(rename = "frontier")]
    pub project: ProjectMeta,
    /// Derived summary statistics; `assemble()` starts from the default value.
    pub stats: ProjectStats,
    /// The finding bundles that make up this frontier.
    pub findings: Vec<FindingBundle>,
    /// Source artifacts that produced evidence-bearing units.
    #[serde(default)]
    pub sources: Vec<SourceRecord>,
    /// Materialized source-grounded evidence units linked to findings.
    #[serde(default)]
    pub evidence_atoms: Vec<EvidenceAtom>,
    /// Materialized condition boundaries used to avoid claim overgeneralization.
    #[serde(default)]
    pub condition_records: Vec<ConditionRecord>,
    /// Append-only log of review events (content-addressed).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub review_events: Vec<ReviewEvent>,
    /// Append-only log of confidence updates.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub confidence_updates: Vec<ConfidenceUpdate>,
    /// Canonical append-only event log for replayable frontier state.
    /// `assemble()` seeds it with the `frontier.created` genesis event.
    #[serde(default)]
    pub events: Vec<StateEvent>,
    /// Portable pending/applied proposal records for proposal-first writes.
    #[serde(default)]
    pub proposals: Vec<StateProposal>,
    /// Frontier-local proof freshness projection.
    #[serde(default)]
    pub proof_state: ProofState,
    /// Cryptographic signatures for findings (Ed25519).
    #[serde(default)]
    pub signatures: Vec<SignedEnvelope>,
    /// Registered actor identities, mapping a stable actor.id to an
    /// Ed25519 public key. Phase M (v0.4): once an actor is registered,
    /// any canonical event referencing that actor.id under
    /// `--strict` must carry a verifiable Ed25519 signature.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub actors: Vec<ActorRecord>,
    /// v0.32: Replication attempts as first-class kernel objects. Each
    /// `Replication` is content-addressed (`vrep_<hash>`) over its
    /// target finding, attempting actor, conditions, and outcome. Replaces
    /// the prior scalar pattern (`Evidence.replicated: bool` +
    /// `Evidence.replication_count: u32`) which couldn't represent
    /// independent attempts under different conditions. The legacy
    /// scalar fields are preserved on `Evidence` for backward
    /// compatibility; v0.32+ frontiers can derive them from this
    /// collection.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub replications: Vec<crate::bundle::Replication>,
    /// v0.33: Datasets as first-class kernel objects. A `vd_<hash>`
    /// captures a versioned, content-addressed reference to data that
    /// anchors empirical claims. Distinct from `Provenance` (which
    /// describes the paper) — a single paper may publish multiple
    /// datasets, and a single dataset may be reused across many papers.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub datasets: Vec<crate::bundle::Dataset>,
    /// v0.33: Code artifacts as first-class kernel objects. A `vc_<hash>`
    /// is a content-addressed pointer at a specific region of source
    /// code at a specific git commit. Claims can reference the code
    /// that produced them, not only a repository name in prose.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub code_artifacts: Vec<crate::bundle::CodeArtifact>,
    /// Generic content-addressed artifacts: protocols, trial registry
    /// records, supplements, notebooks, source files, and dataset
    /// manifests that need a durable byte or pointer commitment.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub artifacts: Vec<crate::bundle::Artifact>,
    /// v0.34: Predictions as first-class kernel objects. A `vpred_<hash>`
    /// is a falsifiable claim about a future observation, scoped to
    /// existing findings and tied to a registered actor. Calibration
    /// scoring runs over the resolved subset.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub predictions: Vec<crate::bundle::Prediction>,
    /// v0.34: Resolutions as first-class kernel objects. A `vres_<hash>`
    /// closes out a Prediction by recording what actually happened.
    /// Together with `Project.predictions`, this is the kernel's
    /// epistemic accountability ledger.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub resolutions: Vec<crate::bundle::Resolution>,
    /// v0.39: Federation peer registry. Each `PeerHub` declares
    /// another hub this frontier knows about — id, HTTPS URL, and the
    /// Ed25519 pubkey that peer signs their manifests with. Adding a
    /// peer doesn't yet trust their state; it just establishes who we
    /// know about. The actual sync runtime ships in v0.39.1+.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub peers: Vec<crate::federation::PeerHub>,
    /// v0.50: Trajectories as first-class kernel objects. A
    /// `vtr_<hash>` records the ordered search path that produced (or
    /// did not produce) a finding — hypotheses considered, branches
    /// tried, branches ruled out and why. The eighth essay primitive,
    /// "deposited last and most thinly because labs have real
    /// reasons not to expose dead ends," but represented structurally
    /// so an agent that does choose to deposit can prevent the next
    /// agent from re-deriving a ruled-out branch.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub trajectories: Vec<crate::bundle::Trajectory>,
    /// v0.49: NegativeResults as first-class kernel objects. A `vnr_<hash>`
    /// records an experiment or trial that did not support its
    /// hypothesis — registered-trial with power and effect-size bounds,
    /// or exploratory wet-lab dead end with the (reagent, condition,
    /// observed outcome) tuple. The substrate primitive that lets
    /// "absence of evidence" and "evidence of absence" round-trip
    /// distinctly through downstream confidence math instead of being
    /// flattened into a private "we tried that, it didn't work."
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub negative_results: Vec<crate::bundle::NegativeResult>,
}
165
/// Descriptive metadata for a frontier. Serialized under the `frontier`
/// key of `Project`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectMeta {
    /// Frontier name as passed to `assemble()`.
    pub name: String,
    /// Free-text description as passed to `assemble()`.
    pub description: String,
    /// Compilation timestamp; `assemble()` writes the current UTC time in
    /// RFC 3339 form.
    pub compiled_at: String,
    /// Compiler identifier; `assemble()` writes `VELA_COMPILER_VERSION`.
    pub compiler: String,
    /// Number of papers processed, as reported by the caller of `assemble()`.
    pub papers_processed: usize,
    /// Number of errors, as reported by the caller of `assemble()`.
    pub errors: usize,
    /// Declared dependencies on other projects/frontiers. Defaults to empty
    /// when absent from the serialized document.
    #[serde(default)]
    pub dependencies: Vec<ProjectDependency>,
}
177
/// Derived summary statistics for a frontier.
///
/// NOTE(review): these values are presumably maintained by
/// `recompute_stats`; the exact definition of each counter lives there and
/// should be confirmed against it.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct ProjectStats {
    /// Number of findings.
    pub findings: usize,
    /// Number of links across findings.
    pub links: usize,
    /// Number of findings counted as replicated.
    pub replicated: usize,
    /// Number of findings counted as not replicated.
    pub unreplicated: usize,
    /// Average confidence score across findings.
    pub avg_confidence: f64,
    /// Gap count. NOTE(review): definition not visible in this file section.
    pub gaps: usize,
    /// Negative-space count. NOTE(review): definition not visible in this
    /// file section.
    pub negative_space: usize,
    /// Contested count. NOTE(review): presumably findings whose
    /// `flags.contested` is set — confirm.
    pub contested: usize,
    /// Per-category counts, keyed by category name.
    pub categories: HashMap<String, usize>,
    /// Per-link-type counts, keyed by link type.
    pub link_types: HashMap<String, usize>,
    /// Human-reviewed count. NOTE(review): definition not visible in this
    /// file section.
    pub human_reviewed: usize,
    /// Number of review events in this frontier.
    #[serde(default)]
    pub review_event_count: usize,
    /// Number of confidence updates in this frontier.
    #[serde(default)]
    pub confidence_update_count: usize,
    /// Number of canonical state events in this frontier.
    #[serde(default)]
    pub event_count: usize,
    /// Number of source records in the frontier source registry.
    #[serde(default)]
    pub source_count: usize,
    /// Number of materialized evidence atoms in the frontier.
    #[serde(default)]
    pub evidence_atom_count: usize,
    /// Number of materialized condition records in the frontier.
    #[serde(default)]
    pub condition_record_count: usize,
    /// Number of persisted proposals in the frontier.
    #[serde(default)]
    pub proposal_count: usize,
    /// Findings bucketed by confidence score.
    pub confidence_distribution: ConfidenceDistribution,
}
214
/// Three-bucket histogram of finding confidence.
///
/// NOTE(review): the field names suggest thresholds at 0.80 and 0.60 on the
/// confidence score; whether each boundary is inclusive is decided where the
/// buckets are filled, which is not visible here — confirm there.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct ConfidenceDistribution {
    /// Count in the high bucket (name suggests score above 0.80).
    pub high_gt_80: usize,
    /// Count in the medium bucket (name suggests score between 0.60 and 0.80).
    pub medium_60_80: usize,
    /// Count in the low bucket (name suggests score below 0.60).
    pub low_lt_60: usize,
}
221
/// Schema and compiler defaults for the current Vela protocol release.
///
/// URL written to `Project.schema` by `assemble()`.
pub const VELA_SCHEMA_URL: &str = "https://vela.science/schema/finding-bundle/v0.10.0";
/// Version written to `Project.vela_version` by `assemble()` and recorded
/// as `schema_version` in the genesis event payload.
pub const VELA_SCHEMA_VERSION: &str = "0.10.0";
/// Compiler identifier: `vela/` followed by this crate's `CARGO_PKG_VERSION`,
/// resolved at compile time.
pub const VELA_COMPILER_VERSION: &str = concat!("vela/", env!("CARGO_PKG_VERSION"));
226
227/// Derive a `vfr_<hash>` frontier ID from frontier metadata. Used as a
228/// fallback for legacy frontiers without a `frontier.created` genesis
229/// event; v0.4+ frontiers derive from the genesis event itself via
230/// `frontier_id_from_genesis`.
231#[must_use]
232pub fn derive_frontier_id_from_meta(meta: &ProjectMeta) -> String {
233    let preimage = serde_json::json!({
234        "name": meta.name,
235        "compiled_at": meta.compiled_at,
236        "compiler": meta.compiler,
237    });
238    let bytes = crate::canonical::to_canonical_bytes(&preimage).unwrap_or_default();
239    use sha2::{Digest, Sha256};
240    format!("vfr_{}", &hex::encode(Sha256::digest(bytes))[..16])
241}
242
243/// Derive a `vfr_<hash>` frontier ID from the canonical hash of the
244/// `frontier.created` genesis event. Returns `None` if `events[0]` is
245/// absent or not a `frontier.created` event (legacy frontiers fall back
246/// to meta-derivation via `derive_frontier_id_from_meta`).
247///
248/// The preimage shape matches `event_id` exactly so the same canonical
249/// rule produces both the event's `vev_…` and the frontier's `vfr_…`
250/// from the same logical content. Doctrine line: a frontier IS what the
251/// `frontier.created` event creates.
252#[must_use]
253pub fn frontier_id_from_genesis(events: &[crate::events::StateEvent]) -> Option<String> {
254    let genesis = events.first()?;
255    if genesis.kind != "frontier.created" {
256        return None;
257    }
258    let preimage = serde_json::json!({
259        "schema": genesis.schema,
260        "kind": genesis.kind,
261        "target": genesis.target,
262        "actor": genesis.actor,
263        "timestamp": genesis.timestamp,
264        "reason": genesis.reason,
265        "before_hash": genesis.before_hash,
266        "after_hash": genesis.after_hash,
267        "payload": genesis.payload,
268        "caveats": genesis.caveats,
269    });
270    let bytes = crate::canonical::to_canonical_bytes(&preimage).ok()?;
271    use sha2::{Digest, Sha256};
272    Some(format!("vfr_{}", &hex::encode(Sha256::digest(bytes))[..16]))
273}
274
275/// Construct the `frontier.created` canonical event for a freshly
276/// compiled frontier. The event becomes `events[0]` and the frontier_id
277/// derives from its canonical hash.
278///
279/// Targets `frontier:<name>` (not `finding:…`) so replay's orphan-target
280/// detection does not flag it; the genesis event carries identity, not a
281/// finding mutation.
282fn build_genesis_event(name: &str, compiled_at: &str, creator: &str) -> crate::events::StateEvent {
283    use crate::events::{EVENT_SCHEMA, NULL_HASH, StateActor, StateEvent, StateTarget};
284    let mut event = StateEvent {
285        schema: EVENT_SCHEMA.to_string(),
286        id: String::new(),
287        kind: "frontier.created".to_string(),
288        target: StateTarget {
289            r#type: "frontier".to_string(),
290            id: name.to_string(),
291        },
292        actor: StateActor {
293            id: creator.to_string(),
294            r#type: "frontier".to_string(),
295        },
296        timestamp: compiled_at.to_string(),
297        reason: "frontier compiled".to_string(),
298        before_hash: NULL_HASH.to_string(),
299        after_hash: NULL_HASH.to_string(),
300        payload: serde_json::json!({
301            "name": name,
302            "creator": creator,
303            "schema_version": VELA_SCHEMA_VERSION,
304            "compiled_at": compiled_at,
305        }),
306        caveats: vec![],
307        signature: None,
308    };
309    event.id = crate::events::compute_event_id(&event);
310    event
311}
312
313pub fn assemble(
314    name: &str,
315    bundles: Vec<FindingBundle>,
316    papers_processed: usize,
317    errors: usize,
318    description: &str,
319) -> Project {
320    let compiled_at = Utc::now().to_rfc3339();
321    let meta = ProjectMeta {
322        name: name.to_string(),
323        description: description.to_string(),
324        compiled_at: compiled_at.clone(),
325        compiler: VELA_COMPILER_VERSION.to_string(),
326        papers_processed,
327        errors,
328        dependencies: Vec::new(),
329    };
330    // Phase J (v0.4): emit a `frontier.created` canonical event as
331    // events[0] and derive frontier_id from its canonical hash. The
332    // address primitive becomes doctrine-grounded — a frontier IS what
333    // the genesis event creates, not a convenience over its metadata.
334    let genesis = build_genesis_event(name, &compiled_at, VELA_COMPILER_VERSION);
335    let frontier_id = frontier_id_from_genesis(std::slice::from_ref(&genesis));
336    let mut project = Project {
337        vela_version: VELA_SCHEMA_VERSION.to_string(),
338        schema: VELA_SCHEMA_URL.to_string(),
339        frontier_id,
340        project: meta,
341        stats: ProjectStats::default(),
342        findings: bundles,
343        sources: Vec::new(),
344        evidence_atoms: Vec::new(),
345        condition_records: Vec::new(),
346        review_events: Vec::new(),
347        confidence_updates: Vec::new(),
348        events: vec![genesis],
349        proposals: Vec::new(),
350        proof_state: ProofState::default(),
351        signatures: Vec::new(),
352        actors: Vec::new(),
353        replications: Vec::new(),
354        datasets: Vec::new(),
355        code_artifacts: Vec::new(),
356        artifacts: Vec::new(),
357        predictions: Vec::new(),
358        resolutions: Vec::new(),
359        peers: Vec::new(),
360        negative_results: Vec::new(),
361        trajectories: Vec::new(),
362    };
363    crate::sources::materialize_project(&mut project);
364    project
365}
366
367impl Project {
368    /// Return the stable Vela-addressable frontier ID. Prefers the stored
369    /// field; if absent, derives from the `frontier.created` genesis
370    /// event in `events[0]`; if no genesis event is present, falls back
371    /// to meta-derivation (legacy v0.3 frontiers).
372    #[must_use]
373    pub fn frontier_id(&self) -> String {
374        if let Some(id) = self.frontier_id.clone() {
375            return id;
376        }
377        if let Some(id) = frontier_id_from_genesis(&self.events) {
378            return id;
379        }
380        derive_frontier_id_from_meta(&self.project)
381    }
382
383    /// Materialize the frontier_id field if absent. Idempotent.
384    pub fn ensure_frontier_id(&mut self) -> String {
385        if self.frontier_id.is_none() {
386            self.frontier_id = Some(self.frontier_id());
387        }
388        self.frontier_id.clone().unwrap()
389    }
390
391    /// v0.36.1: Compute frontier-epistemic confidence for a finding using
392    /// the v0.32 `Replication` collection as the authoritative source. A
393    /// failed replication subtracts from confidence; a successful one
394    /// adds to it; partials half-add. This closes the long-standing
395    /// "two sources of truth" between `Evidence.replicated` (the legacy
396    /// scalar set when a finding was first asserted) and
397    /// `Project.replications` (the kernel objects accumulated over time).
398    ///
399    /// Falls back to the legacy scalar only when no `Replication` record
400    /// targets this finding's id — preserves behavior for unmigrated
401    /// frontiers.
402    #[must_use]
403    pub fn compute_confidence_for(&self, bundle: &FindingBundle) -> crate::bundle::Confidence {
404        let (n_repl, n_failed, n_partial) =
405            crate::bundle::count_replication_outcomes(&self.replications, &bundle.id);
406        let (n_repl, n_failed, n_partial) = if n_repl + n_failed + n_partial == 0 {
407            let legacy = if bundle.evidence.replicated {
408                bundle.evidence.replication_count.unwrap_or(1)
409            } else {
410                0
411            };
412            (legacy, 0, 0)
413        } else {
414            (n_repl, n_failed, n_partial)
415        };
416        crate::bundle::compute_confidence_from_components(
417            &bundle.evidence,
418            &bundle.conditions,
419            bundle.flags.contested,
420            n_repl,
421            n_failed,
422            n_partial,
423            bundle.assertion.causal_claim,
424            bundle.assertion.causal_evidence_grade,
425        )
426    }
427
428    /// v0.8: iterate the cross-frontier dependencies (those with
429    /// `vfr_id` set). Pre-v0.8 compile-time deps without `vfr_id`
430    /// are filtered out.
431    pub fn cross_frontier_deps(&self) -> impl Iterator<Item = &ProjectDependency> {
432        self.project
433            .dependencies
434            .iter()
435            .filter(|d| d.is_cross_frontier())
436    }
437
438    /// v0.8: look up the dependency record for a specific `vfr_id`.
439    /// Returns `None` if no matching cross-frontier dep is declared.
440    pub fn dep_for_vfr(&self, vfr_id: &str) -> Option<&ProjectDependency> {
441        self.cross_frontier_deps()
442            .find(|d| d.vfr_id.as_deref() == Some(vfr_id))
443    }
444
445    /// v0.49.3: build a reverse-dependency index from the forward
446    /// `links: Vec<Link>` data on each finding. The forward direction
447    /// (which findings does this finding depend on?) is O(1) per
448    /// finding because it's just `f.links`. The reverse direction
449    /// (which findings depend on this finding?) previously required
450    /// scanning every finding for every query — O(N×L). This index
451    /// flips that to O(1) lookup once built.
452    ///
453    /// Cost to build: O(N×L) one-time scan over all findings × links.
454    /// At 48 findings × ~3 links each (the legacy BBB proof fixture),
455    /// that's ~150 hash-insert operations and microseconds. At
456    /// 100K findings × 10 links, it's still well under a second.
457    ///
458    /// Used by retraction-impact queries (serve.rs), cascade
459    /// computation, and any consumer that needs to walk the dependent
460    /// graph rather than the dependency graph. The index is not
461    /// serialized — it's a derived structure that callers build when
462    /// they need it and drop when they don't.
463    #[must_use]
464    pub fn build_reverse_dep_index(&self) -> ReverseDepIndex {
465        let mut map: std::collections::HashMap<String, Vec<String>> =
466            std::collections::HashMap::with_capacity(self.findings.len());
467        for f in &self.findings {
468            for link in &f.links {
469                map.entry(link.target.clone())
470                    .or_default()
471                    .push(f.id.clone());
472            }
473        }
474        // Stable sort each dependent list so two implementations of the
475        // index agree on ordering for any downstream serialization.
476        for v in map.values_mut() {
477            v.sort();
478            v.dedup();
479        }
480        ReverseDepIndex { map }
481    }
482}
483
/// v0.49.3: reverse-dependency index over a Project's forward `links`
/// graph. Each key is a link target id and each value lists the ids of
/// the findings that link to it, so "what depends on X?" is a single
/// map lookup instead of an O(N×L) scan.
///
/// Build one with `Project::build_reverse_dep_index`. The index is a
/// snapshot: it does not track later mutations of the Project, so
/// long-lived consumers that mutate state should rebuild it after each
/// reduce step.
#[derive(Debug, Clone, Default)]
pub struct ReverseDepIndex {
    map: HashMap<String, Vec<String>>,
}

impl ReverseDepIndex {
    /// Ids of the findings whose forward `links` name `finding_id` as a
    /// target. Yields an empty slice when nothing depends on the finding
    /// or when the id is not in the index at all.
    #[must_use]
    pub fn dependents_of(&self, finding_id: &str) -> &[String] {
        match self.map.get(finding_id) {
            Some(dependents) => dependents.as_slice(),
            None => &[],
        }
    }

    /// Total number of dependent edges held by the index. Handy for
    /// sanity checks and metric reporting.
    #[must_use]
    pub fn edge_count(&self) -> usize {
        self.map
            .values()
            .fold(0, |total, dependents| total + dependents.len())
    }

    /// Number of distinct targets that have at least one dependent.
    #[must_use]
    pub fn target_count(&self) -> usize {
        self.map.len()
    }

    /// Iterate `(target_finding_id, dependents)` pairs. The order is
    /// HashMap iteration order and is not stable across runs; sort the
    /// output if a consumer needs determinism.
    pub fn iter(&self) -> impl Iterator<Item = (&String, &Vec<String>)> {
        self.map.iter()
    }
}
529
#[cfg(test)]
mod cross_frontier_dep_tests {
    use super::*;

    /// A pre-v0.8 dependency record: none of the v0.8 fields are set.
    fn dep_local(name: &str) -> ProjectDependency {
        ProjectDependency {
            name: name.into(),
            source: "local".into(),
            version: None,
            pinned_hash: None,
            vfr_id: None,
            locator: None,
            pinned_snapshot_hash: None,
        }
    }

    /// A v0.8 cross-frontier dependency pinned to `vfr`, with a locator
    /// and a placeholder 64-character snapshot hash.
    fn dep_cross(vfr: &str) -> ProjectDependency {
        ProjectDependency {
            name: "ext".into(),
            source: "vela.hub".into(),
            version: None,
            pinned_hash: None,
            vfr_id: Some(vfr.into()),
            locator: Some(format!("https://example.test/{vfr}.json")),
            pinned_snapshot_hash: Some("a".repeat(64)),
        }
    }

    #[test]
    fn is_cross_frontier_only_when_vfr_id_set() {
        assert!(!dep_local("x").is_cross_frontier());
        assert!(dep_cross("vfr_abc").is_cross_frontier());
    }

    #[test]
    fn dep_serializes_byte_identical_when_v0_8_fields_absent() {
        // Backward compat: a pre-v0.8 dep serializes without emitting any
        // of the new optional v0.8 fields...
        let d = dep_local("legacy");
        let s = serde_json::to_string(&d).unwrap();
        assert!(!s.contains("vfr_id"));
        assert!(!s.contains("locator"));
        assert!(!s.contains("pinned_snapshot_hash"));

        // ...and that output deserializes back to an equal record, so the
        // round trip through serde is lossless.
        let back: ProjectDependency = serde_json::from_str(&s).unwrap();
        assert_eq!(back, d);
    }
}
575
/// Tests for `Project::build_reverse_dep_index` and `ReverseDepIndex`.
#[cfg(test)]
mod reverse_dep_index_tests {
    use super::*;
    use crate::bundle::{
        Assertion, Author, Conditions, Confidence, ConfidenceKind, ConfidenceMethod, Evidence,
        Extraction, FindingBundle, Flags, Link, Provenance,
    };

    /// Build a minimal synthetic finding carrying the given forward links.
    ///
    /// `idx` is woven into the assertion text, DOI, and paper title so each
    /// call produces distinct content.
    /// NOTE(review): the tests rely on `FindingBundle::new` assigning a
    /// distinct `id` per distinct content — confirm against its definition.
    fn synth_finding(idx: usize, links: Vec<Link>) -> FindingBundle {
        let assertion = Assertion {
            text: format!("Synthetic finding {idx}"),
            assertion_type: "mechanism".into(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };
        let evidence = Evidence {
            evidence_type: "experimental".into(),
            model_system: "test".into(),
            species: None,
            method: "test".into(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated: false,
            replication_count: None,
            evidence_spans: vec![],
        };
        let conditions = Conditions {
            text: "test".into(),
            species_verified: vec![],
            species_unverified: vec![],
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };
        let confidence = Confidence {
            kind: ConfidenceKind::FrontierEpistemic,
            score: 0.5,
            basis: "test".into(),
            method: ConfidenceMethod::LlmInitial,
            components: None,
            extraction_confidence: 0.9,
        };
        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: Some(format!("10.0000/reverse-dep-index-test.{idx:04}")),
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: format!("Synthetic test paper {idx}"),
            authors: vec![Author {
                name: "T".into(),
                orcid: None,
            }],
            year: None,
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction::default(),
            review: None,
            citation_count: None,
        };
        let flags = Flags::default();
        let mut bundle = FindingBundle::new(
            assertion, evidence, conditions, confidence, provenance, flags,
        );
        // Links are attached after construction.
        bundle.links = links;
        bundle
    }

    /// Build a `supports` link pointing at `target`, with fixed filler
    /// values for every other field.
    fn link_to(target: &str) -> Link {
        Link {
            target: target.into(),
            link_type: "supports".into(),
            note: "test".into(),
            inferred_by: "test".into(),
            created_at: "2026-05-02T00:00:00Z".into(),
            mechanism: None,
        }
    }

    /// Build a chain: 0 → 1 → 2 → 3 (each finding supports the next).
    /// Then dependents_of(2) should return [1], dependents_of(1) → [0],
    /// dependents_of(3) → [2], dependents_of(0) → [] (root, nothing
    /// depends on it).
    #[test]
    fn dependents_of_returns_correct_set_for_simple_chain() {
        // Built back-to-front because each link needs its target's id.
        let f3 = synth_finding(3, vec![]);
        let f2 = synth_finding(2, vec![link_to(&f3.id)]);
        let f1 = synth_finding(1, vec![link_to(&f2.id)]);
        let f0 = synth_finding(0, vec![link_to(&f1.id)]);

        // Findings are assigned after `assemble` rather than passed in.
        let mut project = assemble("chain", vec![], 0, 0, "test");
        project.findings = vec![f0.clone(), f1.clone(), f2.clone(), f3.clone()];

        let idx = project.build_reverse_dep_index();
        assert_eq!(idx.dependents_of(&f3.id), &[f2.id.clone()]);
        assert_eq!(idx.dependents_of(&f2.id), &[f1.id.clone()]);
        assert_eq!(idx.dependents_of(&f1.id), &[f0.id.clone()]);
        assert!(idx.dependents_of(&f0.id).is_empty());
        // Edge count = 3 (one per non-root link).
        assert_eq!(idx.edge_count(), 3);
        // Target count = 3 (f1, f2, f3 each have a dependent).
        assert_eq!(idx.target_count(), 3);
    }

    /// Multiple findings depending on the same target should produce a
    /// sorted, deduped dependent list.
    #[test]
    fn dependents_of_dedups_and_sorts() {
        let target = synth_finding(99, vec![]);
        let target_id = target.id.clone();
        // f1, f2, f3 all link to target. Plus f1 has TWO links to
        // target (to test dedup).
        let f1 = synth_finding(1, vec![link_to(&target_id), link_to(&target_id)]);
        let f2 = synth_finding(2, vec![link_to(&target_id)]);
        let f3 = synth_finding(3, vec![link_to(&target_id)]);

        let mut project = assemble("multi-dependents", vec![], 0, 0, "test");
        project.findings = vec![target, f1.clone(), f2.clone(), f3.clone()];

        let idx = project.build_reverse_dep_index();
        // Expected: each dependent once, in sorted id order.
        let mut expected = vec![f1.id.clone(), f2.id.clone(), f3.id.clone()];
        expected.sort();
        assert_eq!(idx.dependents_of(&target_id), expected.as_slice());
    }

    /// A finding id with no dependents — and an id that doesn't exist
    /// in the project at all — both return an empty slice.
    #[test]
    fn dependents_of_unknown_or_orphan_returns_empty() {
        let lonely = synth_finding(7, vec![]);
        let mut project = assemble("orphan", vec![], 0, 0, "test");
        project.findings = vec![lonely.clone()];

        let idx = project.build_reverse_dep_index();
        assert!(idx.dependents_of(&lonely.id).is_empty());
        assert!(idx.dependents_of("vf_does_not_exist").is_empty());
    }

    /// Empty project → empty index.
    #[test]
    fn empty_project_yields_empty_index() {
        let project = assemble("empty", vec![], 0, 0, "test");
        let idx = project.build_reverse_dep_index();
        assert_eq!(idx.edge_count(), 0);
        assert_eq!(idx.target_count(), 0);
    }
}
735
736/// Recompute derived frontier statistics after mechanical edits.
737pub fn recompute_stats(project: &mut Project) {
738    let total_links: usize = project.findings.iter().map(|b| b.links.len()).sum();
739
740    let mut link_types: HashMap<String, usize> = HashMap::new();
741    for b in &project.findings {
742        for l in &b.links {
743            *link_types.entry(l.link_type.clone()).or_default() += 1;
744        }
745    }
746
747    let mut categories: HashMap<String, usize> = HashMap::new();
748    for b in &project.findings {
749        *categories
750            .entry(b.assertion.assertion_type.clone())
751            .or_default() += 1;
752    }
753
754    // v0.36.2: count findings with at least one successful replication
755    // recorded in `project.replications`. The legacy
756    // `evidence.replicated` scalar is a fall-through for findings
757    // pre-v0.32 that have no `Replication` records yet — same shape as
758    // `Project::compute_confidence_for`. A finding is "replicated" if
759    // EITHER the structured collection holds a `replicated` outcome
760    // for it, OR (no records exist at all) the legacy flag is set.
761    let mut targets_with_success: HashSet<&str> = HashSet::new();
762    let mut targets_with_any_record: HashSet<&str> = HashSet::new();
763    for r in &project.replications {
764        targets_with_any_record.insert(r.target_finding.as_str());
765        if r.outcome == "replicated" {
766            targets_with_success.insert(r.target_finding.as_str());
767        }
768    }
769    let replicated = project
770        .findings
771        .iter()
772        .filter(|b| {
773            if targets_with_any_record.contains(b.id.as_str()) {
774                targets_with_success.contains(b.id.as_str())
775            } else {
776                b.evidence.replicated
777            }
778        })
779        .count();
780    let avg_confidence = if project.findings.is_empty() {
781        0.0
782    } else {
783        (project
784            .findings
785            .iter()
786            .map(|b| b.confidence.score)
787            .sum::<f64>()
788            / project.findings.len() as f64
789            * 1000.0)
790            .round()
791            / 1000.0
792    };
793
794    project.stats.findings = project.findings.len();
795    project.stats.links = total_links;
796    project.stats.replicated = replicated;
797    project.stats.unreplicated = project.findings.len().saturating_sub(replicated);
798    project.stats.avg_confidence = avg_confidence;
799    project.stats.gaps = project.findings.iter().filter(|b| b.flags.gap).count();
800    project.stats.negative_space = project
801        .findings
802        .iter()
803        .filter(|b| b.flags.negative_space)
804        .count();
805    project.stats.contested = project
806        .findings
807        .iter()
808        .filter(|b| b.flags.contested)
809        .count();
810    project.stats.categories = categories;
811    project.stats.link_types = link_types;
812    let reviewed_from_legacy = project
813        .findings
814        .iter()
815        .filter_map(|b| {
816            b.provenance
817                .review
818                .as_ref()
819                .filter(|r| r.reviewed)
820                .map(|_| b.id.clone())
821        })
822        .collect::<HashSet<_>>();
823    let reviewed_from_events = project
824        .events
825        .iter()
826        .filter(|event| {
827            matches!(
828                event.kind.as_str(),
829                "finding.reviewed"
830                    | "finding.noted"
831                    | "finding.caveated"
832                    | "finding.confidence_revised"
833                    | "finding.rejected"
834                    | "finding.retracted"
835            )
836        })
837        .filter(|event| {
838            project
839                .findings
840                .iter()
841                .any(|finding| finding.id == event.target.id)
842        })
843        .map(|event| event.target.id.clone())
844        .collect::<HashSet<_>>();
845    let reviewed_ids = reviewed_from_legacy.union(&reviewed_from_events).count();
846    project.stats.human_reviewed = reviewed_ids;
847    let canonical_review_events = project
848        .events
849        .iter()
850        .filter(|event| {
851            matches!(
852                event.kind.as_str(),
853                "finding.reviewed"
854                    | "finding.noted"
855                    | "finding.caveated"
856                    | "finding.rejected"
857                    | "finding.retracted"
858                    | "finding.asserted"
859            )
860        })
861        .count();
862    project.stats.review_event_count = canonical_review_events + project.review_events.len();
863    project.stats.confidence_update_count = project
864        .events
865        .iter()
866        .filter(|event| event.kind == "finding.confidence_revised")
867        .count()
868        + project.confidence_updates.len();
869    project.stats.event_count = project.events.len();
870    project.stats.source_count = project.sources.len();
871    project.stats.evidence_atom_count = project.evidence_atoms.len();
872    project.stats.condition_record_count = project.condition_records.len();
873    project.stats.proposal_count = project.proposals.len();
874    project.stats.confidence_distribution = ConfidenceDistribution {
875        high_gt_80: project
876            .findings
877            .iter()
878            .filter(|b| b.confidence.score > 0.8)
879            .count(),
880        medium_60_80: project
881            .findings
882            .iter()
883            .filter(|b| (0.6..=0.8).contains(&b.confidence.score))
884            .count(),
885        low_lt_60: project
886            .findings
887            .iter()
888            .filter(|b| b.confidence.score < 0.6)
889            .count(),
890    };
891}
892
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    /// Build a minimal `FindingBundle` fixture.
    ///
    /// Only the id, confidence score, assertion type, legacy `replicated`
    /// flag and `gap` flag vary between fixtures; every other field is set
    /// to a fixed neutral value.
    fn make_finding(
        id: &str,
        score: f64,
        assertion_type: &str,
        replicated: bool,
        gap: bool,
    ) -> FindingBundle {
        let assertion = Assertion {
            text: format!("Finding {id}"),
            assertion_type: assertion_type.into(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };

        let evidence = Evidence {
            evidence_type: "experimental".into(),
            model_system: String::new(),
            species: None,
            method: String::new(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated,
            replication_count: None,
            evidence_spans: vec![],
        };

        let conditions = Conditions {
            text: String::new(),
            species_verified: vec![],
            species_unverified: vec![],
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };

        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: "Test".into(),
            authors: vec![],
            year: Some(2024),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction::default(),
            review: None,
            citation_count: None,
        };

        let flags = Flags {
            gap,
            negative_space: false,
            contested: false,
            retracted: false,
            declining: false,
            gravity_well: false,
            review_state: None,
            superseded: false,
            signature_threshold: None,
            jointly_accepted: false,
        };

        FindingBundle {
            id: id.into(),
            version: 1,
            previous_version: None,
            assertion,
            evidence,
            conditions,
            confidence: Confidence::raw(score, "seeded prior", 0.85),
            provenance,
            flags,
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    /// Assembling with no findings leaves all counters at zero and keeps
    /// the supplied name and description.
    #[test]
    fn empty_frontier() {
        let frontier = assemble("test", vec![], 0, 0, "empty");
        let stats = &frontier.stats;
        assert_eq!(stats.findings, 0);
        assert_eq!(stats.links, 0);
        assert_eq!(stats.avg_confidence, 0.0);
        assert_eq!(stats.replicated, 0);
        assert_eq!(stats.unreplicated, 0);
        assert_eq!(frontier.project.name, "test");
        assert_eq!(frontier.project.description, "empty");
    }

    /// The finding count, papers-processed count and error count are all
    /// carried onto the assembled project.
    #[test]
    fn findings_count() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.8, "mechanism", false, false),
                make_finding("f2", 0.6, "therapeutic", true, false),
                make_finding("f3", 0.9, "mechanism", false, true),
            ],
            5,
            1,
            "desc",
        );
        assert_eq!(frontier.stats.findings, 3);
        assert_eq!(frontier.project.papers_processed, 5);
        assert_eq!(frontier.project.errors, 1);
    }

    /// Two fixtures with the legacy flag set and one without split 2 / 1.
    #[test]
    fn replicated_unreplicated_counts() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.8, "mechanism", true, false),
                make_finding("f2", 0.6, "mechanism", true, false),
                make_finding("f3", 0.9, "mechanism", false, false),
            ],
            3,
            0,
            "desc",
        );
        assert_eq!(frontier.stats.replicated, 2);
        assert_eq!(frontier.stats.unreplicated, 1);
    }

    /// Findings are tallied per assertion type.
    #[test]
    fn category_counts() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.8, "mechanism", false, false),
                make_finding("f2", 0.6, "mechanism", false, false),
                make_finding("f3", 0.9, "therapeutic", false, false),
            ],
            3,
            0,
            "desc",
        );
        let categories = &frontier.stats.categories;
        assert_eq!(categories.get("mechanism"), Some(&2));
        assert_eq!(categories.get("therapeutic"), Some(&1));
    }

    /// Links are counted in total and per link type.
    #[test]
    fn link_counting() {
        let mut source = make_finding("f1", 0.8, "mechanism", false, false);
        source.add_link("f2", "extends", "shared entity");
        source.add_link("f3", "contradicts", "opposite direction");
        let other = make_finding("f2", 0.7, "mechanism", false, false);

        let frontier = assemble("test", vec![source, other], 2, 0, "desc");
        assert_eq!(frontier.stats.links, 2);
        let link_types = &frontier.stats.link_types;
        assert_eq!(link_types.get("extends"), Some(&1));
        assert_eq!(link_types.get("contradicts"), Some(&1));
    }

    /// The average of 0.8 and 0.6 lands within 0.01 of 0.7.
    #[test]
    fn avg_confidence() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.8, "mechanism", false, false),
                make_finding("f2", 0.6, "mechanism", false, false),
            ],
            2,
            0,
            "desc",
        );
        let delta = (frontier.stats.avg_confidence - 0.7).abs();
        assert!(delta < 0.01);
    }

    /// Scores fall into the high / medium / low buckets; 0.6 itself is
    /// medium because the medium range is 0.6..=0.8.
    #[test]
    fn confidence_distribution_buckets() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.9, "mechanism", false, false), // high
                make_finding("f2", 0.85, "mechanism", false, false), // high
                make_finding("f3", 0.7, "mechanism", false, false), // medium
                make_finding("f4", 0.6, "mechanism", false, false), // medium (0.6 is in 0.6..=0.8)
                make_finding("f5", 0.4, "mechanism", false, false), // low
            ],
            5,
            0,
            "desc",
        );
        let buckets = &frontier.stats.confidence_distribution;
        assert_eq!(buckets.high_gt_80, 2);
        assert_eq!(buckets.medium_60_80, 2);
        assert_eq!(buckets.low_lt_60, 1);
    }

    /// Only findings with the `gap` flag set are counted as gaps.
    #[test]
    fn gaps_counted() {
        let frontier = assemble(
            "test",
            vec![
                make_finding("f1", 0.8, "mechanism", false, true),
                make_finding("f2", 0.6, "mechanism", false, false),
                make_finding("f3", 0.9, "mechanism", false, true),
            ],
            3,
            0,
            "desc",
        );
        assert_eq!(frontier.stats.gaps, 2);
    }

    /// Name, description, counters and schema version are recorded, and a
    /// non-empty compile timestamp is set.
    #[test]
    fn metadata_preserved() {
        let frontier = assemble("my frontier", vec![], 10, 2, "A description");
        let meta = &frontier.project;
        assert_eq!(meta.name, "my frontier");
        assert_eq!(meta.description, "A description");
        assert_eq!(meta.papers_processed, 10);
        assert_eq!(meta.errors, 2);
        assert_eq!(frontier.vela_version, VELA_SCHEMA_VERSION);
        assert!(!meta.compiled_at.is_empty());
    }
}