Skip to main content

vela_protocol/
project.rs

1//! Stage 5: ASSEMBLE — build the project with stats and metadata.
2
3use std::collections::{HashMap, HashSet};
4
5use chrono::Utc;
6use serde::{Deserialize, Serialize};
7
8use crate::bundle::{ConfidenceUpdate, FindingBundle, ReviewEvent};
9use crate::events::StateEvent;
10use crate::proposals::{ProofState, StateProposal};
11use crate::sign::{ActorRecord, SignedEnvelope};
12use crate::sources::{ConditionRecord, EvidenceAtom, SourceRecord};
13
/// A dependency on another project (like a Cargo dependency for science).
///
/// v0.8 extends this with three optional fields that turn it into a
/// **cross-frontier dependency**: when `vfr_id` is set, the entry pins
/// a remote frontier by its content-addressed id and a snapshot hash.
/// `Link.target` values of the form `vf_<id>@vfr_<id>` resolve through
/// here. Without `vfr_id`, the entry behaves as a pre-v0.8 compile-time
/// dependency record.
///
/// The three v0.8 fields are skipped during serialization when they are
/// `None` and default to `None` when missing on input, so pre-v0.8
/// records keep their original serialized shape.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProjectDependency {
    /// Name of the dependency.
    pub name: String,
    /// Where the dependency comes from.
    /// NOTE(review): the set of accepted values is not defined in this
    /// file — confirm against the code that populates it.
    pub source: String,
    /// Optional version string. Always serialized, even when `None`.
    pub version: Option<String>,
    /// Optional pinned hash. Always serialized, even when `None`.
    /// NOTE(review): presumably a content hash of the pre-v0.8
    /// dependency — confirm against the producer of this field.
    pub pinned_hash: Option<String>,
    /// v0.8: content-addressed id of the dependent frontier.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub vfr_id: Option<String>,
    /// v0.8: where to fetch the dependent frontier file from
    /// (typically an `https://…` URL pointing at raw JSON).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locator: Option<String>,
    /// v0.8: SHA-256 of the canonical snapshot the dependent commits
    /// to. Strict pull verifies the fetched dependency's actual
    /// `snapshot_hash` matches this value before satisfying any link.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pinned_snapshot_hash: Option<String>,
}
41
42impl ProjectDependency {
43    /// True if this entry declares a cross-frontier dependency
44    /// (`vfr_id` is set). Pre-v0.8 entries return `false`.
45    pub fn is_cross_frontier(&self) -> bool {
46        self.vfr_id.is_some()
47    }
48}
49
/// The assembled frontier document: metadata, derived stats, findings,
/// and every auxiliary collection that is serialized alongside them.
///
/// Field order is the serialized field order; collections marked
/// `skip_serializing_if = "Vec::is_empty"` are omitted from output when
/// empty, and every `#[serde(default)]` field may be absent on input.
#[derive(Debug, Serialize, Deserialize)]
pub struct Project {
    /// Protocol version string; `assemble()` sets it to
    /// `VELA_SCHEMA_VERSION`.
    pub vela_version: String,
    /// Schema URL; `assemble()` sets it to `VELA_SCHEMA_URL`.
    pub schema: String,
    /// Stable Vela-addressable frontier ID, derived from a `frontier.created`
    /// genesis event hash. Optional for backward compatibility with v0.2
    /// frontiers; new v0.3 frontiers populate it on `assemble()`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub frontier_id: Option<String>,
    /// Frontier metadata. Serialized under the key `frontier`, not
    /// `project`.
    #[serde(rename = "frontier")]
    pub project: ProjectMeta,
    /// Derived summary statistics; `assemble()` starts from
    /// `ProjectStats::default()`.
    pub stats: ProjectStats,
    /// The finding bundles that make up this frontier.
    pub findings: Vec<FindingBundle>,
    /// Source artifacts that produced evidence-bearing units.
    #[serde(default)]
    pub sources: Vec<SourceRecord>,
    /// Materialized source-grounded evidence units linked to findings.
    #[serde(default)]
    pub evidence_atoms: Vec<EvidenceAtom>,
    /// Materialized condition boundaries used to avoid claim overgeneralization.
    #[serde(default)]
    pub condition_records: Vec<ConditionRecord>,
    /// Append-only log of review events (content-addressed).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub review_events: Vec<ReviewEvent>,
    /// Append-only log of confidence updates.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub confidence_updates: Vec<ConfidenceUpdate>,
    /// Canonical append-only event log for replayable frontier state.
    /// `assemble()` seeds it with the `frontier.created` genesis event
    /// at index 0.
    #[serde(default)]
    pub events: Vec<StateEvent>,
    /// Portable pending/applied proposal records for proposal-first writes.
    #[serde(default)]
    pub proposals: Vec<StateProposal>,
    /// Frontier-local proof freshness projection.
    #[serde(default)]
    pub proof_state: ProofState,
    /// Cryptographic signatures for findings (Ed25519).
    #[serde(default)]
    pub signatures: Vec<SignedEnvelope>,
    /// Registered actor identities, mapping a stable actor.id to an
    /// Ed25519 public key. Phase M (v0.4): once an actor is registered,
    /// any canonical event referencing that actor.id under
    /// `--strict` must carry a verifiable Ed25519 signature.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub actors: Vec<ActorRecord>,
    /// v0.32: Replication attempts as first-class kernel objects. Each
    /// `Replication` is content-addressed (`vrep_<hash>`) over its
    /// target finding, attempting actor, conditions, and outcome. Replaces
    /// the prior scalar pattern (`Evidence.replicated: bool` +
    /// `Evidence.replication_count: u32`) which couldn't represent
    /// independent attempts under different conditions. The legacy
    /// scalar fields are preserved on `Evidence` for backward
    /// compatibility; v0.32+ frontiers can derive them from this
    /// collection.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub replications: Vec<crate::bundle::Replication>,
    /// v0.33: Datasets as first-class kernel objects. A `vd_<hash>`
    /// captures a versioned, content-addressed reference to data that
    /// anchors empirical claims. Distinct from `Provenance` (which
    /// describes the paper) — a single paper may publish multiple
    /// datasets, and a single dataset may be reused across many papers.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub datasets: Vec<crate::bundle::Dataset>,
    /// v0.33: Code artifacts as first-class kernel objects. A `vc_<hash>`
    /// is a content-addressed pointer at a specific region of source
    /// code at a specific git commit. Claims can reference the code
    /// that produced them, not only a repository name in prose.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub code_artifacts: Vec<crate::bundle::CodeArtifact>,
    /// Generic content-addressed artifacts: protocols, trial registry
    /// records, supplements, notebooks, source files, and dataset
    /// manifests that need a durable byte or pointer commitment.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub artifacts: Vec<crate::bundle::Artifact>,
    /// v0.34: Predictions as first-class kernel objects. A `vpred_<hash>`
    /// is a falsifiable claim about a future observation, scoped to
    /// existing findings and tied to a registered actor. Calibration
    /// scoring runs over the resolved subset.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub predictions: Vec<crate::bundle::Prediction>,
    /// v0.34: Resolutions as first-class kernel objects. A `vres_<hash>`
    /// closes out a Prediction by recording what actually happened.
    /// Together with `Project.predictions`, this is the kernel's
    /// epistemic accountability ledger.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub resolutions: Vec<crate::bundle::Resolution>,
    /// v0.39: Federation peer registry. Each `PeerHub` declares
    /// another hub this frontier knows about — id, HTTPS URL, and the
    /// Ed25519 pubkey that peer signs their manifests with. Adding a
    /// peer doesn't yet trust their state; it just establishes who we
    /// know about. The actual sync runtime ships in v0.39.1+.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub peers: Vec<crate::federation::PeerHub>,
    /// v0.50: Trajectories as first-class kernel objects. A
    /// `vtr_<hash>` records the ordered search path that produced (or
    /// did not produce) a finding — hypotheses considered, branches
    /// tried, branches ruled out and why. The eighth essay primitive,
    /// "deposited last and most thinly because labs have real
    /// reasons not to expose dead ends," but represented structurally
    /// so an agent that does choose to deposit can prevent the next
    /// agent from re-deriving a ruled-out branch.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub trajectories: Vec<crate::bundle::Trajectory>,
    /// v0.49: NegativeResults as first-class kernel objects. A `vnr_<hash>`
    /// records an experiment or trial that did not support its
    /// hypothesis — registered-trial with power and effect-size bounds,
    /// or exploratory wet-lab dead end with the (reagent, condition,
    /// observed outcome) tuple. The substrate primitive that lets
    /// "absence of evidence" and "evidence of absence" round-trip
    /// distinctly through downstream confidence math instead of being
    /// flattened into a private "we tried that, it didn't work."
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub negative_results: Vec<crate::bundle::NegativeResult>,
}
165
166#[derive(Debug, Serialize, Deserialize)]
167pub struct ProjectMeta {
168    pub name: String,
169    pub description: String,
170    pub compiled_at: String,
171    pub compiler: String,
172    pub papers_processed: usize,
173    pub errors: usize,
174    #[serde(default)]
175    pub dependencies: Vec<ProjectDependency>,
176}
177
178#[derive(Debug, Serialize, Deserialize, Default)]
179pub struct ProjectStats {
180    pub findings: usize,
181    pub links: usize,
182    pub replicated: usize,
183    pub unreplicated: usize,
184    pub avg_confidence: f64,
185    pub gaps: usize,
186    pub negative_space: usize,
187    pub contested: usize,
188    pub categories: HashMap<String, usize>,
189    pub link_types: HashMap<String, usize>,
190    pub human_reviewed: usize,
191    /// Number of review events in this frontier.
192    #[serde(default)]
193    pub review_event_count: usize,
194    /// Number of confidence updates in this frontier.
195    #[serde(default)]
196    pub confidence_update_count: usize,
197    /// Number of canonical state events in this frontier.
198    #[serde(default)]
199    pub event_count: usize,
200    /// Number of source records in the frontier source registry.
201    #[serde(default)]
202    pub source_count: usize,
203    /// Number of materialized evidence atoms in the frontier.
204    #[serde(default)]
205    pub evidence_atom_count: usize,
206    /// Number of materialized condition records in the frontier.
207    #[serde(default)]
208    pub condition_record_count: usize,
209    /// Number of persisted proposals in the frontier.
210    #[serde(default)]
211    pub proposal_count: usize,
212    pub confidence_distribution: ConfidenceDistribution,
213}
214
215#[derive(Debug, Serialize, Deserialize, Default)]
216pub struct ConfidenceDistribution {
217    pub high_gt_80: usize,
218    pub medium_60_80: usize,
219    pub low_lt_60: usize,
220}
221
// Schema and compiler defaults for the current Vela protocol release.

/// Schema URL that `assemble()` writes into `Project.schema`.
pub const VELA_SCHEMA_URL: &str = "https://vela.science/schema/finding-bundle/v0.10.0";
/// Schema version that `assemble()` writes into `Project.vela_version`
/// and into the genesis event payload as `schema_version`.
pub const VELA_SCHEMA_VERSION: &str = "0.10.0";
/// Compiler identifier: `vela/` followed by this crate's
/// `CARGO_PKG_VERSION`, resolved at compile time.
pub const VELA_COMPILER_VERSION: &str = concat!("vela/", env!("CARGO_PKG_VERSION"));
226
227/// Derive a `vfr_<hash>` frontier ID from frontier metadata. Used as a
228/// fallback for legacy frontiers without a `frontier.created` genesis
229/// event; v0.4+ frontiers derive from the genesis event itself via
230/// `frontier_id_from_genesis`.
231#[must_use]
232pub fn derive_frontier_id_from_meta(meta: &ProjectMeta) -> String {
233    let preimage = serde_json::json!({
234        "name": meta.name,
235        "compiled_at": meta.compiled_at,
236        "compiler": meta.compiler,
237    });
238    let bytes = crate::canonical::to_canonical_bytes(&preimage).unwrap_or_default();
239    use sha2::{Digest, Sha256};
240    format!("vfr_{}", &hex::encode(Sha256::digest(bytes))[..16])
241}
242
243/// Derive a `vfr_<hash>` frontier ID from the canonical hash of the
244/// `frontier.created` genesis event. Returns `None` if `events[0]` is
245/// absent or not a `frontier.created` event (legacy frontiers fall back
246/// to meta-derivation via `derive_frontier_id_from_meta`).
247///
248/// The preimage shape matches `event_id` exactly so the same canonical
249/// rule produces both the event's `vev_…` and the frontier's `vfr_…`
250/// from the same logical content. Doctrine line: a frontier IS what the
251/// `frontier.created` event creates.
252#[must_use]
253pub fn frontier_id_from_genesis(events: &[crate::events::StateEvent]) -> Option<String> {
254    let genesis = events.first()?;
255    if genesis.kind != "frontier.created" {
256        return None;
257    }
258    let preimage = serde_json::json!({
259        "schema": genesis.schema,
260        "kind": genesis.kind,
261        "target": genesis.target,
262        "actor": genesis.actor,
263        "timestamp": genesis.timestamp,
264        "reason": genesis.reason,
265        "before_hash": genesis.before_hash,
266        "after_hash": genesis.after_hash,
267        "payload": genesis.payload,
268        "caveats": genesis.caveats,
269    });
270    let bytes = crate::canonical::to_canonical_bytes(&preimage).ok()?;
271    use sha2::{Digest, Sha256};
272    Some(format!("vfr_{}", &hex::encode(Sha256::digest(bytes))[..16]))
273}
274
275/// Construct the `frontier.created` canonical event for a freshly
276/// compiled frontier. The event becomes `events[0]` and the frontier_id
277/// derives from its canonical hash.
278///
279/// Targets `frontier:<name>` (not `finding:…`) so replay's orphan-target
280/// detection does not flag it; the genesis event carries identity, not a
281/// finding mutation.
282fn build_genesis_event(name: &str, compiled_at: &str, creator: &str) -> crate::events::StateEvent {
283    use crate::events::{EVENT_SCHEMA, NULL_HASH, StateActor, StateEvent, StateTarget};
284    let mut event = StateEvent {
285        schema: EVENT_SCHEMA.to_string(),
286        id: String::new(),
287        kind: "frontier.created".to_string(),
288        target: StateTarget {
289            r#type: "frontier".to_string(),
290            id: name.to_string(),
291        },
292        actor: StateActor {
293            id: creator.to_string(),
294            r#type: "frontier".to_string(),
295        },
296        timestamp: compiled_at.to_string(),
297        reason: "frontier compiled".to_string(),
298        before_hash: NULL_HASH.to_string(),
299        after_hash: NULL_HASH.to_string(),
300        payload: serde_json::json!({
301            "name": name,
302            "creator": creator,
303            "schema_version": VELA_SCHEMA_VERSION,
304            "compiled_at": compiled_at,
305        }),
306        caveats: vec![],
307        signature: None,
308        schema_artifact_id: None,
309    };
310    event.id = crate::events::compute_event_id(&event);
311    event
312}
313
314pub fn assemble(
315    name: &str,
316    bundles: Vec<FindingBundle>,
317    papers_processed: usize,
318    errors: usize,
319    description: &str,
320) -> Project {
321    let compiled_at = Utc::now().to_rfc3339();
322    let meta = ProjectMeta {
323        name: name.to_string(),
324        description: description.to_string(),
325        compiled_at: compiled_at.clone(),
326        compiler: VELA_COMPILER_VERSION.to_string(),
327        papers_processed,
328        errors,
329        dependencies: Vec::new(),
330    };
331    // Phase J (v0.4): emit a `frontier.created` canonical event as
332    // events[0] and derive frontier_id from its canonical hash. The
333    // address primitive becomes doctrine-grounded — a frontier IS what
334    // the genesis event creates, not a convenience over its metadata.
335    let genesis = build_genesis_event(name, &compiled_at, VELA_COMPILER_VERSION);
336    let frontier_id = frontier_id_from_genesis(std::slice::from_ref(&genesis));
337    let mut project = Project {
338        vela_version: VELA_SCHEMA_VERSION.to_string(),
339        schema: VELA_SCHEMA_URL.to_string(),
340        frontier_id,
341        project: meta,
342        stats: ProjectStats::default(),
343        findings: bundles,
344        sources: Vec::new(),
345        evidence_atoms: Vec::new(),
346        condition_records: Vec::new(),
347        review_events: Vec::new(),
348        confidence_updates: Vec::new(),
349        events: vec![genesis],
350        proposals: Vec::new(),
351        proof_state: ProofState::default(),
352        signatures: Vec::new(),
353        actors: Vec::new(),
354        replications: Vec::new(),
355        datasets: Vec::new(),
356        code_artifacts: Vec::new(),
357        artifacts: Vec::new(),
358        predictions: Vec::new(),
359        resolutions: Vec::new(),
360        peers: Vec::new(),
361        negative_results: Vec::new(),
362        trajectories: Vec::new(),
363    };
364    crate::sources::materialize_project(&mut project);
365    project
366}
367
368impl Project {
369    /// Return the stable Vela-addressable frontier ID. Prefers the stored
370    /// field; if absent, derives from the `frontier.created` genesis
371    /// event in `events[0]`; if no genesis event is present, falls back
372    /// to meta-derivation (legacy v0.3 frontiers).
373    #[must_use]
374    pub fn frontier_id(&self) -> String {
375        if let Some(id) = self.frontier_id.clone() {
376            return id;
377        }
378        if let Some(id) = frontier_id_from_genesis(&self.events) {
379            return id;
380        }
381        derive_frontier_id_from_meta(&self.project)
382    }
383
384    /// Materialize the frontier_id field if absent. Idempotent.
385    pub fn ensure_frontier_id(&mut self) -> String {
386        if self.frontier_id.is_none() {
387            self.frontier_id = Some(self.frontier_id());
388        }
389        self.frontier_id.clone().unwrap()
390    }
391
392    /// v0.36.1: Compute frontier-epistemic confidence for a finding using
393    /// the v0.32 `Replication` collection as the authoritative source. A
394    /// failed replication subtracts from confidence; a successful one
395    /// adds to it; partials half-add. This closes the long-standing
396    /// "two sources of truth" between `Evidence.replicated` (the legacy
397    /// scalar set when a finding was first asserted) and
398    /// `Project.replications` (the kernel objects accumulated over time).
399    ///
400    /// Falls back to the legacy scalar only when no `Replication` record
401    /// targets this finding's id — preserves behavior for unmigrated
402    /// frontiers.
403    #[must_use]
404    pub fn compute_confidence_for(&self, bundle: &FindingBundle) -> crate::bundle::Confidence {
405        let (n_repl, n_failed, n_partial) =
406            crate::bundle::count_replication_outcomes(&self.replications, &bundle.id);
407        let (n_repl, n_failed, n_partial) = if n_repl + n_failed + n_partial == 0 {
408            let legacy = if bundle.evidence.replicated {
409                bundle.evidence.replication_count.unwrap_or(1)
410            } else {
411                0
412            };
413            (legacy, 0, 0)
414        } else {
415            (n_repl, n_failed, n_partial)
416        };
417        crate::bundle::compute_confidence_from_components(
418            &bundle.evidence,
419            &bundle.conditions,
420            bundle.flags.contested,
421            n_repl,
422            n_failed,
423            n_partial,
424            bundle.assertion.causal_claim,
425            bundle.assertion.causal_evidence_grade,
426        )
427    }
428
429    /// v0.8: iterate the cross-frontier dependencies (those with
430    /// `vfr_id` set). Pre-v0.8 compile-time deps without `vfr_id`
431    /// are filtered out.
432    pub fn cross_frontier_deps(&self) -> impl Iterator<Item = &ProjectDependency> {
433        self.project
434            .dependencies
435            .iter()
436            .filter(|d| d.is_cross_frontier())
437    }
438
439    /// v0.8: look up the dependency record for a specific `vfr_id`.
440    /// Returns `None` if no matching cross-frontier dep is declared.
441    pub fn dep_for_vfr(&self, vfr_id: &str) -> Option<&ProjectDependency> {
442        self.cross_frontier_deps()
443            .find(|d| d.vfr_id.as_deref() == Some(vfr_id))
444    }
445
446    /// v0.49.3: build a reverse-dependency index from the forward
447    /// `links: Vec<Link>` data on each finding. The forward direction
448    /// (which findings does this finding depend on?) is O(1) per
449    /// finding because it's just `f.links`. The reverse direction
450    /// (which findings depend on this finding?) previously required
451    /// scanning every finding for every query — O(N×L). This index
452    /// flips that to O(1) lookup once built.
453    ///
454    /// Cost to build: O(N×L) one-time scan over all findings × links.
455    /// At 48 findings × ~3 links each (the legacy BBB proof fixture),
456    /// that's ~150 hash-insert operations and microseconds. At
457    /// 100K findings × 10 links, it's still well under a second.
458    ///
459    /// Used by retraction-impact queries (serve.rs), cascade
460    /// computation, and any consumer that needs to walk the dependent
461    /// graph rather than the dependency graph. The index is not
462    /// serialized — it's a derived structure that callers build when
463    /// they need it and drop when they don't.
464    #[must_use]
465    pub fn build_reverse_dep_index(&self) -> ReverseDepIndex {
466        let mut map: std::collections::HashMap<String, Vec<String>> =
467            std::collections::HashMap::with_capacity(self.findings.len());
468        for f in &self.findings {
469            for link in &f.links {
470                map.entry(link.target.clone())
471                    .or_default()
472                    .push(f.id.clone());
473            }
474        }
475        // Stable sort each dependent list so two implementations of the
476        // index agree on ordering for any downstream serialization.
477        for v in map.values_mut() {
478            v.sort();
479            v.dedup();
480        }
481        ReverseDepIndex { map }
482    }
483}
484
/// v0.49.3: reverse-dependency index derived from a Project's forward
/// `links` graph. It maps `finding_id → [dependent_finding_id, …]`, so
/// answering "what depends on X?" is a single map lookup rather than an
/// O(N×L) scan.
///
/// Build one with `Project::build_reverse_dep_index`. The index is a
/// snapshot: later mutations of the Project are not reflected in it, so
/// long-lived consumers that mutate state should rebuild it after each
/// reduce step.
#[derive(Debug, Clone, Default)]
pub struct ReverseDepIndex {
    map: HashMap<String, Vec<String>>,
}

impl ReverseDepIndex {
    /// The findings whose forward `links` name `finding_id` as a
    /// target. Yields an empty slice when nothing depends on the id,
    /// including when the id is unknown to the index.
    #[must_use]
    pub fn dependents_of(&self, finding_id: &str) -> &[String] {
        match self.map.get(finding_id) {
            Some(dependents) => dependents.as_slice(),
            None => &[],
        }
    }

    /// Total number of dependent edges held by the index. Handy for
    /// quick sanity checks and metric reporting.
    #[must_use]
    pub fn edge_count(&self) -> usize {
        let mut edges = 0;
        for dependents in self.map.values() {
            edges += dependents.len();
        }
        edges
    }

    /// Number of distinct targets that have at least one dependent.
    #[must_use]
    pub fn target_count(&self) -> usize {
        self.map.keys().len()
    }

    /// Iterate over `(target_finding_id, dependents)` pairs. The order
    /// is HashMap iteration order and is not stable across runs; sort
    /// the output if a consumer needs determinism.
    pub fn iter(&self) -> impl Iterator<Item = (&String, &Vec<String>)> {
        self.map.iter()
    }
}
530
#[cfg(test)]
mod cross_frontier_dep_tests {
    use super::*;

    /// A pre-v0.8 style dependency: none of the cross-frontier fields set.
    fn dep_local(name: &str) -> ProjectDependency {
        ProjectDependency {
            name: name.into(),
            source: "local".into(),
            version: None,
            pinned_hash: None,
            vfr_id: None,
            locator: None,
            pinned_snapshot_hash: None,
        }
    }

    /// A v0.8 cross-frontier dependency pinned to `vfr`.
    fn dep_cross(vfr: &str) -> ProjectDependency {
        ProjectDependency {
            name: "ext".into(),
            source: "vela.hub".into(),
            version: None,
            pinned_hash: None,
            vfr_id: Some(vfr.into()),
            locator: Some(format!("https://example.test/{vfr}.json")),
            pinned_snapshot_hash: Some("a".repeat(64)),
        }
    }

    #[test]
    fn is_cross_frontier_only_when_vfr_id_set() {
        assert!(!dep_local("x").is_cross_frontier());
        assert!(dep_cross("vfr_abc").is_cross_frontier());
    }

    #[test]
    fn dep_serializes_byte_identical_when_v0_8_fields_absent() {
        // Backward compat: a pre-v0.8 dep serializes without any of the
        // new optional v0.8 fields and round-trips through serde.
        let d = dep_local("legacy");
        let s = serde_json::to_string(&d).unwrap();
        assert!(!s.contains("vfr_id"));
        assert!(!s.contains("locator"));
        assert!(!s.contains("pinned_snapshot_hash"));
        // Pin the exact bytes so the "byte identical" claim is checked,
        // not just the absence of the new keys.
        assert_eq!(
            s,
            r#"{"name":"legacy","source":"local","version":null,"pinned_hash":null}"#
        );
        let back: ProjectDependency = serde_json::from_str(&s).unwrap();
        assert_eq!(back, d);
    }

    #[test]
    fn cross_frontier_dep_round_trips_with_v0_8_fields() {
        let d = dep_cross("vfr_abc");
        let s = serde_json::to_string(&d).unwrap();
        assert!(s.contains("vfr_id"));
        assert!(s.contains("locator"));
        assert!(s.contains("pinned_snapshot_hash"));
        let back: ProjectDependency = serde_json::from_str(&s).unwrap();
        assert_eq!(back, d);
    }

    #[test]
    fn dep_for_vfr_only_matches_cross_frontier_entries() {
        let mut project = assemble("deps", vec![], 0, 0, "test");
        project.project.dependencies =
            vec![dep_local("a"), dep_cross("vfr_abc"), dep_cross("vfr_def")];

        assert_eq!(project.cross_frontier_deps().count(), 2);
        assert_eq!(
            project
                .dep_for_vfr("vfr_def")
                .and_then(|d| d.vfr_id.as_deref()),
            Some("vfr_def")
        );
        assert!(project.dep_for_vfr("vfr_missing").is_none());
    }
}
576
#[cfg(test)]
mod reverse_dep_index_tests {
    use super::*;
    use crate::bundle::{
        Assertion, Author, Conditions, Confidence, ConfidenceKind, ConfidenceMethod, Evidence,
        Extraction, FindingBundle, Flags, Link, Provenance,
    };

    /// Fabricate a minimal finding carrying the given forward links.
    /// `idx` is woven into the assertion text, DOI, and title so that
    /// each synthetic finding has distinct content.
    fn synth_finding(idx: usize, links: Vec<Link>) -> FindingBundle {
        let mut finding = FindingBundle::new(
            Assertion {
                text: format!("Synthetic finding {idx}"),
                assertion_type: "mechanism".into(),
                entities: vec![],
                relation: None,
                direction: None,
                causal_claim: None,
                causal_evidence_grade: None,
            },
            Evidence {
                evidence_type: "experimental".into(),
                model_system: "test".into(),
                species: None,
                method: "test".into(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            Conditions {
                text: "test".into(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            Confidence {
                kind: ConfidenceKind::FrontierEpistemic,
                score: 0.5,
                basis: "test".into(),
                method: ConfidenceMethod::LlmInitial,
                components: None,
                extraction_confidence: 0.9,
            },
            Provenance {
                source_type: "published_paper".into(),
                doi: Some(format!("10.0000/reverse-dep-index-test.{idx:04}")),
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: format!("Synthetic test paper {idx}"),
                authors: vec![Author {
                    name: "T".into(),
                    orcid: None,
                }],
                year: None,
                journal: None,
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            Flags::default(),
        );
        finding.links = links;
        finding
    }

    /// A `supports` link pointing at `target`.
    fn link_to(target: &str) -> Link {
        Link {
            target: target.into(),
            link_type: "supports".into(),
            note: "test".into(),
            inferred_by: "test".into(),
            created_at: "2026-05-02T00:00:00Z".into(),
            mechanism: None,
        }
    }

    /// Chain 0 → 1 → 2 → 3, where each finding links to the next one.
    /// Expected reverse view: 3 ← [2], 2 ← [1], 1 ← [0], and nothing
    /// depends on 0 because it is the head of the chain.
    #[test]
    fn dependents_of_returns_correct_set_for_simple_chain() {
        let f3 = synth_finding(3, vec![]);
        let f2 = synth_finding(2, vec![link_to(&f3.id)]);
        let f1 = synth_finding(1, vec![link_to(&f2.id)]);
        let f0 = synth_finding(0, vec![link_to(&f1.id)]);
        let (id0, id1, id2, id3) = (f0.id.clone(), f1.id.clone(), f2.id.clone(), f3.id.clone());

        let mut project = assemble("chain", vec![], 0, 0, "test");
        project.findings = vec![f0, f1, f2, f3];
        let index = project.build_reverse_dep_index();

        assert_eq!(index.dependents_of(&id3), std::slice::from_ref(&id2));
        assert_eq!(index.dependents_of(&id2), std::slice::from_ref(&id1));
        assert_eq!(index.dependents_of(&id1), std::slice::from_ref(&id0));
        assert!(index.dependents_of(&id0).is_empty());
        // One edge per link in the chain.
        assert_eq!(index.edge_count(), 3);
        // f1, f2, and f3 each have exactly one dependent.
        assert_eq!(index.target_count(), 3);
    }

    /// Several findings pointing at one target must come back as a
    /// sorted list without duplicates.
    #[test]
    fn dependents_of_dedups_and_sorts() {
        let target = synth_finding(99, vec![]);
        let target_id = target.id.clone();
        // All three link to the target; f1 does so twice to exercise
        // the dedup step.
        let f1 = synth_finding(1, vec![link_to(&target_id), link_to(&target_id)]);
        let f2 = synth_finding(2, vec![link_to(&target_id)]);
        let f3 = synth_finding(3, vec![link_to(&target_id)]);

        let mut expected = vec![f1.id.clone(), f2.id.clone(), f3.id.clone()];
        expected.sort();

        let mut project = assemble("multi-dependents", vec![], 0, 0, "test");
        project.findings = vec![target, f1, f2, f3];
        let index = project.build_reverse_dep_index();

        assert_eq!(index.dependents_of(&target_id), expected.as_slice());
    }

    /// Both a finding without dependents and an id absent from the
    /// project resolve to an empty slice.
    #[test]
    fn dependents_of_unknown_or_orphan_returns_empty() {
        let lonely = synth_finding(7, vec![]);
        let lonely_id = lonely.id.clone();

        let mut project = assemble("orphan", vec![], 0, 0, "test");
        project.findings = vec![lonely];
        let index = project.build_reverse_dep_index();

        assert!(index.dependents_of(&lonely_id).is_empty());
        assert!(index.dependents_of("vf_does_not_exist").is_empty());
    }

    /// A project without findings produces an index without entries.
    #[test]
    fn empty_project_yields_empty_index() {
        let index = assemble("empty", vec![], 0, 0, "test").build_reverse_dep_index();
        assert_eq!(index.edge_count(), 0);
        assert_eq!(index.target_count(), 0);
    }
}
736
737/// Recompute derived frontier statistics after mechanical edits.
738pub fn recompute_stats(project: &mut Project) {
739    let total_links: usize = project.findings.iter().map(|b| b.links.len()).sum();
740
741    let mut link_types: HashMap<String, usize> = HashMap::new();
742    for b in &project.findings {
743        for l in &b.links {
744            *link_types.entry(l.link_type.clone()).or_default() += 1;
745        }
746    }
747
748    let mut categories: HashMap<String, usize> = HashMap::new();
749    for b in &project.findings {
750        *categories
751            .entry(b.assertion.assertion_type.clone())
752            .or_default() += 1;
753    }
754
755    // v0.36.2: count findings with at least one successful replication
756    // recorded in `project.replications`. The legacy
757    // `evidence.replicated` scalar is a fall-through for findings
758    // pre-v0.32 that have no `Replication` records yet — same shape as
759    // `Project::compute_confidence_for`. A finding is "replicated" if
760    // EITHER the structured collection holds a `replicated` outcome
761    // for it, OR (no records exist at all) the legacy flag is set.
762    let mut targets_with_success: HashSet<&str> = HashSet::new();
763    let mut targets_with_any_record: HashSet<&str> = HashSet::new();
764    for r in &project.replications {
765        targets_with_any_record.insert(r.target_finding.as_str());
766        if r.outcome == "replicated" {
767            targets_with_success.insert(r.target_finding.as_str());
768        }
769    }
770    let replicated = project
771        .findings
772        .iter()
773        .filter(|b| {
774            if targets_with_any_record.contains(b.id.as_str()) {
775                targets_with_success.contains(b.id.as_str())
776            } else {
777                b.evidence.replicated
778            }
779        })
780        .count();
781    let avg_confidence = if project.findings.is_empty() {
782        0.0
783    } else {
784        (project
785            .findings
786            .iter()
787            .map(|b| b.confidence.score)
788            .sum::<f64>()
789            / project.findings.len() as f64
790            * 1000.0)
791            .round()
792            / 1000.0
793    };
794
795    project.stats.findings = project.findings.len();
796    project.stats.links = total_links;
797    project.stats.replicated = replicated;
798    project.stats.unreplicated = project.findings.len().saturating_sub(replicated);
799    project.stats.avg_confidence = avg_confidence;
800    project.stats.gaps = project.findings.iter().filter(|b| b.flags.gap).count();
801    project.stats.negative_space = project
802        .findings
803        .iter()
804        .filter(|b| b.flags.negative_space)
805        .count();
806    project.stats.contested = project
807        .findings
808        .iter()
809        .filter(|b| b.flags.contested)
810        .count();
811    project.stats.categories = categories;
812    project.stats.link_types = link_types;
813    let reviewed_from_legacy = project
814        .findings
815        .iter()
816        .filter_map(|b| {
817            b.provenance
818                .review
819                .as_ref()
820                .filter(|r| r.reviewed)
821                .map(|_| b.id.clone())
822        })
823        .collect::<HashSet<_>>();
824    let reviewed_from_events = project
825        .events
826        .iter()
827        .filter(|event| {
828            matches!(
829                event.kind.as_str(),
830                "finding.reviewed"
831                    | "finding.noted"
832                    | "finding.caveated"
833                    | "finding.confidence_revised"
834                    | "finding.rejected"
835                    | "finding.retracted"
836            )
837        })
838        .filter(|event| {
839            project
840                .findings
841                .iter()
842                .any(|finding| finding.id == event.target.id)
843        })
844        .map(|event| event.target.id.clone())
845        .collect::<HashSet<_>>();
846    let reviewed_ids = reviewed_from_legacy.union(&reviewed_from_events).count();
847    project.stats.human_reviewed = reviewed_ids;
848    let canonical_review_events = project
849        .events
850        .iter()
851        .filter(|event| {
852            matches!(
853                event.kind.as_str(),
854                "finding.reviewed"
855                    | "finding.noted"
856                    | "finding.caveated"
857                    | "finding.rejected"
858                    | "finding.retracted"
859                    | "finding.asserted"
860            )
861        })
862        .count();
863    project.stats.review_event_count = canonical_review_events + project.review_events.len();
864    project.stats.confidence_update_count = project
865        .events
866        .iter()
867        .filter(|event| event.kind == "finding.confidence_revised")
868        .count()
869        + project.confidence_updates.len();
870    project.stats.event_count = project.events.len();
871    project.stats.source_count = project.sources.len();
872    project.stats.evidence_atom_count = project.evidence_atoms.len();
873    project.stats.condition_record_count = project.condition_records.len();
874    project.stats.proposal_count = project.proposals.len();
875    project.stats.confidence_distribution = ConfidenceDistribution {
876        high_gt_80: project
877            .findings
878            .iter()
879            .filter(|b| b.confidence.score > 0.8)
880            .count(),
881        medium_60_80: project
882            .findings
883            .iter()
884            .filter(|b| (0.6..=0.8).contains(&b.confidence.score))
885            .count(),
886        low_lt_60: project
887            .findings
888            .iter()
889            .filter(|b| b.confidence.score < 0.6)
890            .count(),
891    };
892}
893
894#[cfg(test)]
895mod tests {
896    use super::*;
897    use crate::bundle::*;
898
899    fn make_finding(
900        id: &str,
901        score: f64,
902        assertion_type: &str,
903        replicated: bool,
904        gap: bool,
905    ) -> FindingBundle {
906        FindingBundle {
907            id: id.into(),
908            version: 1,
909            previous_version: None,
910            assertion: Assertion {
911                text: format!("Finding {id}"),
912                assertion_type: assertion_type.into(),
913                entities: vec![],
914                relation: None,
915                direction: None,
916                causal_claim: None,
917                causal_evidence_grade: None,
918            },
919            evidence: Evidence {
920                evidence_type: "experimental".into(),
921                model_system: String::new(),
922                species: None,
923                method: String::new(),
924                sample_size: None,
925                effect_size: None,
926                p_value: None,
927                replicated,
928                replication_count: None,
929                evidence_spans: vec![],
930            },
931            conditions: Conditions {
932                text: String::new(),
933                species_verified: vec![],
934                species_unverified: vec![],
935                in_vitro: false,
936                in_vivo: false,
937                human_data: false,
938                clinical_trial: false,
939                concentration_range: None,
940                duration: None,
941                age_group: None,
942                cell_type: None,
943            },
944            confidence: Confidence::raw(score, "seeded prior", 0.85),
945            provenance: Provenance {
946                source_type: "published_paper".into(),
947                doi: None,
948                pmid: None,
949                pmc: None,
950                openalex_id: None,
951                url: None,
952                title: "Test".into(),
953                authors: vec![],
954                year: Some(2024),
955                journal: None,
956                license: None,
957                publisher: None,
958                funders: vec![],
959                extraction: Extraction::default(),
960                review: None,
961                citation_count: None,
962            },
963            flags: Flags {
964                gap,
965                negative_space: false,
966                contested: false,
967                retracted: false,
968                declining: false,
969                gravity_well: false,
970                review_state: None,
971                superseded: false,
972                signature_threshold: None,
973                jointly_accepted: false,
974            },
975            links: vec![],
976            annotations: vec![],
977            attachments: vec![],
978            created: String::new(),
979            updated: None,
980
981            access_tier: crate::access_tier::AccessTier::Public,
982        }
983    }
984
985    #[test]
986    fn empty_frontier() {
987        let c = assemble("test", vec![], 0, 0, "empty");
988        assert_eq!(c.stats.findings, 0);
989        assert_eq!(c.stats.links, 0);
990        assert_eq!(c.stats.avg_confidence, 0.0);
991        assert_eq!(c.stats.replicated, 0);
992        assert_eq!(c.stats.unreplicated, 0);
993        assert_eq!(c.project.name, "test");
994        assert_eq!(c.project.description, "empty");
995    }
996
997    #[test]
998    fn findings_count() {
999        let bundles = vec![
1000            make_finding("f1", 0.8, "mechanism", false, false),
1001            make_finding("f2", 0.6, "therapeutic", true, false),
1002            make_finding("f3", 0.9, "mechanism", false, true),
1003        ];
1004        let c = assemble("test", bundles, 5, 1, "desc");
1005        assert_eq!(c.stats.findings, 3);
1006        assert_eq!(c.project.papers_processed, 5);
1007        assert_eq!(c.project.errors, 1);
1008    }
1009
1010    #[test]
1011    fn replicated_unreplicated_counts() {
1012        let bundles = vec![
1013            make_finding("f1", 0.8, "mechanism", true, false),
1014            make_finding("f2", 0.6, "mechanism", true, false),
1015            make_finding("f3", 0.9, "mechanism", false, false),
1016        ];
1017        let c = assemble("test", bundles, 3, 0, "desc");
1018        assert_eq!(c.stats.replicated, 2);
1019        assert_eq!(c.stats.unreplicated, 1);
1020    }
1021
1022    #[test]
1023    fn category_counts() {
1024        let bundles = vec![
1025            make_finding("f1", 0.8, "mechanism", false, false),
1026            make_finding("f2", 0.6, "mechanism", false, false),
1027            make_finding("f3", 0.9, "therapeutic", false, false),
1028        ];
1029        let c = assemble("test", bundles, 3, 0, "desc");
1030        assert_eq!(*c.stats.categories.get("mechanism").unwrap(), 2);
1031        assert_eq!(*c.stats.categories.get("therapeutic").unwrap(), 1);
1032    }
1033
1034    #[test]
1035    fn link_counting() {
1036        let mut f1 = make_finding("f1", 0.8, "mechanism", false, false);
1037        f1.add_link("f2", "extends", "shared entity");
1038        f1.add_link("f3", "contradicts", "opposite direction");
1039        let f2 = make_finding("f2", 0.7, "mechanism", false, false);
1040        let c = assemble("test", vec![f1, f2], 2, 0, "desc");
1041        assert_eq!(c.stats.links, 2);
1042        assert_eq!(*c.stats.link_types.get("extends").unwrap(), 1);
1043        assert_eq!(*c.stats.link_types.get("contradicts").unwrap(), 1);
1044    }
1045
1046    #[test]
1047    fn avg_confidence() {
1048        let bundles = vec![
1049            make_finding("f1", 0.8, "mechanism", false, false),
1050            make_finding("f2", 0.6, "mechanism", false, false),
1051        ];
1052        let c = assemble("test", bundles, 2, 0, "desc");
1053        assert!((c.stats.avg_confidence - 0.7).abs() < 0.01);
1054    }
1055
1056    #[test]
1057    fn confidence_distribution_buckets() {
1058        let bundles = vec![
1059            make_finding("f1", 0.9, "mechanism", false, false), // high
1060            make_finding("f2", 0.85, "mechanism", false, false), // high
1061            make_finding("f3", 0.7, "mechanism", false, false), // medium
1062            make_finding("f4", 0.6, "mechanism", false, false), // medium (0.6 is in 0.6..=0.8)
1063            make_finding("f5", 0.4, "mechanism", false, false), // low
1064        ];
1065        let c = assemble("test", bundles, 5, 0, "desc");
1066        assert_eq!(c.stats.confidence_distribution.high_gt_80, 2);
1067        assert_eq!(c.stats.confidence_distribution.medium_60_80, 2);
1068        assert_eq!(c.stats.confidence_distribution.low_lt_60, 1);
1069    }
1070
1071    #[test]
1072    fn gaps_counted() {
1073        let bundles = vec![
1074            make_finding("f1", 0.8, "mechanism", false, true),
1075            make_finding("f2", 0.6, "mechanism", false, false),
1076            make_finding("f3", 0.9, "mechanism", false, true),
1077        ];
1078        let c = assemble("test", bundles, 3, 0, "desc");
1079        assert_eq!(c.stats.gaps, 2);
1080    }
1081
1082    #[test]
1083    fn metadata_preserved() {
1084        let c = assemble("my frontier", vec![], 10, 2, "A description");
1085        assert_eq!(c.project.name, "my frontier");
1086        assert_eq!(c.project.description, "A description");
1087        assert_eq!(c.project.papers_processed, 10);
1088        assert_eq!(c.project.errors, 2);
1089        assert_eq!(c.vela_version, VELA_SCHEMA_VERSION);
1090        assert!(!c.project.compiled_at.is_empty());
1091    }
1092}