// vela_protocol/aggregate.rs
//! v0.35: Consensus aggregation — the inference layer.
//!
//! Given a target finding, the kernel can find other findings making
//! similar claims, weight them by evidence quality (replication
//! count, review state, citations), and return a consensus
//! confidence with a credible interval.
//!
//! This is what turns Vela from "a database of claims" into "a
//! reasoning surface over claims." Other parts of the substrate
//! describe what's *believed* (findings) and what's *expected*
//! (predictions). This module describes what the *field* collectively
//! holds — derived deterministically from canonical state, never
//! stored.
//!
//! Doctrine: aggregation is a derived view, not a kernel object.
//! Same input frontier → same consensus result, byte-for-byte.

use std::collections::HashSet;

use serde::{Deserialize, Serialize};

use crate::bundle::{CausalClaim, CausalEvidenceGrade, FindingBundle};
use crate::project::Project;

/// v0.38.2: filter constraints for consensus aggregation. Consensus
/// computed without a filter blends all claim-similar findings —
/// fine when "what does the field hold?" is the question, but wrong
/// when the question is specifically "what does the field hold *as
/// causation*?" or "what's the consensus among RCT-grade evidence?"
///
/// `None` for any field means no constraint; the default value of
/// `AggregateFilter::default()` is the pre-v0.38.2 behavior.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AggregateFilter {
    /// Only include findings whose `causal_claim` matches. `None`
    /// includes all (including unset claims).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_claim: Option<CausalClaim>,
    /// Minimum study-design grade. Findings with `causal_evidence_grade`
    /// strictly weaker than this are excluded. `None` includes all
    /// (including unset grades). Ordering: Theoretical < Observational
    /// < QuasiExperimental < Rct.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_grade_min: Option<CausalEvidenceGrade>,
}

47/// Total order on `CausalEvidenceGrade` for the `causal_grade_min`
48/// filter. Higher value means stronger study design.
49fn grade_rank(g: CausalEvidenceGrade) -> u32 {
50    match g {
51        CausalEvidenceGrade::Theoretical => 1,
52        CausalEvidenceGrade::Observational => 2,
53        CausalEvidenceGrade::QuasiExperimental => 3,
54        CausalEvidenceGrade::Rct => 4,
55    }
56}
57
58fn passes_filter(f: &FindingBundle, filter: &AggregateFilter) -> bool {
59    if let Some(want) = filter.causal_claim
60        && f.assertion.causal_claim != Some(want)
61    {
62        return false;
63    }
64    if let Some(min) = filter.causal_grade_min {
65        match f.assertion.causal_evidence_grade {
66            None => return false, // ungraded findings can't satisfy a min
67            Some(g) if grade_rank(g) < grade_rank(min) => return false,
68            _ => {}
69        }
70    }
71    true
72}
73
/// How candidate findings are weighted when computing consensus.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum WeightingScheme {
    /// Every matching finding contributes equally. Good for counting
    /// how many independent assertions exist.
    Unweighted,
    /// Each finding's weight scales with the number of successful
    /// (or failed) replications referencing it as `target_finding`.
    /// Failed replications subtract weight; successful ones add
    /// weight. The substrate move that makes well-replicated claims
    /// dominate consensus over freshly-asserted ones.
    ReplicationWeighted,
    /// Weight scales with `provenance.citation_count`. Useful when
    /// most findings carry real citation counts; behaves like
    /// `Unweighted` for findings without one.
    CitationWeighted,
    /// Weighted blend of the three above, currently in fixed
    /// proportions (`replication 0.5 + citation 0.3 + base 0.2`).
    Composite,
}

97impl WeightingScheme {
98    pub fn parse(s: &str) -> Result<Self, String> {
99        match s.to_lowercase().as_str() {
100            "unweighted" | "uniform" => Ok(WeightingScheme::Unweighted),
101            "replication" | "replication_weighted" => Ok(WeightingScheme::ReplicationWeighted),
102            "citation" | "citation_weighted" => Ok(WeightingScheme::CitationWeighted),
103            "composite" | "default" => Ok(WeightingScheme::Composite),
104            _ => Err(format!(
105                "unknown weighting scheme `{s}`; valid: unweighted | replication | citation | composite"
106            )),
107        }
108    }
109
110    pub fn name(&self) -> &'static str {
111        match self {
112            WeightingScheme::Unweighted => "unweighted",
113            WeightingScheme::ReplicationWeighted => "replication_weighted",
114            WeightingScheme::CitationWeighted => "citation_weighted",
115            WeightingScheme::Composite => "composite",
116        }
117    }
118}
119
/// One finding's contribution to a consensus result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusConstituent {
    /// Id of the contributing finding.
    pub finding_id: String,
    /// Assertion text of the contributing finding, for display.
    pub assertion_text: String,
    /// Original `Confidence.score` from the finding, before any
    /// adjustments.
    pub raw_score: f64,
    /// `raw_score` after replication / review-state adjustments.
    /// `>= raw_score` if the finding has successful replications,
    /// `< raw_score` if the finding is contested or has failed
    /// replications.
    pub adjusted_score: f64,
    /// Final weight in the consensus computation.
    pub weight: f64,
    /// Number of `Replication` records targeting this finding,
    /// broken down by outcome. Useful for the rendering layer.
    pub n_replications: usize,
    /// Replications with outcome "replicated".
    pub n_replicated: usize,
    /// Replications with outcome "failed".
    pub n_failed_replications: usize,
}

/// Derived consensus over claim-similar findings. Never stored —
/// recomputed deterministically from canonical state on demand.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusResult {
    /// `vf_<id>` of the target finding the consensus is anchored to.
    pub target: String,
    /// The target finding's assertion text, for display.
    pub target_assertion: String,
    /// Number of findings (including the target) that contributed.
    pub n_findings: usize,
    /// Weighted-mean confidence on `[0, 1]`, rounded to 3 decimals.
    pub consensus_confidence: f64,
    /// 95% credible interval over the weighted distribution.
    pub credible_interval_lo: f64,
    pub credible_interval_hi: f64,
    /// Each constituent finding with its weight + adjusted score.
    pub constituents: Vec<ConsensusConstituent>,
    /// Name of the weighting scheme used.
    pub weighting: String,
    /// v0.38.2: filter applied to neighbor findings before similarity
    /// computation. `None` (the v0.35–v0.38.1 default) means no
    /// filter; everything similar contributes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub filter: Option<AggregateFilter>,
}

167/// Compute consensus over findings similar to `target_id`.
168///
169/// "Similar" means: shares ≥ 1 named entity with the target's
170/// assertion AND has either matching `assertion_type` or substantial
171/// text overlap. This is intentionally fuzzier than `vf_id` equality
172/// — two papers asserting the same mechanism in different prose
173/// should both contribute.
174///
175/// Returns `None` if `target_id` isn't in the project.
176///
177/// Equivalent to `consensus_for_with_filter` with the default
178/// (no-op) filter; preserved for backward compatibility.
179pub fn consensus_for(
180    project: &Project,
181    target_id: &str,
182    weighting: WeightingScheme,
183) -> Option<ConsensusResult> {
184    consensus_for_with_filter(project, target_id, weighting, &AggregateFilter::default())
185}
186
187/// v0.38.2: same as `consensus_for`, with a structured `AggregateFilter`
188/// applied to candidate findings before similarity is checked. Lets
189/// callers ask sharper questions: "what's the consensus *as
190/// intervention*?" or "consensus among RCT-grade evidence only?"
191///
192/// The target finding is always included as the anchor, even if it
193/// would not pass the filter — the consensus is *about* this claim.
194/// Only the *neighbors* are filtered.
195pub fn consensus_for_with_filter(
196    project: &Project,
197    target_id: &str,
198    weighting: WeightingScheme,
199    filter: &AggregateFilter,
200) -> Option<ConsensusResult> {
201    let target = project.findings.iter().find(|f| f.id == target_id)?;
202    let target_entities: HashSet<String> = target
203        .assertion
204        .entities
205        .iter()
206        .map(|e| e.name.to_lowercase())
207        .collect();
208    let target_text_words: HashSet<String> = target
209        .assertion
210        .text
211        .to_lowercase()
212        .split_whitespace()
213        .filter(|w| w.len() > 4)
214        .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()).to_string())
215        .filter(|w| !w.is_empty())
216        .collect();
217
218    // Find candidate findings — including the target itself, which
219    // anchors the consensus on its own evidence.
220    //
221    // v0.38.2: neighbor findings must pass the optional `filter`
222    // (causal_claim match + causal_grade_min). The target is always
223    // included regardless of filter — the consensus is *about* this
224    // claim, not selecting *for* it.
225    let mut candidates: Vec<&FindingBundle> = Vec::new();
226    for f in &project.findings {
227        if f.id == target_id {
228            candidates.push(f);
229            continue;
230        }
231        if !is_similar(
232            f,
233            &target_entities,
234            &target_text_words,
235            &target.assertion.assertion_type,
236        ) {
237            continue;
238        }
239        if !passes_filter(f, filter) {
240            continue;
241        }
242        candidates.push(f);
243    }
244
245    // Build constituent records: replication tallies + adjusted score
246    // + weight.
247    let constituents: Vec<ConsensusConstituent> = candidates
248        .iter()
249        .map(|f| {
250            let (n_repls, n_replicated, n_failed) = replication_tallies(project, &f.id);
251            let raw_score = f.confidence.score;
252            let adjusted_score = adjust_score_for_replications_and_review(
253                raw_score,
254                n_replicated,
255                n_failed,
256                f.flags.contested,
257            );
258            let weight = compute_weight(weighting, f, n_replicated, n_failed);
259            ConsensusConstituent {
260                finding_id: f.id.clone(),
261                assertion_text: f.assertion.text.clone(),
262                raw_score,
263                adjusted_score,
264                weight,
265                n_replications: n_repls,
266                n_replicated,
267                n_failed_replications: n_failed,
268            }
269        })
270        .collect();
271
272    // Weighted mean + credible interval. If total weight is zero
273    // (degenerate), fall back to the unweighted mean of adjusted
274    // scores.
275    let total_weight: f64 = constituents.iter().map(|c| c.weight).sum();
276    let consensus_confidence = if total_weight > 0.0 {
277        constituents
278            .iter()
279            .map(|c| c.adjusted_score * c.weight)
280            .sum::<f64>()
281            / total_weight
282    } else if !constituents.is_empty() {
283        constituents.iter().map(|c| c.adjusted_score).sum::<f64>() / constituents.len() as f64
284    } else {
285        0.0
286    };
287
288    let (credible_interval_lo, credible_interval_hi) =
289        weighted_credible_interval(&constituents, consensus_confidence, total_weight);
290
291    let filter_serialized = if filter.causal_claim.is_some() || filter.causal_grade_min.is_some() {
292        Some(filter.clone())
293    } else {
294        None
295    };
296
297    Some(ConsensusResult {
298        target: target.id.clone(),
299        target_assertion: target.assertion.text.clone(),
300        n_findings: constituents.len(),
301        consensus_confidence: round3(consensus_confidence),
302        credible_interval_lo: round3(credible_interval_lo),
303        credible_interval_hi: round3(credible_interval_hi),
304        constituents,
305        weighting: weighting.name().to_string(),
306        filter: filter_serialized,
307    })
308}
309
310fn is_similar(
311    candidate: &FindingBundle,
312    target_entities: &HashSet<String>,
313    target_text_words: &HashSet<String>,
314    target_type: &str,
315) -> bool {
316    // Entity overlap: share at least one named entity (case-insensitive).
317    let cand_entities: HashSet<String> = candidate
318        .assertion
319        .entities
320        .iter()
321        .map(|e| e.name.to_lowercase())
322        .collect();
323    let entity_overlap = !cand_entities.is_disjoint(target_entities);
324
325    // Text overlap: at least 3 substantive words shared (Jaccard-ish).
326    let cand_text_words: HashSet<String> = candidate
327        .assertion
328        .text
329        .to_lowercase()
330        .split_whitespace()
331        .filter(|w| w.len() > 4)
332        .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()).to_string())
333        .filter(|w| !w.is_empty())
334        .collect();
335    let text_overlap = cand_text_words.intersection(target_text_words).count() >= 3;
336
337    // Type match contributes to similarity but isn't required.
338    let type_match = candidate.assertion.assertion_type == target_type;
339
340    // Loose-OR: any two of the three signals (or strong overlap on
341    // one) qualifies as similar.
342    let signals = [entity_overlap, text_overlap, type_match]
343        .iter()
344        .filter(|x| **x)
345        .count();
346    signals >= 2 || (entity_overlap && cand_entities.intersection(target_entities).count() >= 2)
347}
348
349fn replication_tallies(project: &Project, finding_id: &str) -> (usize, usize, usize) {
350    let mut total = 0usize;
351    let mut replicated = 0usize;
352    let mut failed = 0usize;
353    for r in &project.replications {
354        if r.target_finding == finding_id {
355            total += 1;
356            match r.outcome.as_str() {
357                "replicated" => replicated += 1,
358                "failed" => failed += 1,
359                _ => {}
360            }
361        }
362    }
363    (total, replicated, failed)
364}
365
/// Adjust a raw confidence score for replication evidence and review
/// state: each successful replication adds 5%, each failed one
/// subtracts 10%, contested findings take a further 15% haircut, and
/// the result is clamped to `[0, 1]`.
fn adjust_score_for_replications_and_review(
    raw: f64,
    n_replicated: usize,
    n_failed: usize,
    contested: bool,
) -> f64 {
    let shifted = raw + 0.05 * n_replicated as f64 - 0.10 * n_failed as f64;
    let penalized = if contested { shifted * 0.85 } else { shifted };
    penalized.clamp(0.0, 1.0)
}

381fn compute_weight(
382    scheme: WeightingScheme,
383    f: &FindingBundle,
384    n_replicated: usize,
385    n_failed: usize,
386) -> f64 {
387    let base = 1.0;
388    let replication_factor = 1.0 + 0.5 * n_replicated as f64 - 0.5 * n_failed as f64;
389    let citation_factor = 1.0 + (f.provenance.citation_count.unwrap_or(0) as f64).ln_1p() * 0.10;
390    match scheme {
391        WeightingScheme::Unweighted => base,
392        WeightingScheme::ReplicationWeighted => replication_factor.max(0.0),
393        WeightingScheme::CitationWeighted => citation_factor.max(0.0),
394        WeightingScheme::Composite => {
395            (0.2 * base + 0.5 * replication_factor.max(0.0) + 0.3 * citation_factor.max(0.0))
396                .max(0.0)
397        }
398    }
399}
400
401fn weighted_credible_interval(
402    constituents: &[ConsensusConstituent],
403    mean: f64,
404    total_weight: f64,
405) -> (f64, f64) {
406    if constituents.is_empty() || total_weight <= 0.0 {
407        return (mean, mean);
408    }
409    // Weighted variance.
410    let var = constituents
411        .iter()
412        .map(|c| c.weight * (c.adjusted_score - mean).powi(2))
413        .sum::<f64>()
414        / total_weight;
415    let sd = var.sqrt();
416    // 95% interval ≈ ±1.96 SD; clamp to [0, 1].
417    let lo = (mean - 1.96 * sd).clamp(0.0, 1.0);
418    let hi = (mean + 1.96 * sd).clamp(0.0, 1.0);
419    (lo, hi)
420}
421
/// Round to 3 decimal places (ties away from zero, per `f64::round`).
fn round3(x: f64) -> f64 {
    const SCALE: f64 = 1000.0;
    (x * SCALE).round() / SCALE
}

#[cfg(test)]
mod v0_38_2_filter_tests {
    //! v0.38.2 `AggregateFilter` behavior: neighbor filtering by
    //! `causal_claim` and `causal_grade_min`, anchor (target)
    //! inclusion, and the `grade_rank` ordering.

    use super::*;
    use crate::bundle::*;
    use crate::project;

    /// Minimal fixture finding. Every fixture shares entities "X"/"Y"
    /// and assertion_type "mechanism", so any pair is claim-similar;
    /// only `claim` and `grade` vary between tests.
    fn finding(
        id: &str,
        claim: Option<CausalClaim>,
        grade: Option<CausalEvidenceGrade>,
    ) -> FindingBundle {
        FindingBundle::new(
            Assertion {
                text: format!("X covaries with Y in {id}"),
                assertion_type: "mechanism".into(),
                entities: vec![
                    Entity {
                        name: "X".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                    Entity {
                        name: "Y".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                ],
                relation: Some("covaries_with".into()),
                direction: Some("positive".into()),
                causal_claim: claim,
                causal_evidence_grade: grade,
            },
            Evidence {
                evidence_type: "experimental".into(),
                model_system: String::new(),
                species: None,
                method: String::new(),
                sample_size: Some("n=100".into()),
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            Confidence::raw(0.7, "test", 0.85),
            Provenance {
                source_type: "published_paper".into(),
                doi: None,
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "Test".into(),
                authors: vec![],
                year: Some(2025),
                journal: None,
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            Flags::default(),
        )
    }

    /// Assemble the findings into a minimal project for aggregation.
    fn make_project(findings: Vec<FindingBundle>) -> Project {
        project::assemble("test", findings, 1, 0, "test")
    }

    #[test]
    fn unfiltered_blends_all_similar_findings() {
        let target = finding(
            "a",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let interv = finding(
            "b",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, interv]);
        let result = consensus_for(&project, &target_id, WeightingScheme::Unweighted).unwrap();
        // No filter: both findings contribute.
        assert_eq!(result.n_findings, 2);
        // And the serialized result omits the filter entirely.
        assert!(result.filter.is_none());
    }

    #[test]
    fn causal_claim_filter_keeps_only_matching_neighbors() {
        let target = finding(
            "a",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let interv = finding(
            "b",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, interv]);
        let filter = AggregateFilter {
            causal_claim: Some(CausalClaim::Intervention),
            causal_grade_min: None,
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Target (correlation) is the anchor — always included.
        // Neighbor (intervention) passes the filter.
        // Result includes both — anchor + 1 matching neighbor.
        assert_eq!(result.n_findings, 2);
        // But if the target had a correlation neighbor not matching
        // intervention, it would be excluded.
    }

    #[test]
    fn causal_claim_filter_excludes_non_matching_neighbors() {
        let target = finding(
            "a",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_corr = finding(
            "b",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_corr]);
        let filter = AggregateFilter {
            causal_claim: Some(CausalClaim::Intervention),
            causal_grade_min: None,
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Only the target (always included) — neighbor filtered out.
        assert_eq!(result.n_findings, 1);
        // The applied filter is echoed back in the result.
        assert_eq!(
            result.filter.as_ref().unwrap().causal_claim,
            Some(CausalClaim::Intervention)
        );
    }

    #[test]
    fn grade_min_excludes_lower_grades() {
        let target = finding(
            "a",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_obs = finding(
            "b",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Observational),
        );
        let neighbor_rct = finding(
            "c",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_obs, neighbor_rct]);
        let filter = AggregateFilter {
            causal_claim: None,
            causal_grade_min: Some(CausalEvidenceGrade::QuasiExperimental),
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Target (RCT, anchor) + neighbor_rct. Observational excluded.
        assert_eq!(result.n_findings, 2);
    }

    #[test]
    fn ungraded_findings_excluded_when_grade_min_set() {
        let target = finding(
            "a",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_ungraded = finding("b", Some(CausalClaim::Mediation), None);
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_ungraded]);
        let filter = AggregateFilter {
            causal_claim: None,
            causal_grade_min: Some(CausalEvidenceGrade::Observational),
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Ungraded neighbor is excluded; only target remains.
        assert_eq!(result.n_findings, 1);
    }

    #[test]
    fn grade_rank_orders_correctly() {
        // Theoretical < Observational < QuasiExperimental < Rct.
        assert!(
            grade_rank(CausalEvidenceGrade::Theoretical)
                < grade_rank(CausalEvidenceGrade::Observational)
        );
        assert!(
            grade_rank(CausalEvidenceGrade::Observational)
                < grade_rank(CausalEvidenceGrade::QuasiExperimental)
        );
        assert!(
            grade_rank(CausalEvidenceGrade::QuasiExperimental)
                < grade_rank(CausalEvidenceGrade::Rct)
        );
    }
}