1use std::collections::HashSet;
19
20use serde::{Deserialize, Serialize};
21
22use crate::bundle::{CausalClaim, CausalEvidenceGrade, FindingBundle};
23use crate::project::Project;
24
/// Optional constraints applied to neighbor findings during consensus
/// aggregation (see `consensus_for_with_filter`); an unset field imposes
/// no constraint. Fields are serialized only when set, so a default
/// filter round-trips as an empty object.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AggregateFilter {
    /// Keep only findings whose assertion carries exactly this causal claim.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_claim: Option<CausalClaim>,
    /// Keep only findings graded at or above this evidence grade;
    /// ungraded findings are excluded when this is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_grade_min: Option<CausalEvidenceGrade>,
}
46
47fn grade_rank(g: CausalEvidenceGrade) -> u32 {
50 match g {
51 CausalEvidenceGrade::Theoretical => 1,
52 CausalEvidenceGrade::Observational => 2,
53 CausalEvidenceGrade::QuasiExperimental => 3,
54 CausalEvidenceGrade::Rct => 4,
55 }
56}
57
58fn passes_filter(f: &FindingBundle, filter: &AggregateFilter) -> bool {
59 if let Some(want) = filter.causal_claim
60 && f.assertion.causal_claim != Some(want)
61 {
62 return false;
63 }
64 if let Some(min) = filter.causal_grade_min {
65 match f.assertion.causal_evidence_grade {
66 None => return false, Some(g) if grade_rank(g) < grade_rank(min) => return false,
68 _ => {}
69 }
70 }
71 true
72}
73
/// How constituent findings are weighted when blending a consensus score
/// (see `compute_weight` for the exact factors).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum WeightingScheme {
    /// Every constituent counts equally (weight 1.0).
    Unweighted,
    /// Weight grows with successful replications and shrinks with failures.
    ReplicationWeighted,
    /// Weight grows with log-damped citation count.
    CitationWeighted,
    /// Fixed blend of uniform, replication, and citation factors.
    Composite,
}
96
97impl WeightingScheme {
98 pub fn parse(s: &str) -> Result<Self, String> {
99 match s.to_lowercase().as_str() {
100 "unweighted" | "uniform" => Ok(WeightingScheme::Unweighted),
101 "replication" | "replication_weighted" => Ok(WeightingScheme::ReplicationWeighted),
102 "citation" | "citation_weighted" => Ok(WeightingScheme::CitationWeighted),
103 "composite" | "default" => Ok(WeightingScheme::Composite),
104 _ => Err(format!(
105 "unknown weighting scheme `{s}`; valid: unweighted | replication | citation | composite"
106 )),
107 }
108 }
109
110 pub fn name(&self) -> &'static str {
111 match self {
112 WeightingScheme::Unweighted => "unweighted",
113 WeightingScheme::ReplicationWeighted => "replication_weighted",
114 WeightingScheme::CitationWeighted => "citation_weighted",
115 WeightingScheme::Composite => "composite",
116 }
117 }
118}
119
/// One finding's contribution to a consensus blend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusConstituent {
    pub finding_id: String,
    pub assertion_text: String,
    /// Confidence score as recorded on the finding.
    pub raw_score: f64,
    /// Raw score after replication bonus/penalty and the contested
    /// discount, clamped to [0, 1].
    pub adjusted_score: f64,
    /// Blending weight under the chosen `WeightingScheme`.
    pub weight: f64,
    /// Total replication attempts targeting this finding.
    pub n_replications: usize,
    /// Attempts whose outcome was "replicated".
    pub n_replicated: usize,
    /// Attempts whose outcome was "failed".
    pub n_failed_replications: usize,
}
141
/// Aggregated consensus over a target finding and its similar neighbors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusResult {
    /// Id of the target finding the consensus was computed for.
    pub target: String,
    /// Assertion text of the target finding.
    pub target_assertion: String,
    /// Number of constituents blended (the target is always included).
    pub n_findings: usize,
    /// Weighted mean of constituent adjusted scores, rounded to 3 decimals.
    pub consensus_confidence: f64,
    /// Lower bound of the credible interval, clamped to [0, 1], rounded.
    pub credible_interval_lo: f64,
    /// Upper bound of the credible interval, clamped to [0, 1], rounded.
    pub credible_interval_hi: f64,
    pub constituents: Vec<ConsensusConstituent>,
    /// Canonical name of the weighting scheme used.
    pub weighting: String,
    /// The filter that was applied; present only when non-default.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub filter: Option<AggregateFilter>,
}
166
167pub fn consensus_for(
180 project: &Project,
181 target_id: &str,
182 weighting: WeightingScheme,
183) -> Option<ConsensusResult> {
184 consensus_for_with_filter(project, target_id, weighting, &AggregateFilter::default())
185}
186
/// Computes a weighted consensus for `target_id`, blending it with
/// similar findings from `project` that pass `filter`.
///
/// Returns `None` only when `target_id` does not exist in the project.
/// The target finding itself is always included as a constituent, even
/// when it would not pass `filter` (only neighbors are filtered).
pub fn consensus_for_with_filter(
    project: &Project,
    target_id: &str,
    weighting: WeightingScheme,
    filter: &AggregateFilter,
) -> Option<ConsensusResult> {
    let target = project.findings.iter().find(|f| f.id == target_id)?;
    // Lowercased entity names of the target, for overlap tests in `is_similar`.
    let target_entities: HashSet<String> = target
        .assertion
        .entities
        .iter()
        .map(|e| e.name.to_lowercase())
        .collect();
    // "Significant" assertion words: lowercase, longer than 4 chars before
    // punctuation is trimmed, non-empty afterwards. Mirrors the extraction
    // done for candidates inside `is_similar`.
    let target_text_words: HashSet<String> = target
        .assertion
        .text
        .to_lowercase()
        .split_whitespace()
        .filter(|w| w.len() > 4)
        .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()).to_string())
        .filter(|w| !w.is_empty())
        .collect();

    // Candidate set: the target plus every similar neighbor passing the filter.
    let mut candidates: Vec<&FindingBundle> = Vec::new();
    for f in &project.findings {
        if f.id == target_id {
            // The target bypasses both the similarity and the filter checks.
            candidates.push(f);
            continue;
        }
        if !is_similar(
            f,
            &target_entities,
            &target_text_words,
            &target.assertion.assertion_type,
        ) {
            continue;
        }
        if !passes_filter(f, filter) {
            continue;
        }
        candidates.push(f);
    }

    // Score and weight each candidate, folding in its replication record.
    let constituents: Vec<ConsensusConstituent> = candidates
        .iter()
        .map(|f| {
            let (n_repls, n_replicated, n_failed) = replication_tallies(project, &f.id);
            let raw_score = f.confidence.score;
            let adjusted_score = adjust_score_for_replications_and_review(
                raw_score,
                n_replicated,
                n_failed,
                f.flags.contested,
            );
            let weight = compute_weight(weighting, f, n_replicated, n_failed);
            ConsensusConstituent {
                finding_id: f.id.clone(),
                assertion_text: f.assertion.text.clone(),
                raw_score,
                adjusted_score,
                weight,
                n_replications: n_repls,
                n_replicated,
                n_failed_replications: n_failed,
            }
        })
        .collect();

    // Weighted mean of adjusted scores; fall back to an unweighted mean
    // when all weights are zero, and to 0.0 when there are no constituents
    // at all (cannot happen here, since the target is always pushed).
    let total_weight: f64 = constituents.iter().map(|c| c.weight).sum();
    let consensus_confidence = if total_weight > 0.0 {
        constituents
            .iter()
            .map(|c| c.adjusted_score * c.weight)
            .sum::<f64>()
            / total_weight
    } else if !constituents.is_empty() {
        constituents.iter().map(|c| c.adjusted_score).sum::<f64>() / constituents.len() as f64
    } else {
        0.0
    };

    let (credible_interval_lo, credible_interval_hi) =
        weighted_credible_interval(&constituents, consensus_confidence, total_weight);

    // Echo the filter back in the result only when it actually constrained
    // anything, so unfiltered results serialize without a `filter` field.
    let filter_serialized = if filter.causal_claim.is_some() || filter.causal_grade_min.is_some() {
        Some(filter.clone())
    } else {
        None
    };

    Some(ConsensusResult {
        target: target.id.clone(),
        target_assertion: target.assertion.text.clone(),
        n_findings: constituents.len(),
        consensus_confidence: round3(consensus_confidence),
        credible_interval_lo: round3(credible_interval_lo),
        credible_interval_hi: round3(credible_interval_hi),
        constituents,
        weighting: weighting.name().to_string(),
        filter: filter_serialized,
    })
}
309
310fn is_similar(
311 candidate: &FindingBundle,
312 target_entities: &HashSet<String>,
313 target_text_words: &HashSet<String>,
314 target_type: &str,
315) -> bool {
316 let cand_entities: HashSet<String> = candidate
318 .assertion
319 .entities
320 .iter()
321 .map(|e| e.name.to_lowercase())
322 .collect();
323 let entity_overlap = !cand_entities.is_disjoint(target_entities);
324
325 let cand_text_words: HashSet<String> = candidate
327 .assertion
328 .text
329 .to_lowercase()
330 .split_whitespace()
331 .filter(|w| w.len() > 4)
332 .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()).to_string())
333 .filter(|w| !w.is_empty())
334 .collect();
335 let text_overlap = cand_text_words.intersection(target_text_words).count() >= 3;
336
337 let type_match = candidate.assertion.assertion_type == target_type;
339
340 let signals = [entity_overlap, text_overlap, type_match]
343 .iter()
344 .filter(|x| **x)
345 .count();
346 signals >= 2 || (entity_overlap && cand_entities.intersection(target_entities).count() >= 2)
347}
348
349fn replication_tallies(project: &Project, finding_id: &str) -> (usize, usize, usize) {
350 let mut total = 0usize;
351 let mut replicated = 0usize;
352 let mut failed = 0usize;
353 for r in &project.replications {
354 if r.target_finding == finding_id {
355 total += 1;
356 match r.outcome.as_str() {
357 "replicated" => replicated += 1,
358 "failed" => failed += 1,
359 _ => {}
360 }
361 }
362 }
363 (total, replicated, failed)
364}
365
/// Adjusts a raw confidence score for replication history and review status.
///
/// Each successful replication adds 0.05, each failure subtracts 0.10;
/// a contested finding is then discounted by 15%. The result is clamped
/// to [0, 1].
fn adjust_score_for_replications_and_review(
    raw: f64,
    n_replicated: usize,
    n_failed: usize,
    contested: bool,
) -> f64 {
    let bonus = 0.05 * n_replicated as f64;
    let penalty = 0.10 * n_failed as f64;
    let mut adjusted = raw + bonus - penalty;
    if contested {
        adjusted *= 0.85;
    }
    adjusted.clamp(0.0, 1.0)
}
380
381fn compute_weight(
382 scheme: WeightingScheme,
383 f: &FindingBundle,
384 n_replicated: usize,
385 n_failed: usize,
386) -> f64 {
387 let base = 1.0;
388 let replication_factor = 1.0 + 0.5 * n_replicated as f64 - 0.5 * n_failed as f64;
389 let citation_factor = 1.0 + (f.provenance.citation_count.unwrap_or(0) as f64).ln_1p() * 0.10;
390 match scheme {
391 WeightingScheme::Unweighted => base,
392 WeightingScheme::ReplicationWeighted => replication_factor.max(0.0),
393 WeightingScheme::CitationWeighted => citation_factor.max(0.0),
394 WeightingScheme::Composite => {
395 (0.2 * base + 0.5 * replication_factor.max(0.0) + 0.3 * citation_factor.max(0.0))
396 .max(0.0)
397 }
398 }
399}
400
401fn weighted_credible_interval(
402 constituents: &[ConsensusConstituent],
403 mean: f64,
404 total_weight: f64,
405) -> (f64, f64) {
406 if constituents.is_empty() || total_weight <= 0.0 {
407 return (mean, mean);
408 }
409 let var = constituents
411 .iter()
412 .map(|c| c.weight * (c.adjusted_score - mean).powi(2))
413 .sum::<f64>()
414 / total_weight;
415 let sd = var.sqrt();
416 let lo = (mean - 1.96 * sd).clamp(0.0, 1.0);
418 let hi = (mean + 1.96 * sd).clamp(0.0, 1.0);
419 (lo, hi)
420}
421
/// Rounds to three decimal places (ties away from zero, per `f64::round`).
fn round3(x: f64) -> f64 {
    const SCALE: f64 = 1000.0;
    (x * SCALE).round() / SCALE
}
425
#[cfg(test)]
mod v0_38_2_filter_tests {
    use super::*;
    use crate::bundle::*;
    use crate::project;

    /// Builds a minimal finding fixture. Every fixture shares the same
    /// entities (X, Y), near-identical assertion text, and the
    /// "mechanism" assertion type, so all fixtures are mutually similar
    /// under `is_similar`; only `claim` and `grade` vary per test.
    fn finding(
        id: &str,
        claim: Option<CausalClaim>,
        grade: Option<CausalEvidenceGrade>,
    ) -> FindingBundle {
        FindingBundle::new(
            Assertion {
                text: format!("X covaries with Y in {id}"),
                assertion_type: "mechanism".into(),
                entities: vec![
                    Entity {
                        name: "X".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                    Entity {
                        name: "Y".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                ],
                relation: Some("covaries_with".into()),
                direction: Some("positive".into()),
                causal_claim: claim,
                causal_evidence_grade: grade,
            },
            Evidence {
                evidence_type: "experimental".into(),
                model_system: String::new(),
                species: None,
                method: String::new(),
                sample_size: Some("n=100".into()),
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            Confidence::raw(0.7, "test", 0.85),
            Provenance {
                source_type: "published_paper".into(),
                doi: None,
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "Test".into(),
                authors: vec![],
                year: Some(2025),
                journal: None,
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            Flags::default(),
        )
    }

    /// Wraps the fixtures in a project via the standard assembly path.
    fn make_project(findings: Vec<FindingBundle>) -> Project {
        project::assemble("test", findings, 1, 0, "test")
    }

    #[test]
    fn unfiltered_blends_all_similar_findings() {
        let target = finding(
            "a",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let interv = finding(
            "b",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, interv]);
        let result = consensus_for(&project, &target_id, WeightingScheme::Unweighted).unwrap();
        // Both findings blend, and a default filter is not echoed back.
        assert_eq!(result.n_findings, 2);
        assert!(result.filter.is_none());
    }

    #[test]
    fn causal_claim_filter_keeps_only_matching_neighbors() {
        let target = finding(
            "a",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let interv = finding(
            "b",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, interv]);
        let filter = AggregateFilter {
            causal_claim: Some(CausalClaim::Intervention),
            causal_grade_min: None,
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Both survive: the Intervention neighbor matches the filter, and
        // the target is always retained regardless of its own claim.
        assert_eq!(result.n_findings, 2);
    }

    #[test]
    fn causal_claim_filter_excludes_non_matching_neighbors() {
        let target = finding(
            "a",
            Some(CausalClaim::Intervention),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_corr = finding(
            "b",
            Some(CausalClaim::Correlation),
            Some(CausalEvidenceGrade::Observational),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_corr]);
        let filter = AggregateFilter {
            causal_claim: Some(CausalClaim::Intervention),
            causal_grade_min: None,
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Only the target remains; the Correlation neighbor is filtered
        // out, and the non-default filter is echoed in the result.
        assert_eq!(result.n_findings, 1);
        assert_eq!(
            result.filter.as_ref().unwrap().causal_claim,
            Some(CausalClaim::Intervention)
        );
    }

    #[test]
    fn grade_min_excludes_lower_grades() {
        let target = finding(
            "a",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_obs = finding(
            "b",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Observational),
        );
        let neighbor_rct = finding(
            "c",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_obs, neighbor_rct]);
        let filter = AggregateFilter {
            causal_claim: None,
            causal_grade_min: Some(CausalEvidenceGrade::QuasiExperimental),
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // Target + RCT neighbor pass; the Observational neighbor ranks
        // below QuasiExperimental and is dropped.
        assert_eq!(result.n_findings, 2);
    }

    #[test]
    fn ungraded_findings_excluded_when_grade_min_set() {
        let target = finding(
            "a",
            Some(CausalClaim::Mediation),
            Some(CausalEvidenceGrade::Rct),
        );
        let neighbor_ungraded = finding("b", Some(CausalClaim::Mediation), None);
        let target_id = target.id.clone();
        let project = make_project(vec![target, neighbor_ungraded]);
        let filter = AggregateFilter {
            causal_claim: None,
            causal_grade_min: Some(CausalEvidenceGrade::Observational),
        };
        let result =
            consensus_for_with_filter(&project, &target_id, WeightingScheme::Unweighted, &filter)
                .unwrap();
        // A neighbor with no grade at all cannot satisfy a grade minimum.
        assert_eq!(result.n_findings, 1);
    }

    #[test]
    fn grade_rank_orders_correctly() {
        // Ranks must be strictly increasing along the evidence hierarchy.
        assert!(
            grade_rank(CausalEvidenceGrade::Theoretical)
                < grade_rank(CausalEvidenceGrade::Observational)
        );
        assert!(
            grade_rank(CausalEvidenceGrade::Observational)
                < grade_rank(CausalEvidenceGrade::QuasiExperimental)
        );
        assert!(
            grade_rank(CausalEvidenceGrade::QuasiExperimental)
                < grade_rank(CausalEvidenceGrade::Rct)
        );
    }
}