vela_protocol/
confidence.rs

1//! Citation-grounded frontier confidence calibration.
2//!
3//! Adjusts existing frontier epistemic confidence scores using calibration signals:
4//! citation count, recency, evidence type, and evidence span availability.
5//!
6//! ## Integration
7//!
8//! Add to main.rs:
9//! ```ignore
10//! mod confidence;
11//! ```
12//!
13//! Insert in the compile pipeline after normalization, before resolve/link:
14//! ```ignore
15//! println!("[X/N] Calibrating confidence...");
16//! let adjustments = confidence::ground_confidence(&mut all_bundles);
17//! println!("  -> {} findings adjusted", adjustments.len());
18//! ```
19
20use chrono::{Datelike, Utc};
21
22use crate::bundle::{ConfidenceUpdate, FindingBundle};
23
24/// Calibrate confidence scores on all bundles using citation, recency, evidence,
25/// and span signals. Returns a vector of ConfidenceUpdate records (one per
26/// bundle whose score changed). Also mutates each bundle's confidence in place
27/// for backward compatibility.
28pub fn ground_confidence(bundles: &mut [FindingBundle]) -> Vec<ConfidenceUpdate> {
29    if bundles.is_empty() {
30        return Vec::new();
31    }
32
33    // Compute citation percentiles across the corpus.
34    let mut citation_counts: Vec<u64> = bundles
35        .iter()
36        .filter_map(|b| b.provenance.citation_count)
37        .collect();
38    citation_counts.sort_unstable();
39
40    let p90 = percentile_value(&citation_counts, 90);
41    let p10 = percentile_value(&citation_counts, 10);
42
43    let current_year = Utc::now().naive_utc().year();
44    let mut updates: Vec<ConfidenceUpdate> = Vec::new();
45    let now = Utc::now().to_rfc3339();
46
47    for bundle in bundles.iter_mut() {
48        let prior_score = bundle.confidence.score;
49        let mut adjustment = 0.0f64;
50        let mut basis_parts: Vec<String> = vec![format!("pre_calibration: {:.2}", prior_score)];
51
52        // Factor 1: Citation count (log-scaled, clamped to -0.15 .. +0.15).
53        if let Some(cites) = bundle.provenance.citation_count {
54            let log_signal = (cites as f64 + 1.0).log10() / 4.0; // 0..~1 for 0..9999
55            let citation_adj = if cites >= p90 {
56                log_signal.min(0.15)
57            } else if cites <= p10 {
58                -(0.10f64.min(0.15 - log_signal))
59            } else {
60                (log_signal - 0.3).clamp(-0.05, 0.10)
61            };
62            adjustment += citation_adj;
63            basis_parts.push(format!("citations: {} ({:+.2})", cites, citation_adj));
64        }
65
66        // Factor 2: Recency.
67        if let Some(year) = bundle.provenance.year {
68            let age = current_year - year;
69            let recency_adj = if age <= 3 {
70                0.05
71            } else if age <= 10 {
72                0.0
73            } else {
74                -0.05
75            };
76            adjustment += recency_adj;
77            basis_parts.push(format!("recency: {} ({:+.2})", year, recency_adj));
78        }
79
80        // Factor 3: Evidence type weighting.
81        let etype = bundle.evidence.evidence_type.as_str();
82        let etype_adj = match etype {
83            "meta_analysis" | "systematic_review" => 0.10,
84            "experimental" if bundle.conditions.human_data => 0.05,
85            "experimental" => 0.0,
86            "observational" => 0.0,
87            "theoretical" | "computational" => -0.05,
88            _ => 0.0,
89        };
90        adjustment += etype_adj;
91        basis_parts.push(format!("evidence: {} ({:+.2})", etype, etype_adj));
92
93        // Factor 4: Evidence spans (auditable extraction).
94        let span_adj = if !bundle.evidence.evidence_spans.is_empty() {
95            0.05
96        } else {
97            -0.05
98        };
99        adjustment += span_adj;
100
101        // Weighted combination: 60% LLM, 40% grounded adjustment.
102        let calibrated = (prior_score + adjustment).clamp(0.0, 1.0);
103        let final_score = (0.6 * prior_score + 0.4 * calibrated).clamp(0.05, 0.99);
104
105        // Round to 3 decimal places.
106        let final_score = (final_score * 1000.0).round() / 1000.0;
107
108        basis_parts.push(format!("calibration: {:+.2}", adjustment));
109        basis_parts.push(format!("-> {:.3}", final_score));
110        bundle.confidence.basis = basis_parts.join(", ");
111        if let Some(components) = bundle.confidence.components.as_mut() {
112            components.calibration_adjustment = adjustment;
113        }
114
115        if (final_score - prior_score).abs() > 0.001 {
116            updates.push(ConfidenceUpdate {
117                finding_id: bundle.id.clone(),
118                previous_score: prior_score,
119                new_score: final_score,
120                basis: bundle.confidence.basis.clone(),
121                updated_by: "grounding_pass".into(),
122                updated_at: now.clone(),
123            });
124        }
125
126        bundle.confidence.score = final_score;
127    }
128
129    updates
130}
131
/// Look up the element sitting at percentile `pct` (0-100) of an ascending slice.
///
/// The rank is `pct * len / 100`; a rank past the end falls back to the last
/// element, and an empty slice yields 0.
fn percentile_value(sorted: &[u64], pct: usize) -> u64 {
    let rank = pct * sorted.len() / 100;
    sorted
        .get(rank)
        .or_else(|| sorted.last())
        .copied()
        .unwrap_or(0)
}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    /// Calendar year (UTC) at the moment the test runs.
    fn this_year() -> i32 {
        Utc::now().naive_utc().year()
    }

    /// Runs the grounding pass over `bundles` and returns the resulting
    /// confidence scores in input order.
    fn grounded_scores(mut bundles: Vec<FindingBundle>) -> Vec<f64> {
        ground_confidence(&mut bundles);
        bundles.iter().map(|b| b.confidence.score).collect()
    }

    /// Asserts that a calibrated score sits inside the 0.05..=0.99 window.
    fn assert_within_bounds(score: f64) {
        assert!(score >= 0.05);
        assert!(score <= 0.99);
    }

    /// Builds a bare-bones finding with id "test": no evidence spans, no human
    /// data, no confidence components. Only the four inputs the grounding pass
    /// cares about are caller-controlled.
    fn make_bundle(score: f64, citations: u64, year: i32, etype: &str) -> FindingBundle {
        let assertion = Assertion {
            text: "Test assertion".into(),
            assertion_type: "mechanism".into(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };

        let evidence = Evidence {
            evidence_type: etype.into(),
            model_system: String::new(),
            species: None,
            method: String::new(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated: false,
            replication_count: None,
            evidence_spans: vec![],
        };

        let conditions = Conditions {
            text: String::new(),
            species_verified: vec![],
            species_unverified: vec![],
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };

        let confidence = Confidence {
            kind: crate::bundle::ConfidenceKind::FrontierEpistemic,
            score,
            basis: "seeded prior".into(),
            method: crate::bundle::ConfidenceMethod::LlmInitial,
            components: None,
            extraction_confidence: 0.85,
        };

        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: "Test".into(),
            authors: vec![],
            year: Some(year),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction::default(),
            review: None,
            citation_count: Some(citations),
        };

        let flags = Flags {
            gap: false,
            negative_space: false,
            contested: false,
            retracted: false,
            declining: false,
            gravity_well: false,
            review_state: None,
            superseded: false,
            signature_threshold: None,
            jointly_accepted: false,
        };

        FindingBundle {
            id: "test".into(),
            version: 1,
            previous_version: None,
            assertion,
            evidence,
            conditions,
            confidence,
            provenance,
            flags,
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    #[test]
    fn high_citations_boost() {
        let mut pair = vec![
            make_bundle(0.70, 5000, 2024, "meta_analysis"),
            make_bundle(0.70, 2, 2010, "theoretical"),
        ];
        let records = ground_confidence(&mut pair);
        let (well_cited, barely_cited) = (&pair[0], &pair[1]);
        // The heavily cited meta-analysis must outrank the barely cited theory paper.
        assert!(well_cited.confidence.score > barely_cited.confidence.score);
        // Scores moved, so update records must have been produced.
        assert!(!records.is_empty());
    }

    #[test]
    fn scores_clamped() {
        let scores = grounded_scores(vec![make_bundle(0.99, 10000, 2025, "meta_analysis")]);
        assert_within_bounds(scores[0]);
    }

    #[test]
    fn recency_bonus_for_recent_papers() {
        let now = this_year();
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, now - 1, "experimental"), // inside the 3-year window
            make_bundle(0.70, 100, now - 15, "experimental"), // old paper
        ]);
        // +0.05 for the recent paper versus -0.05 for the old one.
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn recency_penalty_for_old_papers() {
        let now = this_year();
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, now - 5, "experimental"), // 3-10 years: neutral
            make_bundle(0.70, 100, now - 20, "experimental"), // > 10 years: penalized
        ]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn meta_analysis_boosted_over_theoretical() {
        let published = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, published, "meta_analysis"),
            make_bundle(0.70, 100, published, "theoretical"),
        ]);
        // +0.10 for meta_analysis against -0.05 for theoretical.
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn experimental_human_data_boost() {
        let published = this_year() - 5;
        let mut with_humans = make_bundle(0.70, 100, published, "experimental");
        with_humans.conditions.human_data = true;
        let without_humans = make_bundle(0.70, 100, published, "experimental");
        let scores = grounded_scores(vec![with_humans, without_humans]);
        // Human data lifts experimental evidence by +0.05; otherwise it stays at 0.0.
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn evidence_span_bonus() {
        let published = this_year() - 5;
        let mut spanned = make_bundle(0.70, 100, published, "experimental");
        spanned.evidence.evidence_spans = vec![serde_json::json!({"text": "some evidence"})];
        let spanless = make_bundle(0.70, 100, published, "experimental");
        let scores = grounded_scores(vec![spanned, spanless]);
        // +0.05 when spans exist, -0.05 when they do not.
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn empty_bundles_returns_empty() {
        let mut nothing: Vec<FindingBundle> = Vec::new();
        assert!(ground_confidence(&mut nothing).is_empty());
    }

    #[test]
    fn score_never_exceeds_bounds() {
        // Floor case: minimal prior with every signal pulling downwards.
        let low = grounded_scores(vec![make_bundle(0.05, 0, 1990, "theoretical")]);
        assert_within_bounds(low[0]);

        // Ceiling case: maximal prior with every signal pushing upwards.
        let mut best = make_bundle(0.99, 10000, this_year(), "meta_analysis");
        best.evidence.evidence_spans = vec![serde_json::json!({"text": "span"})];
        let high = grounded_scores(vec![best]);
        assert_within_bounds(high[0]);
    }

    #[test]
    fn update_records_have_correct_fields() {
        let mut bundles = vec![make_bundle(0.70, 5000, this_year(), "meta_analysis")];
        let records = ground_confidence(&mut bundles);
        assert!(!records.is_empty());

        let first = &records[0];
        assert_eq!(first.finding_id, "test");
        assert_eq!(first.previous_score, 0.70);
        assert_eq!(first.updated_by, "grounding_pass");
        assert!(!first.updated_at.is_empty());
        assert!(!first.basis.is_empty());
    }

    #[test]
    fn basis_string_populated() {
        let mut bundles = vec![make_bundle(0.70, 100, this_year(), "experimental")];
        ground_confidence(&mut bundles);
        let basis = &bundles[0].confidence.basis;
        for label in [
            "pre_calibration:",
            "citations:",
            "recency:",
            "evidence:",
            "calibration:",
        ] {
            assert!(basis.contains(label));
        }
    }

    #[test]
    fn computed_components_capture_calibration_adjustment() {
        let mut seeded = make_bundle(0.70, 5000, this_year(), "meta_analysis");
        // Swap in a computed confidence so that `components` is populated.
        seeded.confidence =
            crate::bundle::compute_confidence(&seeded.evidence, &seeded.conditions, false);
        let mut bundles = vec![seeded];
        ground_confidence(&mut bundles);
        let components = bundles[0].confidence.components.as_ref().unwrap();
        assert!(components.calibration_adjustment > 0.0);
    }

    #[test]
    fn percentile_value_works() {
        let one_to_ten: Vec<u64> = (1..=10).collect();
        assert_eq!(percentile_value(&[], 90), 0);
        assert_eq!(percentile_value(&[10], 50), 10);
        assert_eq!(percentile_value(&one_to_ten, 90), 10);
        assert_eq!(percentile_value(&one_to_ten, 10), 2);
    }

    #[test]
    fn no_citation_count_still_works() {
        let mut uncounted = make_bundle(0.70, 0, this_year(), "experimental");
        uncounted.provenance.citation_count = None;
        // Must not panic, and the score must still land inside the valid window.
        let scores = grounded_scores(vec![uncounted]);
        assert_within_bounds(scores[0]);
    }

    #[test]
    fn observational_is_neutral() {
        let published = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, published, "observational"),
            make_bundle(0.70, 100, published, "experimental"),
        ]);
        // Observational and non-human experimental evidence both contribute 0.0,
        // so with everything else equal the scores must match.
        assert!((scores[0] - scores[1]).abs() < 0.001);
    }

    #[test]
    fn systematic_review_boosted() {
        let published = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, published, "systematic_review"),
            make_bundle(0.70, 100, published, "experimental"),
        ]);
        assert!(scores[0] > scores[1]);
    }
}
vela_protocol/confidence.rs

vela_protocol/
confidence.rs