1use chrono::{Datelike, Utc};
21
22use crate::bundle::{ConfidenceUpdate, FindingBundle};
23
24pub fn ground_confidence(bundles: &mut [FindingBundle]) -> Vec<ConfidenceUpdate> {
29 if bundles.is_empty() {
30 return Vec::new();
31 }
32
33 let mut citation_counts: Vec<u64> = bundles
35 .iter()
36 .filter_map(|b| b.provenance.citation_count)
37 .collect();
38 citation_counts.sort_unstable();
39
40 let p90 = percentile_value(&citation_counts, 90);
41 let p10 = percentile_value(&citation_counts, 10);
42
43 let current_year = Utc::now().naive_utc().year();
44 let mut updates: Vec<ConfidenceUpdate> = Vec::new();
45 let now = Utc::now().to_rfc3339();
46
47 for bundle in bundles.iter_mut() {
48 let prior_score = bundle.confidence.score;
49 let mut adjustment = 0.0f64;
50 let mut basis_parts: Vec<String> = vec![format!("pre_calibration: {:.2}", prior_score)];
51
52 if let Some(cites) = bundle.provenance.citation_count {
54 let log_signal = (cites as f64 + 1.0).log10() / 4.0; let citation_adj = if cites >= p90 {
56 log_signal.min(0.15)
57 } else if cites <= p10 {
58 -(0.10f64.min(0.15 - log_signal))
59 } else {
60 (log_signal - 0.3).clamp(-0.05, 0.10)
61 };
62 adjustment += citation_adj;
63 basis_parts.push(format!("citations: {} ({:+.2})", cites, citation_adj));
64 }
65
66 if let Some(year) = bundle.provenance.year {
68 let age = current_year - year;
69 let recency_adj = if age <= 3 {
70 0.05
71 } else if age <= 10 {
72 0.0
73 } else {
74 -0.05
75 };
76 adjustment += recency_adj;
77 basis_parts.push(format!("recency: {} ({:+.2})", year, recency_adj));
78 }
79
80 let etype = bundle.evidence.evidence_type.as_str();
82 let etype_adj = match etype {
83 "meta_analysis" | "systematic_review" => 0.10,
84 "experimental" if bundle.conditions.human_data => 0.05,
85 "experimental" => 0.0,
86 "observational" => 0.0,
87 "theoretical" | "computational" => -0.05,
88 _ => 0.0,
89 };
90 adjustment += etype_adj;
91 basis_parts.push(format!("evidence: {} ({:+.2})", etype, etype_adj));
92
93 let span_adj = if !bundle.evidence.evidence_spans.is_empty() {
95 0.05
96 } else {
97 -0.05
98 };
99 adjustment += span_adj;
100
101 let calibrated = (prior_score + adjustment).clamp(0.0, 1.0);
103 let final_score = (0.6 * prior_score + 0.4 * calibrated).clamp(0.05, 0.99);
104
105 let final_score = (final_score * 1000.0).round() / 1000.0;
107
108 basis_parts.push(format!("calibration: {:+.2}", adjustment));
109 basis_parts.push(format!("-> {:.3}", final_score));
110 bundle.confidence.basis = basis_parts.join(", ");
111 if let Some(components) = bundle.confidence.components.as_mut() {
112 components.calibration_adjustment = adjustment;
113 }
114
115 if (final_score - prior_score).abs() > 0.001 {
116 updates.push(ConfidenceUpdate {
117 finding_id: bundle.id.clone(),
118 previous_score: prior_score,
119 new_score: final_score,
120 basis: bundle.confidence.basis.clone(),
121 updated_by: "grounding_pass".into(),
122 updated_at: now.clone(),
123 });
124 }
125
126 bundle.confidence.score = final_score;
127 }
128
129 updates
130}
131
/// Picks the value at percentile `pct` from an ascending-sorted slice.
///
/// The index is `pct * len / 100` (integer division), capped at the last
/// element, so any `pct >= 100` selects the maximum. An empty slice yields `0`.
/// The caller is responsible for passing `sorted` already in ascending order.
fn percentile_value(sorted: &[u64], pct: usize) -> u64 {
    match sorted.len() {
        0 => 0,
        len => {
            let rank = pct * len / 100;
            sorted[rank.min(len - 1)]
        }
    }
}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    /// Current calendar year in UTC, evaluated when the test runs.
    fn this_year() -> i32 {
        Utc::now().naive_utc().year()
    }

    /// Runs the grounding pass over `bundles` and returns the resulting
    /// scores in input order.
    fn grounded_scores(mut bundles: Vec<FindingBundle>) -> Vec<f64> {
        ground_confidence(&mut bundles);
        bundles.iter().map(|b| b.confidence.score).collect()
    }

    /// Asserts that a grounded score lies within the final clamp range.
    fn assert_in_bounds(score: f64) {
        assert!(score >= 0.05);
        assert!(score <= 0.99);
    }

    /// Builds a minimal finding with the given prior score, citation count,
    /// publication year and evidence type. All other fields are set to
    /// empty / `None` / `false` values; notably there are no evidence spans
    /// and `human_data` is off.
    fn make_bundle(score: f64, citations: u64, year: i32, etype: &str) -> FindingBundle {
        let assertion = Assertion {
            text: "Test assertion".into(),
            assertion_type: "mechanism".into(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };

        let evidence = Evidence {
            evidence_type: etype.into(),
            model_system: String::new(),
            species: None,
            method: String::new(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated: false,
            replication_count: None,
            evidence_spans: vec![],
        };

        let conditions = Conditions {
            text: String::new(),
            species_verified: vec![],
            species_unverified: vec![],
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };

        let confidence = Confidence {
            kind: crate::bundle::ConfidenceKind::FrontierEpistemic,
            score,
            basis: "seeded prior".into(),
            method: crate::bundle::ConfidenceMethod::LlmInitial,
            components: None,
            extraction_confidence: 0.85,
        };

        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: None,
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: "Test".into(),
            authors: vec![],
            year: Some(year),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction::default(),
            review: None,
            citation_count: Some(citations),
        };

        let flags = Flags {
            gap: false,
            negative_space: false,
            contested: false,
            retracted: false,
            declining: false,
            gravity_well: false,
            review_state: None,
            superseded: false,
            signature_threshold: None,
            jointly_accepted: false,
        };

        FindingBundle {
            id: "test".into(),
            version: 1,
            previous_version: None,
            assertion,
            evidence,
            conditions,
            confidence,
            provenance,
            flags,
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    #[test]
    fn high_citations_boost() {
        let mut bundles = vec![
            make_bundle(0.70, 5000, 2024, "meta_analysis"),
            make_bundle(0.70, 2, 2010, "theoretical"),
        ];
        let updates = ground_confidence(&mut bundles);
        let (strong, weak) = (&bundles[0], &bundles[1]);
        assert!(strong.confidence.score > weak.confidence.score);
        assert!(!updates.is_empty());
    }

    #[test]
    fn scores_clamped() {
        let scores = grounded_scores(vec![make_bundle(0.99, 10000, 2025, "meta_analysis")]);
        assert!(scores[0] <= 0.99);
        assert!(scores[0] >= 0.05);
    }

    #[test]
    fn recency_bonus_for_recent_papers() {
        let year = this_year();
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, year - 1, "experimental"),
            make_bundle(0.70, 100, year - 15, "experimental"),
        ]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn recency_penalty_for_old_papers() {
        let year = this_year();
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, year - 5, "experimental"),
            make_bundle(0.70, 100, year - 20, "experimental"),
        ]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn meta_analysis_boosted_over_theoretical() {
        let year = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, year, "meta_analysis"),
            make_bundle(0.70, 100, year, "theoretical"),
        ]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn experimental_human_data_boost() {
        let year = this_year() - 5;
        let mut human = make_bundle(0.70, 100, year, "experimental");
        human.conditions.human_data = true;
        let animal = make_bundle(0.70, 100, year, "experimental");
        let scores = grounded_scores(vec![human, animal]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn evidence_span_bonus() {
        let year = this_year() - 5;
        let mut with_span = make_bundle(0.70, 100, year, "experimental");
        with_span.evidence.evidence_spans = vec![serde_json::json!({"text": "some evidence"})];
        let without_span = make_bundle(0.70, 100, year, "experimental");
        let scores = grounded_scores(vec![with_span, without_span]);
        assert!(scores[0] > scores[1]);
    }

    #[test]
    fn empty_bundles_returns_empty() {
        let mut none: Vec<FindingBundle> = vec![];
        assert!(ground_confidence(&mut none).is_empty());
    }

    #[test]
    fn score_never_exceeds_bounds() {
        // Worst case: every signal pushes the score down from the floor.
        let low = grounded_scores(vec![make_bundle(0.05, 0, 1990, "theoretical")]);
        assert_in_bounds(low[0]);

        // Best case: every signal pushes the score up from the ceiling.
        let mut best = make_bundle(0.99, 10000, this_year(), "meta_analysis");
        best.evidence.evidence_spans = vec![serde_json::json!({"text": "span"})];
        let high = grounded_scores(vec![best]);
        assert_in_bounds(high[0]);
    }

    #[test]
    fn update_records_have_correct_fields() {
        let mut bundles = vec![make_bundle(0.70, 5000, this_year(), "meta_analysis")];
        let updates = ground_confidence(&mut bundles);
        assert!(!updates.is_empty());

        let record = &updates[0];
        assert_eq!(record.finding_id, "test");
        assert_eq!(record.previous_score, 0.70);
        assert_eq!(record.updated_by, "grounding_pass");
        assert!(!record.updated_at.is_empty());
        assert!(!record.basis.is_empty());
    }

    #[test]
    fn basis_string_populated() {
        let mut bundles = vec![make_bundle(0.70, 100, this_year(), "experimental")];
        ground_confidence(&mut bundles);
        let basis = &bundles[0].confidence.basis;
        for label in [
            "pre_calibration:",
            "citations:",
            "recency:",
            "evidence:",
            "calibration:",
        ] {
            assert!(basis.contains(label));
        }
    }

    #[test]
    fn computed_components_capture_calibration_adjustment() {
        let mut bundle = make_bundle(0.70, 5000, this_year(), "meta_analysis");
        bundle.confidence =
            crate::bundle::compute_confidence(&bundle.evidence, &bundle.conditions, false);
        let mut bundles = vec![bundle];
        ground_confidence(&mut bundles);
        let components = bundles[0].confidence.components.as_ref().unwrap();
        assert!(components.calibration_adjustment > 0.0);
    }

    #[test]
    fn percentile_value_works() {
        let one_to_ten: [u64; 10] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        assert_eq!(percentile_value(&[], 90), 0);
        assert_eq!(percentile_value(&[10], 50), 10);
        assert_eq!(percentile_value(&one_to_ten, 90), 10);
        assert_eq!(percentile_value(&one_to_ten, 10), 2);
    }

    #[test]
    fn no_citation_count_still_works() {
        let mut uncited = make_bundle(0.70, 0, this_year(), "experimental");
        uncited.provenance.citation_count = None;
        let scores = grounded_scores(vec![uncited]);
        assert_in_bounds(scores[0]);
    }

    #[test]
    fn observational_is_neutral() {
        let year = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, year, "observational"),
            make_bundle(0.70, 100, year, "experimental"),
        ]);
        assert!((scores[0] - scores[1]).abs() < 0.001);
    }

    #[test]
    fn systematic_review_boosted() {
        let year = this_year() - 5;
        let scores = grounded_scores(vec![
            make_bundle(0.70, 100, year, "systematic_review"),
            make_bundle(0.70, 100, year, "experimental"),
        ]);
        assert!(scores[0] > scores[1]);
    }
}