1use std::collections::BTreeMap;
4
5use chrono::Utc;
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use sha2::{Digest, Sha256};
9
/// Entity type labels recognized by this module.
///
/// NOTE(review): presumably the allowed values for [`Entity::entity_type`];
/// the code that enforces the list is not visible in this part of the file.
pub const VALID_ENTITY_TYPES: &[&str] = &[
    "gene",
    "protein",
    "compound",
    "disease",
    "cell_type",
    "organism",
    "pathway",
    "assay",
    "anatomical_structure",
    "particle",
    "instrument",
    "dataset",
    "quantity",
    "other",
];
36
/// Assertion type labels recognized by this module.
///
/// NOTE(review): the consumer of this list is not visible here; presumably it
/// is checked by validation code elsewhere — confirm.
pub const VALID_ASSERTION_TYPES: &[&str] = &[
    "mechanism",
    "therapeutic",
    "diagnostic",
    "epidemiological",
    "observational",
    "review",
    "methodological",
    "computational",
    "theoretical",
    "negative",
    "measurement",
    "exclusion",
    "tension",
    "open_question",
    "hypothesis",
    "candidate_finding",
];
70
/// Artifact kind labels accepted by [`valid_artifact_kind`] and therefore by
/// [`Artifact::new`].
///
/// The order is user-visible: `Artifact::new` joins this list with `", "` in
/// its "not supported" error message.
pub const VALID_ARTIFACT_KINDS: &[&str] = &[
    "dataset",
    "clinical_trial_record",
    "protocol",
    "supplement",
    "notebook",
    "code",
    "model_output",
    "table",
    "figure",
    "registry_record",
    "lab_file",
    "source_file",
    "other",
];
91
92pub fn valid_artifact_kind(kind: &str) -> bool {
93 VALID_ARTIFACT_KINDS.contains(&kind)
94}
95
/// Evidence type labels recognized by this module.
///
/// `compute_confidence_from_components` assigns an explicit weight to every
/// label here except `"extracted_from_notes"`, which falls through to its
/// default weight.
pub const VALID_EVIDENCE_TYPES: &[&str] = &[
    "experimental",
    "observational",
    "computational",
    "theoretical",
    "meta_analysis",
    "systematic_review",
    "case_report",
    "extracted_from_notes",
];
110
/// Provenance source type labels recognized by this module.
///
/// NOTE(review): presumably the allowed values for `Provenance::source_type`;
/// the enforcing code is not visible here — confirm.
pub const VALID_PROVENANCE_SOURCE_TYPES: &[&str] = &[
    "published_paper",
    "preprint",
    "clinical_trial",
    "lab_notebook",
    "model_output",
    "expert_assertion",
    "database_record",
    "data_release",
    "researcher_notes",
];
133
/// Link type labels recognized by this module.
///
/// NOTE(review): the link structure these labels apply to is not visible in
/// this part of the file.
pub const VALID_LINK_TYPES: &[&str] = &[
    "supports",
    "contradicts",
    "extends",
    "depends",
    "replicates",
    "supersedes",
    "synthesized_from",
];
144
/// One identifier match for an entity, with an attached confidence value.
///
/// Used as the element type of both [`Entity::canonical_id`] and
/// [`Entity::candidates`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResolvedId {
    /// NOTE(review): presumably the database or authority that issued `id` — confirm.
    pub source: String,
    /// The identifier string itself.
    pub id: String,
    /// Confidence attached to this match; no range is enforced by this type.
    pub confidence: f64,
    /// Optional matched name. Defaults to `None` when absent and is omitted
    /// from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_name: Option<String>,
}
158
/// How an entity's identifier was resolved.
///
/// Serialized in `snake_case` (e.g. `ExactMatch` <-> `"exact_match"`), which
/// matches the labels produced by the `Display` implementation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResolutionMethod {
    ExactMatch,
    FuzzyMatch,
    LlmInference,
    Manual,
}
168
169impl std::fmt::Display for ResolutionMethod {
170 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171 match self {
172 ResolutionMethod::ExactMatch => write!(f, "exact_match"),
173 ResolutionMethod::FuzzyMatch => write!(f, "fuzzy_match"),
174 ResolutionMethod::LlmInference => write!(f, "llm_inference"),
175 ResolutionMethod::Manual => write!(f, "manual"),
176 }
177 }
178}
179
/// A named entity together with its identifier-resolution state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Entity {
    /// Entity name as given.
    pub name: String,
    /// Entity type label; serialized under the key `"type"`.
    /// NOTE(review): presumably one of [`VALID_ENTITY_TYPES`] — not enforced by this type.
    #[serde(rename = "type")]
    pub entity_type: String,
    /// Free-form identifier map; defaults to empty when absent.
    #[serde(default)]
    pub identifiers: serde_json::Map<String, serde_json::Value>,
    /// Chosen identifier, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub canonical_id: Option<ResolvedId>,
    /// Candidate identifier matches; defaults to empty.
    #[serde(default)]
    pub candidates: Vec<ResolvedId>,
    /// Alternative names; defaults to empty.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Optional provenance string for the resolution; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resolution_provenance: Option<String>,
    /// Resolution confidence; defaults to `1.0` (via `default_one`) when absent.
    #[serde(default = "default_one")]
    pub resolution_confidence: f64,
    /// How the identifier was resolved, if recorded; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resolution_method: Option<ResolutionMethod>,
    /// Optional species context; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub species_context: Option<String>,
    /// Review flag; defaults to `false` and is omitted from output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub needs_review: bool,
}
213
/// Serde default for [`Entity::resolution_confidence`]: always `1.0`.
fn default_one() -> f64 {
    1.0_f64
}
217
/// Description of the evidence behind a finding, replication or resolution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// Evidence type label; serialized under the key `"type"`.
    /// `compute_confidence_from_components` maps this label to a base weight.
    #[serde(rename = "type")]
    pub evidence_type: String,
    /// Model system description; defaults to an empty string.
    #[serde(default)]
    pub model_system: String,
    /// Optional species.
    pub species: Option<String>,
    /// Method description; defaults to an empty string.
    #[serde(default)]
    pub method: String,
    /// Free-text sample size; the confidence computation extracts the largest
    /// integer found in it (see `parse_sample_size`).
    pub sample_size: Option<String>,
    /// Free-text effect size.
    pub effect_size: Option<String>,
    /// Free-text p-value.
    pub p_value: Option<String>,
    /// Replication flag; defaults to `false`. Used as a fallback replication
    /// signal by `compute_confidence` and `recompute_all_confidence`.
    #[serde(default)]
    pub replicated: bool,
    /// Replication count paired with `replicated`; treated as `1` when
    /// `replicated` is set and this is `None`.
    pub replication_count: Option<u32>,
    /// Untyped evidence span records; defaults to empty.
    #[serde(default)]
    pub evidence_spans: Vec<serde_json::Value>,
}
236
/// Outcome labels for [`Replication::outcome`].
///
/// `count_replication_outcomes` tallies `"replicated"`, `"failed"` and
/// `"partial"`; any other label (including `"inconclusive"`) is not counted.
pub const VALID_REPLICATION_OUTCOMES: &[&str] =
    &["replicated", "failed", "partial", "inconclusive"];
246
/// A recorded attempt to replicate a finding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Replication {
    /// Content-derived id of the form `vrep_<16 hex>` (see [`Replication::content_address`]).
    pub id: String,
    /// Id of the finding being replicated; matched against the finding id in
    /// `count_replication_outcomes`.
    pub target_finding: String,
    /// Who attempted the replication.
    pub attempted_by: String,
    /// Outcome label (see [`VALID_REPLICATION_OUTCOMES`]); not validated by this type.
    pub outcome: String,
    /// Evidence produced by the attempt.
    pub evidence: Evidence,
    /// Conditions of the attempt; the normalized `text` feeds the id.
    pub conditions: Conditions,
    /// Provenance of the attempt record.
    pub provenance: Provenance,
    /// Free-text notes; defaults to an empty string.
    #[serde(default)]
    pub notes: String,
    /// Creation timestamp string; [`Replication::new`] writes RFC 3339 UTC.
    pub created: String,
    /// Optional id of an earlier attempt; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub previous_attempt: Option<String>,
}
299
300impl Replication {
301 pub fn content_address(
310 target_finding: &str,
311 attempted_by: &str,
312 conditions: &Conditions,
313 outcome: &str,
314 ) -> String {
315 let norm_conditions = FindingBundle::normalize_text(&conditions.text);
316 let preimage = format!(
317 "{}|{}|{}|{}",
318 target_finding, attempted_by, norm_conditions, outcome
319 );
320 let hash = Sha256::digest(preimage.as_bytes());
321 format!("vrep_{}", &hex::encode(hash)[..16])
322 }
323
324 pub fn new(
327 target_finding: impl Into<String>,
328 attempted_by: impl Into<String>,
329 outcome: impl Into<String>,
330 evidence: Evidence,
331 conditions: Conditions,
332 provenance: Provenance,
333 notes: impl Into<String>,
334 ) -> Self {
335 let target = target_finding.into();
336 let actor = attempted_by.into();
337 let oc = outcome.into();
338 let id = Self::content_address(&target, &actor, &conditions, &oc);
339 Self {
340 id,
341 target_finding: target,
342 attempted_by: actor,
343 outcome: oc,
344 evidence,
345 conditions,
346 provenance,
347 notes: notes.into(),
348 created: Utc::now().to_rfc3339(),
349 previous_attempt: None,
350 }
351 }
352}
353
/// The outcome a prediction expects.
///
/// Serialized as an internally tagged enum: the variant name goes in a
/// `"kind"` field in `snake_case`, alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ExpectedOutcome {
    /// The claim is expected to hold.
    Affirmed,
    /// The claim is expected to be refuted.
    Falsified,
    /// A numeric expectation: `value` with a `tolerance`, in `units`.
    Quantitative {
        value: f64,
        tolerance: f64,
        units: String,
    },
    /// A categorical expectation identified by `value`.
    Categorical {
        value: String,
    },
}
377
378impl ExpectedOutcome {
379 pub fn canonical(&self) -> String {
382 match self {
383 ExpectedOutcome::Affirmed => "affirmed".to_string(),
384 ExpectedOutcome::Falsified => "falsified".to_string(),
385 ExpectedOutcome::Quantitative {
386 value,
387 tolerance,
388 units,
389 } => format!("quant:{value}±{tolerance}{units}"),
390 ExpectedOutcome::Categorical { value } => format!("cat:{value}"),
391 }
392 }
393}
394
/// A prediction with a stated resolution criterion and expected outcome.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Prediction {
    /// Content-derived id of the form `vpred_<16 hex>` (see [`Prediction::content_address`]).
    pub id: String,
    /// The predicted claim; normalized before hashing into the id.
    pub claim_text: String,
    /// Ids of findings the prediction relates to; defaults to empty.
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// Timestamp string of when the prediction was made; part of the id.
    pub predicted_at: String,
    /// Optional deadline string.
    pub resolves_by: Option<String>,
    /// How the prediction is to be judged; normalized before hashing into the id.
    pub resolution_criterion: String,
    /// The expected outcome; its canonical form is part of the id.
    pub expected_outcome: ExpectedOutcome,
    /// Who made the prediction; part of the id.
    pub made_by: String,
    /// Stated confidence; no range is enforced by this type.
    pub confidence: f64,
    /// Conditions the prediction applies under (not part of the id).
    pub conditions: Conditions,
    /// Defaults to `false` and is omitted from output when `false`.
    /// NOTE(review): presumably set once `resolves_by` passes without a resolution — confirm.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub expired_unresolved: bool,
}
462
463impl Prediction {
464 pub fn content_address(
468 claim_text: &str,
469 made_by: &str,
470 predicted_at: &str,
471 resolution_criterion: &str,
472 expected_outcome: &ExpectedOutcome,
473 ) -> String {
474 let preimage = format!(
475 "{}|{}|{}|{}|{}",
476 FindingBundle::normalize_text(claim_text),
477 made_by,
478 predicted_at,
479 FindingBundle::normalize_text(resolution_criterion),
480 expected_outcome.canonical(),
481 );
482 let hash = Sha256::digest(preimage.as_bytes());
483 format!("vpred_{}", &hex::encode(hash)[..16])
484 }
485
486 #[allow(clippy::too_many_arguments)]
489 pub fn new(
490 claim_text: impl Into<String>,
491 target_findings: Vec<String>,
492 predicted_at: Option<String>,
493 resolves_by: Option<String>,
494 resolution_criterion: impl Into<String>,
495 expected_outcome: ExpectedOutcome,
496 made_by: impl Into<String>,
497 confidence: f64,
498 conditions: Conditions,
499 ) -> Self {
500 let now = predicted_at.unwrap_or_else(|| Utc::now().to_rfc3339());
501 let claim = claim_text.into();
502 let crit = resolution_criterion.into();
503 let actor = made_by.into();
504 let id = Self::content_address(&claim, &actor, &now, &crit, &expected_outcome);
505 Self {
506 id,
507 claim_text: claim,
508 target_findings,
509 predicted_at: now,
510 resolves_by,
511 resolution_criterion: crit,
512 expected_outcome,
513 made_by: actor,
514 confidence,
515 conditions,
516 expired_unresolved: false,
517 }
518 }
519}
520
/// The recorded resolution of a [`Prediction`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Resolution {
    /// Content-derived id of the form `vres_<16 hex>` (see [`Resolution::content_address`]).
    pub id: String,
    /// Id of the prediction being resolved.
    pub prediction_id: String,
    /// Free-text description of what actually happened; normalized before hashing.
    pub actual_outcome: String,
    /// Whether the actual outcome matched the prediction's expected outcome.
    pub matched_expected: bool,
    /// Timestamp string; [`Resolution::new`] writes the current UTC time (RFC 3339).
    pub resolved_at: String,
    /// Who resolved the prediction.
    pub resolved_by: String,
    /// Evidence supporting the resolution (not part of the id).
    pub evidence: Evidence,
    /// Confidence in the resolution; no range is enforced by this type.
    pub confidence: f64,
}
552
553impl Resolution {
554 pub fn content_address(
558 prediction_id: &str,
559 actual_outcome: &str,
560 resolved_by: &str,
561 resolved_at: &str,
562 matched_expected: bool,
563 ) -> String {
564 let preimage = format!(
565 "{}|{}|{}|{}|{}",
566 prediction_id,
567 FindingBundle::normalize_text(actual_outcome),
568 resolved_by,
569 resolved_at,
570 matched_expected,
571 );
572 let hash = Sha256::digest(preimage.as_bytes());
573 format!("vres_{}", &hex::encode(hash)[..16])
574 }
575
576 pub fn new(
579 prediction_id: impl Into<String>,
580 actual_outcome: impl Into<String>,
581 matched_expected: bool,
582 resolved_by: impl Into<String>,
583 evidence: Evidence,
584 confidence: f64,
585 ) -> Self {
586 let now = Utc::now().to_rfc3339();
587 let pid = prediction_id.into();
588 let outcome = actual_outcome.into();
589 let resolver = resolved_by.into();
590 let id = Self::content_address(&pid, &outcome, &resolver, &now, matched_expected);
591 Self {
592 id,
593 prediction_id: pid,
594 actual_outcome: outcome,
595 matched_expected,
596 resolved_at: now,
597 resolved_by: resolver,
598 evidence,
599 confidence,
600 }
601 }
602}
603
/// The payload of a negative result.
///
/// Serialized as an internally tagged enum: the variant name goes in a
/// `"kind"` field in `snake_case`, alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum NegativeResultKind {
    /// A null result from a trial.
    RegisteredTrial {
        /// Trial endpoint (normalized in the canonical form).
        endpoint: String,
        /// Intervention tested (normalized in the canonical form).
        intervention: String,
        /// Comparator arm (normalized in the canonical form).
        comparator: String,
        /// Study population (normalized in the canonical form).
        population: String,
        /// Number of participants enrolled.
        n_enrolled: u32,
        /// Statistical power; `is_informative_trial_null` requires `>= 0.8`.
        power: f64,
        /// Effect-size confidence interval as `(lower, upper)`; serialized as
        /// a two-element sequence.
        effect_size_ci: (f64, f64),
        /// Optional effect-size threshold the interval is compared against in
        /// `is_informative_trial_null`; omitted from output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effect_size_threshold: Option<f64>,
        /// Optional trial registry identifier; omitted from output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        registry_id: Option<String>,
    },
    /// A null result from exploratory work.
    Exploratory {
        /// Reagent involved (normalized in the canonical form).
        reagent: String,
        /// What was observed (normalized in the canonical form).
        observation: String,
        /// Number of attempts made.
        attempts: u32,
    },
}
679
/// A deposited negative (null) result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NegativeResult {
    /// Content-derived id of the form `vnr_<16 hex>` (see [`NegativeResult::content_address`]).
    pub id: String,
    /// The result payload. Because [`NegativeResultKind`] is internally tagged
    /// with `"kind"`, this field holds an object that itself carries a `"kind"` tag.
    pub kind: NegativeResultKind,
    /// Ids of findings this result bears on; defaults to empty.
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// Who deposited the result; part of the id.
    pub deposited_by: String,
    /// Conditions of the work; the normalized `text` is part of the id.
    pub conditions: Conditions,
    /// Provenance of the record (not part of the id).
    pub provenance: Provenance,
    /// Creation timestamp string; part of the id.
    pub created: String,
    /// Free-text notes; defaults to an empty string.
    #[serde(default)]
    pub notes: String,
    /// Optional review state; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// Retraction flag; defaults to `false` and is omitted from output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub retracted: bool,
    /// Access tier; defaults to `AccessTier::default()` and is omitted from
    /// output when `is_public_tier` (defined elsewhere) returns `true`.
    #[serde(default, skip_serializing_if = "is_public_tier")]
    pub access_tier: crate::access_tier::AccessTier,
}
720
721impl NegativeResultKind {
722 pub fn canonical(&self) -> String {
726 match self {
727 NegativeResultKind::RegisteredTrial {
728 endpoint,
729 intervention,
730 comparator,
731 population,
732 n_enrolled,
733 power,
734 effect_size_ci,
735 effect_size_threshold,
736 registry_id,
737 } => format!(
738 "trial|{}|{}|{}|{}|{}|{:.4}|{:.6},{:.6}|{}|{}",
739 FindingBundle::normalize_text(endpoint),
740 FindingBundle::normalize_text(intervention),
741 FindingBundle::normalize_text(comparator),
742 FindingBundle::normalize_text(population),
743 n_enrolled,
744 power,
745 effect_size_ci.0,
746 effect_size_ci.1,
747 effect_size_threshold
748 .map(|t| format!("{t:.6}"))
749 .unwrap_or_default(),
750 registry_id.clone().unwrap_or_default(),
751 ),
752 NegativeResultKind::Exploratory {
753 reagent,
754 observation,
755 attempts,
756 } => format!(
757 "exploratory|{}|{}|{}",
758 FindingBundle::normalize_text(reagent),
759 FindingBundle::normalize_text(observation),
760 attempts,
761 ),
762 }
763 }
764}
765
766impl NegativeResult {
767 pub fn content_address(
771 kind: &NegativeResultKind,
772 deposited_by: &str,
773 created: &str,
774 conditions: &Conditions,
775 ) -> String {
776 let preimage = format!(
777 "{}|{}|{}|{}",
778 kind.canonical(),
779 deposited_by,
780 created,
781 FindingBundle::normalize_text(&conditions.text),
782 );
783 let hash = Sha256::digest(preimage.as_bytes());
784 format!("vnr_{}", &hex::encode(hash)[..16])
785 }
786
787 pub fn new(
790 kind: NegativeResultKind,
791 target_findings: Vec<String>,
792 deposited_by: impl Into<String>,
793 conditions: Conditions,
794 provenance: Provenance,
795 notes: impl Into<String>,
796 ) -> Self {
797 let depositor = deposited_by.into();
798 let created = Utc::now().to_rfc3339();
799 let id = Self::content_address(&kind, &depositor, &created, &conditions);
800 Self {
801 id,
802 kind,
803 target_findings,
804 deposited_by: depositor,
805 conditions,
806 provenance,
807 created,
808 notes: notes.into(),
809 review_state: None,
810 retracted: false,
811 access_tier: crate::access_tier::AccessTier::Public,
812 }
813 }
814
815 pub fn is_informative_trial_null(&self) -> Option<bool> {
821 match &self.kind {
822 NegativeResultKind::RegisteredTrial {
823 power,
824 effect_size_ci,
825 effect_size_threshold,
826 ..
827 } => {
828 let threshold = (*effect_size_threshold)?;
829 Some(*power >= 0.8 && effect_size_ci.0 > -threshold && effect_size_ci.1 < threshold)
830 }
831 NegativeResultKind::Exploratory { .. } => None,
832 }
833 }
834}
835
/// The kind of a single [`TrajectoryStep`].
///
/// Serialized in `snake_case`, matching the strings returned by
/// `TrajectoryStepKind::canonical`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum TrajectoryStepKind {
    Hypothesis,
    Tried,
    RuledOut,
    Observed,
    Refined,
}
871
872impl TrajectoryStepKind {
873 pub fn canonical(&self) -> &'static str {
874 match self {
875 TrajectoryStepKind::Hypothesis => "hypothesis",
876 TrajectoryStepKind::Tried => "tried",
877 TrajectoryStepKind::RuledOut => "ruled_out",
878 TrajectoryStepKind::Observed => "observed",
879 TrajectoryStepKind::Refined => "refined",
880 }
881 }
882}
883
/// One step within a [`Trajectory`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrajectoryStep {
    /// Content-derived id of the form `vts_<16 hex>` (see [`TrajectoryStep::content_address`]).
    pub id: String,
    /// What kind of step this is.
    pub kind: TrajectoryStepKind,
    /// Free-text description; normalized before hashing into the id.
    pub description: String,
    /// Timestamp string of the step; part of the id.
    pub at: String,
    /// Who performed the step; part of the id.
    pub actor: String,
    /// Reference strings attached to the step; defaults to empty. Not part of the id.
    #[serde(default)]
    pub references: Vec<String>,
}
908
909impl TrajectoryStep {
910 pub fn content_address(
914 trajectory_id: &str,
915 kind: &TrajectoryStepKind,
916 description: &str,
917 at: &str,
918 actor: &str,
919 ) -> String {
920 let preimage = format!(
921 "{}|{}|{}|{}|{}",
922 trajectory_id,
923 kind.canonical(),
924 FindingBundle::normalize_text(description),
925 at,
926 actor,
927 );
928 let hash = Sha256::digest(preimage.as_bytes());
929 format!("vts_{}", &hex::encode(hash)[..16])
930 }
931
932 pub fn new(
935 trajectory_id: &str,
936 kind: TrajectoryStepKind,
937 description: impl Into<String>,
938 actor: impl Into<String>,
939 at: Option<String>,
940 references: Vec<String>,
941 ) -> Self {
942 let at = at.unwrap_or_else(|| Utc::now().to_rfc3339());
943 let actor = actor.into();
944 let description = description.into();
945 let id = Self::content_address(trajectory_id, &kind, &description, &at, &actor);
946 Self {
947 id,
948 kind,
949 description,
950 at,
951 actor,
952 references,
953 }
954 }
955}
956
/// A deposited sequence of [`TrajectoryStep`]s.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Trajectory {
    /// Content-derived id of the form `vtr_<16 hex>` (see [`Trajectory::content_address`]).
    pub id: String,
    /// Ids of findings the trajectory relates to; defaults to empty. Part of
    /// the id (order-insensitive).
    #[serde(default)]
    pub target_findings: Vec<String>,
    /// Who deposited the trajectory; part of the id.
    pub deposited_by: String,
    /// Creation timestamp string; part of the id.
    pub created: String,
    /// The steps; defaults to empty. Steps are not part of the trajectory id.
    #[serde(default)]
    pub steps: Vec<TrajectoryStep>,
    /// Free-text notes; defaults to an empty string.
    #[serde(default)]
    pub notes: String,
    /// Optional review state; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// Retraction flag; defaults to `false` and is omitted from output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub retracted: bool,
    /// Access tier; defaults to `AccessTier::default()` and is omitted from
    /// output when `is_public_tier` (defined elsewhere) returns `true`.
    #[serde(default, skip_serializing_if = "is_public_tier")]
    pub access_tier: crate::access_tier::AccessTier,
}
993
994impl Trajectory {
995 pub fn content_address(
1004 target_findings: &[String],
1005 deposited_by: &str,
1006 created: &str,
1007 ) -> String {
1008 let mut sorted: Vec<&str> = target_findings.iter().map(String::as_str).collect();
1009 sorted.sort();
1010 let preimage = format!("{}|{}|{}", sorted.join(","), deposited_by, created);
1011 let hash = Sha256::digest(preimage.as_bytes());
1012 format!("vtr_{}", &hex::encode(hash)[..16])
1013 }
1014
1015 pub fn new(
1019 target_findings: Vec<String>,
1020 deposited_by: impl Into<String>,
1021 notes: impl Into<String>,
1022 ) -> Self {
1023 let depositor = deposited_by.into();
1024 let created = Utc::now().to_rfc3339();
1025 let id = Self::content_address(&target_findings, &depositor, &created);
1026 Self {
1027 id,
1028 target_findings,
1029 deposited_by: depositor,
1030 created,
1031 steps: Vec::new(),
1032 notes: notes.into(),
1033 review_state: None,
1034 retracted: false,
1035 access_tier: crate::access_tier::AccessTier::Public,
1036 }
1037 }
1038}
1039
/// A dataset record identified by name, version, content hash and URL.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// Content-derived id of the form `vd_<16 hex>` (see [`Dataset::content_address`]).
    pub id: String,
    /// Dataset name; part of the id.
    pub name: String,
    /// Optional version string; part of the id (empty when `None`).
    pub version: Option<String>,
    /// Pairs of strings describing the schema; defaults to empty.
    /// NOTE(review): presumably `(column name, column type)` — confirm.
    #[serde(default)]
    pub schema: Vec<(String, String)>,
    /// Optional row count.
    pub row_count: Option<u64>,
    /// Hash of the dataset content; part of the id. Unlike
    /// [`Artifact::new`], [`Dataset::new`] does not validate or normalize it.
    pub content_hash: String,
    /// Optional URL; part of the id (empty when `None`).
    pub url: Option<String>,
    /// Optional license string.
    pub license: Option<String>,
    /// Provenance of the record.
    pub provenance: Provenance,
    /// Creation timestamp string; [`Dataset::new`] writes RFC 3339 UTC. Not part of the id.
    pub created: String,
}
1085
1086impl Dataset {
1087 pub fn content_address(
1091 name: &str,
1092 version: Option<&str>,
1093 content_hash: &str,
1094 url: Option<&str>,
1095 ) -> String {
1096 let preimage = format!(
1097 "{}|{}|{}|{}",
1098 name,
1099 version.unwrap_or(""),
1100 content_hash,
1101 url.unwrap_or("")
1102 );
1103 let hash = Sha256::digest(preimage.as_bytes());
1104 format!("vd_{}", &hex::encode(hash)[..16])
1105 }
1106
1107 pub fn new(
1110 name: impl Into<String>,
1111 version: Option<String>,
1112 content_hash: impl Into<String>,
1113 url: Option<String>,
1114 license: Option<String>,
1115 provenance: Provenance,
1116 ) -> Self {
1117 let n = name.into();
1118 let h = content_hash.into();
1119 let id = Self::content_address(&n, version.as_deref(), &h, url.as_deref());
1120 Self {
1121 id,
1122 name: n,
1123 version,
1124 schema: Vec::new(),
1125 row_count: None,
1126 content_hash: h,
1127 url,
1128 license,
1129 provenance,
1130 created: Utc::now().to_rfc3339(),
1131 }
1132 }
1133}
1134
/// A reference to a piece of code: a path, optionally within a repository at
/// a commit, optionally restricted to a line range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeArtifact {
    /// Content-derived id of the form `vc_<16 hex>` (see [`CodeArtifact::content_address`]).
    pub id: String,
    /// Programming language label (not part of the id).
    pub language: String,
    /// Optional repository URL; part of the id (empty when `None`).
    pub repo_url: Option<String>,
    /// Optional git commit; part of the id (empty when `None`).
    pub git_commit: Option<String>,
    /// File path; part of the id.
    pub path: String,
    /// Optional `(first, second)` line pair, hashed as `first-second`.
    /// NOTE(review): presumably inclusive start/end line numbers — confirm.
    pub line_range: Option<(u32, u32)>,
    /// Hash of the code content; part of the id. Not validated by [`CodeArtifact::new`].
    pub content_hash: String,
    /// Optional entry point (not part of the id).
    pub entry_point: Option<String>,
    /// Creation timestamp string; [`CodeArtifact::new`] writes RFC 3339 UTC.
    pub created: String,
}
1177
1178impl CodeArtifact {
1179 pub fn content_address(
1183 repo_url: Option<&str>,
1184 git_commit: Option<&str>,
1185 path: &str,
1186 line_range: Option<(u32, u32)>,
1187 content_hash: &str,
1188 ) -> String {
1189 let lr = line_range
1190 .map(|(a, b)| format!("{a}-{b}"))
1191 .unwrap_or_default();
1192 let preimage = format!(
1193 "{}|{}|{}|{}|{}",
1194 repo_url.unwrap_or(""),
1195 git_commit.unwrap_or(""),
1196 path,
1197 lr,
1198 content_hash
1199 );
1200 let hash = Sha256::digest(preimage.as_bytes());
1201 format!("vc_{}", &hex::encode(hash)[..16])
1202 }
1203
1204 pub fn new(
1207 language: impl Into<String>,
1208 repo_url: Option<String>,
1209 git_commit: Option<String>,
1210 path: impl Into<String>,
1211 line_range: Option<(u32, u32)>,
1212 content_hash: impl Into<String>,
1213 entry_point: Option<String>,
1214 ) -> Self {
1215 let p = path.into();
1216 let h = content_hash.into();
1217 let id = Self::content_address(
1218 repo_url.as_deref(),
1219 git_commit.as_deref(),
1220 &p,
1221 line_range,
1222 &h,
1223 );
1224 Self {
1225 id,
1226 language: language.into(),
1227 repo_url,
1228 git_commit,
1229 path: p,
1230 line_range,
1231 content_hash: h,
1232 entry_point,
1233 created: Utc::now().to_rfc3339(),
1234 }
1235 }
1236}
1237
/// A generic artifact record (file, dataset, figure, ...) with a content hash.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Artifact {
    /// Content-derived id of the form `va_<16 hex>` (see [`Artifact::content_address`]).
    pub id: String,
    /// Artifact kind; [`Artifact::new`] requires one of [`VALID_ARTIFACT_KINDS`].
    pub kind: String,
    /// Artifact name; [`Artifact::new`] rejects blank names.
    pub name: String,
    /// Content hash; [`Artifact::new`] stores it as `sha256:<64 lowercase hex>`.
    pub content_hash: String,
    /// Optional size in bytes; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size_bytes: Option<u64>,
    /// Optional media type; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub media_type: Option<String>,
    /// Storage mode; [`Artifact::new`] accepts `local_blob`, `local_file`,
    /// `remote` or `pointer`.
    pub storage_mode: String,
    /// Optional locator; part of the id. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locator: Option<String>,
    /// Optional source URL; part of the id. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// Optional license string; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Ids of findings the artifact relates to; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub target_findings: Vec<String>,
    /// Optional source id; [`Artifact::new`] leaves it `None`. Omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_id: Option<String>,
    /// Provenance of the record.
    pub provenance: Provenance,
    /// Free-form metadata keyed by string; omitted from output when empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub metadata: BTreeMap<String, Value>,
    /// Optional review state; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// Retraction flag; defaults to `false`. Always serialized (unlike the
    /// same-named field on `NegativeResult` and `Trajectory`).
    #[serde(default)]
    pub retracted: bool,
    /// Access tier; defaults to `AccessTier::default()`. Always serialized
    /// (unlike the same-named field on `NegativeResult` and `Trajectory`).
    #[serde(default)]
    pub access_tier: crate::access_tier::AccessTier,
    /// Creation timestamp string; [`Artifact::new`] writes RFC 3339 UTC.
    pub created: String,
}
1298
1299impl Artifact {
1300 pub fn content_address(
1301 kind: &str,
1302 name: &str,
1303 content_hash: &str,
1304 source_url: Option<&str>,
1305 locator: Option<&str>,
1306 ) -> String {
1307 let preimage = format!(
1308 "{}|{}|{}|{}|{}",
1309 kind,
1310 name,
1311 content_hash,
1312 source_url.unwrap_or(""),
1313 locator.unwrap_or("")
1314 );
1315 let hash = Sha256::digest(preimage.as_bytes());
1316 format!("va_{}", &hex::encode(hash)[..16])
1317 }
1318
1319 #[allow(clippy::too_many_arguments)]
1320 pub fn new(
1321 kind: impl Into<String>,
1322 name: impl Into<String>,
1323 content_hash: impl Into<String>,
1324 size_bytes: Option<u64>,
1325 media_type: Option<String>,
1326 storage_mode: impl Into<String>,
1327 locator: Option<String>,
1328 source_url: Option<String>,
1329 license: Option<String>,
1330 target_findings: Vec<String>,
1331 provenance: Provenance,
1332 metadata: BTreeMap<String, Value>,
1333 access_tier: crate::access_tier::AccessTier,
1334 ) -> Result<Self, String> {
1335 let kind = kind.into();
1336 if !valid_artifact_kind(&kind) {
1337 return Err(format!(
1338 "artifact kind '{kind}' is not supported; valid: {}",
1339 VALID_ARTIFACT_KINDS.join(", ")
1340 ));
1341 }
1342 let name = name.into();
1343 if name.trim().is_empty() {
1344 return Err("artifact name must be non-empty".to_string());
1345 }
1346 let content_hash = normalize_sha256(content_hash.into())?;
1347 let storage_mode = storage_mode.into();
1348 if !matches!(
1349 storage_mode.as_str(),
1350 "local_blob" | "local_file" | "remote" | "pointer"
1351 ) {
1352 return Err(format!(
1353 "artifact storage_mode '{storage_mode}' is not supported; valid: local_blob, local_file, remote, pointer"
1354 ));
1355 }
1356 let id = Self::content_address(
1357 &kind,
1358 &name,
1359 &content_hash,
1360 source_url.as_deref(),
1361 locator.as_deref(),
1362 );
1363 Ok(Self {
1364 id,
1365 kind,
1366 name,
1367 content_hash,
1368 size_bytes,
1369 media_type,
1370 storage_mode,
1371 locator,
1372 source_url,
1373 license,
1374 target_findings,
1375 source_id: None,
1376 provenance,
1377 metadata,
1378 review_state: None,
1379 retracted: false,
1380 access_tier,
1381 created: Utc::now().to_rfc3339(),
1382 })
1383 }
1384}
1385
/// Validates a SHA-256 hex digest and returns it as `sha256:<lowercase hex>`.
///
/// Surrounding whitespace is trimmed and an optional `sha256:` prefix is
/// accepted. The remainder must be exactly 64 ASCII hex digits.
///
/// # Errors
/// Returns a message quoting the trimmed input when it is not a valid digest.
fn normalize_sha256(value: String) -> Result<String, String> {
    let trimmed = value.trim();
    let digest = match trimmed.strip_prefix("sha256:") {
        Some(rest) => rest,
        None => trimmed,
    };
    let well_formed = digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit());
    if well_formed {
        Ok(format!("sha256:{}", digest.to_ascii_lowercase()))
    } else {
        Err(format!(
            "content_hash must be sha256:<64hex> or 64 hex chars, got {trimmed:?}"
        ))
    }
}
1396
/// The conditions under which a finding, replication or prediction applies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Conditions {
    /// Free-text description; defaults to empty. Its normalized form is part
    /// of the ids of `Replication` and `NegativeResult`.
    #[serde(default)]
    pub text: String,
    /// Species listed as verified; defaults to empty.
    #[serde(default)]
    pub species_verified: Vec<String>,
    /// Species listed as unverified; defaults to empty.
    #[serde(default)]
    pub species_unverified: Vec<String>,
    /// In-vitro flag; defaults to `false`. Lowest-priority input to model
    /// relevance in `compute_confidence_from_components`.
    #[serde(default)]
    pub in_vitro: bool,
    /// In-vivo flag; defaults to `false`. Takes priority over `in_vitro` for
    /// model relevance.
    #[serde(default)]
    pub in_vivo: bool,
    /// Human-data flag; defaults to `false`. Takes priority over both
    /// `in_vivo` and `in_vitro` for model relevance.
    #[serde(default)]
    pub human_data: bool,
    /// Clinical-trial flag; defaults to `false`.
    #[serde(default)]
    pub clinical_trial: bool,
    /// Optional free-text concentration range.
    pub concentration_range: Option<String>,
    /// Optional free-text duration.
    pub duration: Option<String>,
    /// Optional free-text age group.
    pub age_group: Option<String>,
    /// Optional free-text cell type.
    pub cell_type: Option<String>,
}
1418
/// The individual factors behind a computed [`Confidence::score`], as
/// produced by `compute_confidence_from_components`.
///
/// Several fields accept an alternative key name on input via `alias`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceComponents {
    /// Base weight from the evidence type. Also read from `evidence_grade`.
    #[serde(alias = "evidence_grade")]
    pub evidence_strength: f64,
    /// Factor from replication outcomes. Also read from `replication_factor`.
    #[serde(alias = "replication_factor")]
    pub replication_strength: f64,
    /// Factor from the parsed sample size.
    pub sample_strength: f64,
    /// Factor from human / in-vivo / in-vitro flags. Also read from `species_relevance`.
    #[serde(alias = "species_relevance")]
    pub model_relevance: f64,
    /// Amount subtracted when the finding is contested. Also read from
    /// `contradiction_penalty`.
    #[serde(alias = "contradiction_penalty")]
    pub review_penalty: f64,
    /// Additive adjustment; defaults to `0.0` when absent.
    #[serde(default)]
    pub calibration_adjustment: f64,
    /// Multiplier from `causal_consistency_multiplier`; defaults to `1.0`
    /// when absent.
    #[serde(default = "default_causal_consistency")]
    pub causal_consistency: f64,
    /// Formula version label; defaults to the value of
    /// `default_formula_version` when absent.
    #[serde(default = "default_formula_version")]
    pub formula_version: String,
}
1460
/// Serde default for [`ConfidenceComponents::causal_consistency`]: `1.0`
/// (a neutral multiplier).
fn default_causal_consistency() -> f64 {
    1.0_f64
}
1464
/// Serde default for [`ConfidenceComponents::formula_version`], used when the
/// field is absent from the input.
fn default_formula_version() -> String {
    String::from("v0.8")
}
1468
/// How a [`Confidence`] score was obtained. Serialized in `snake_case`.
///
/// The default is [`ConfidenceMethod::LlmInitial`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum ConfidenceMethod {
    /// Set by `compute_confidence_from_components`.
    Computed,
    /// NOTE(review): presumably a score assigned by a human expert — confirm.
    ExpertJudgment,
    /// Set by [`Confidence::raw`]; also the default when the field is absent.
    #[default]
    LlmInitial,
}
1482
/// The kind of confidence a [`Confidence`] value expresses. Serialized in
/// `snake_case`; currently has a single variant, which is also the default.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ConfidenceKind {
    #[default]
    FrontierEpistemic,
}
1491
/// A confidence score with the basis and method that produced it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Confidence {
    /// Kind of confidence; defaults to `ConfidenceKind::FrontierEpistemic`.
    #[serde(default)]
    pub kind: ConfidenceKind,
    /// The score. `compute_confidence_from_components` clamps it to `[0, 1]`
    /// and rounds to three decimals; [`Confidence::raw`] stores it as given.
    pub score: f64,
    /// Human-readable explanation of how the score was derived.
    pub basis: String,
    /// How the score was obtained; defaults to `ConfidenceMethod::LlmInitial`.
    #[serde(default)]
    pub method: ConfidenceMethod,
    /// Factor breakdown for computed scores; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub components: Option<ConfidenceComponents>,
    /// Confidence in the extraction itself; defaults to `0.85` when absent.
    #[serde(default = "default_extraction_conf")]
    pub extraction_confidence: f64,
}
1509
/// Default for [`Confidence::extraction_confidence`]: `0.85`. Used by serde
/// when the field is absent and by `compute_confidence_from_components`.
fn default_extraction_conf() -> f64 {
    0.85_f64
}
1513
1514impl Confidence {
1515 pub fn raw(score: f64, basis: impl Into<String>, extraction_confidence: f64) -> Self {
1526 Self {
1527 kind: ConfidenceKind::FrontierEpistemic,
1528 score,
1529 basis: basis.into(),
1530 method: ConfidenceMethod::LlmInitial,
1531 components: None,
1532 extraction_confidence,
1533 }
1534 }
1535}
1536
/// Extracts the largest integer mentioned in a free-text sample size.
///
/// Every run of ASCII digits is a candidate. A run of one to three digits
/// followed by one or more `,ddd` groups (exactly three digits each, not
/// followed by a further digit) is read as a single thousands-separated
/// number, so `"n=1,000"` yields `1000` rather than `1`. Any other comma is a
/// plain separator (`"10,20"` yields `20`).
///
/// Candidates that do not fit in a `u64` are ignored. Returns `None` when the
/// text contains no parseable number.
fn parse_sample_size(s: &str) -> Option<u64> {
    let bytes = s.as_bytes();
    let mut largest: Option<u64> = None;
    let mut pos = 0;
    while pos < bytes.len() {
        if !bytes[pos].is_ascii_digit() {
            pos += 1;
            continue;
        }
        // Slicing by byte offset is safe here: a run starts and ends on ASCII
        // bytes, which are always UTF-8 character boundaries.
        let run_start = pos;
        while pos < bytes.len() && bytes[pos].is_ascii_digit() {
            pos += 1;
        }
        let mut digits = String::from(&s[run_start..pos]);
        // Only a leading group of at most three digits can open a
        // thousands-separated number ("1234,567" is two separate numbers).
        if digits.len() <= 3 {
            while is_thousands_group(bytes, pos) {
                digits.push_str(&s[pos + 1..pos + 4]);
                pos += 4;
            }
        }
        if let Ok(n) = digits.parse::<u64>() {
            largest = Some(largest.map_or(n, |prev| prev.max(n)));
        }
    }
    largest
}

/// Returns `true` when `bytes[at..]` starts with `,ddd` and the character
/// after those three digits (if any) is not another digit.
fn is_thousands_group(bytes: &[u8], at: usize) -> bool {
    let Some(group) = bytes.get(at..at + 4) else {
        return false;
    };
    let followed_by_digit = matches!(bytes.get(at + 4), Some(b) if b.is_ascii_digit());
    group[0] == b',' && group[1..].iter().all(u8::is_ascii_digit) && !followed_by_digit
}
1548
1549pub fn compute_confidence(
1559 evidence: &Evidence,
1560 conditions: &Conditions,
1561 contested: bool,
1562) -> Confidence {
1563 let n_replicated = if evidence.replicated {
1564 evidence.replication_count.unwrap_or(1)
1565 } else {
1566 0
1567 };
1568 compute_confidence_from_components(
1569 evidence,
1570 conditions,
1571 contested,
1572 n_replicated,
1573 0,
1574 0,
1575 None,
1576 None,
1577 )
1578}
1579
1580#[must_use]
1588pub fn causal_consistency_multiplier(
1589 claim: Option<CausalClaim>,
1590 grade: Option<CausalEvidenceGrade>,
1591) -> f64 {
1592 use CausalClaim::*;
1593 use CausalEvidenceGrade::*;
1594 let (Some(c), Some(g)) = (claim, grade) else {
1595 return 1.0;
1596 };
1597 match (c, g) {
1598 (_, Rct) => 1.10,
1600 (Correlation, _) => 1.0,
1602 (Mediation, QuasiExperimental) => 1.05,
1604 (Mediation, Observational) => 0.85,
1605 (Mediation, Theoretical) => 0.90,
1606 (Intervention, QuasiExperimental) => 0.90,
1609 (Intervention, Observational) => 0.65,
1610 (Intervention, Theoretical) => 0.75,
1611 }
1612}
1613
1614#[must_use]
1632pub fn compute_confidence_from_components(
1633 evidence: &Evidence,
1634 conditions: &Conditions,
1635 contested: bool,
1636 n_replicated: u32,
1637 n_failed: u32,
1638 n_partial: u32,
1639 causal_claim: Option<CausalClaim>,
1640 causal_evidence_grade: Option<CausalEvidenceGrade>,
1641) -> Confidence {
1642 let evidence_strength = match evidence.evidence_type.as_str() {
1643 "meta_analysis" => 0.95,
1644 "systematic_review" => 0.90,
1645 "experimental" => 0.80,
1646 "observational" => 0.65,
1647 "computational" => 0.55,
1648 "case_report" => 0.40,
1649 "theoretical" => 0.30,
1650 _ => 0.50,
1651 };
1652
1653 let replication_strength = (0.7 + 0.1 * f64::from(n_replicated) + 0.05 * f64::from(n_partial)
1654 - 0.10 * f64::from(n_failed))
1655 .clamp(0.4, 1.0);
1656
1657 let sample_strength = match evidence.sample_size.as_deref().and_then(parse_sample_size) {
1658 Some(n) if n > 1000 => 1.0,
1659 Some(n) if n > 100 => 0.9,
1660 Some(n) if n > 30 => 0.8,
1661 Some(n) if n > 10 => 0.7,
1662 Some(_) => 0.6,
1663 None => 0.6,
1664 };
1665
1666 let model_relevance = if conditions.human_data {
1667 1.0
1668 } else if conditions.in_vivo {
1669 0.8
1670 } else if conditions.in_vitro {
1671 0.6
1672 } else {
1673 0.5
1674 };
1675
1676 let review_penalty = if contested { 0.15 } else { 0.0 };
1677 let calibration_adjustment = 0.0;
1678 let causal_consistency = causal_consistency_multiplier(causal_claim, causal_evidence_grade);
1679
1680 let raw = evidence_strength
1681 * replication_strength
1682 * model_relevance
1683 * sample_strength
1684 * causal_consistency
1685 - review_penalty
1686 + calibration_adjustment;
1687 let score = raw.clamp(0.0, 1.0);
1688 let score = (score * 1000.0).round() / 1000.0;
1689
1690 let components = ConfidenceComponents {
1691 evidence_strength,
1692 replication_strength,
1693 sample_strength,
1694 model_relevance,
1695 review_penalty,
1696 calibration_adjustment,
1697 causal_consistency,
1698 formula_version: "v0.7".to_string(),
1699 };
1700
1701 let basis = format!(
1702 "frontier_epistemic: evidence={:.2} * replication={:.2} * model={:.2} * sample={:.2} * causal={:.2} - review_penalty={:.2} + calibration={:.2} = {:.3}",
1703 evidence_strength,
1704 replication_strength,
1705 model_relevance,
1706 sample_strength,
1707 causal_consistency,
1708 review_penalty,
1709 calibration_adjustment,
1710 score,
1711 );
1712
1713 Confidence {
1714 kind: ConfidenceKind::FrontierEpistemic,
1715 score,
1716 basis,
1717 method: ConfidenceMethod::Computed,
1718 components: Some(components),
1719 extraction_confidence: default_extraction_conf(),
1720 }
1721}
1722
1723#[must_use]
1728pub fn count_replication_outcomes(
1729 replications: &[Replication],
1730 target_finding: &str,
1731) -> (u32, u32, u32) {
1732 let mut n_replicated = 0u32;
1733 let mut n_failed = 0u32;
1734 let mut n_partial = 0u32;
1735 for r in replications {
1736 if r.target_finding != target_finding {
1737 continue;
1738 }
1739 match r.outcome.as_str() {
1740 "replicated" => n_replicated += 1,
1741 "failed" => n_failed += 1,
1742 "partial" => n_partial += 1,
1743 _ => {}
1744 }
1745 }
1746 (n_replicated, n_failed, n_partial)
1747}
1748
1749pub fn recompute_all_confidence(
1758 findings: &mut [FindingBundle],
1759 replications: &[Replication],
1760) -> usize {
1761 let mut changed = 0;
1762 for bundle in findings.iter_mut() {
1763 let old_score = bundle.confidence.score;
1764 let extraction_conf = bundle.confidence.extraction_confidence;
1765 let (n_repl, n_failed, n_partial) = count_replication_outcomes(replications, &bundle.id);
1766 let (n_repl, n_failed, n_partial) = if n_repl + n_failed + n_partial == 0 {
1770 let legacy = if bundle.evidence.replicated {
1771 bundle.evidence.replication_count.unwrap_or(1)
1772 } else {
1773 0
1774 };
1775 (legacy, 0, 0)
1776 } else {
1777 (n_repl, n_failed, n_partial)
1778 };
1779 let mut new_conf = compute_confidence_from_components(
1780 &bundle.evidence,
1781 &bundle.conditions,
1782 bundle.flags.contested,
1783 n_repl,
1784 n_failed,
1785 n_partial,
1786 bundle.assertion.causal_claim,
1787 bundle.assertion.causal_evidence_grade,
1788 );
1789 new_conf.extraction_confidence = extraction_conf;
1791 if (new_conf.score - old_score).abs() > 0.001 {
1792 changed += 1;
1793 }
1794 bundle.confidence = new_conf;
1795 }
1796 changed
1797}
1798
/// Records how a finding was extracted from its source.
///
/// NOTE(review): the derived `Default` yields empty strings for `method` and
/// `extractor_version`, whereas deserializing an object that merely omits
/// those keys yields `"llm_extraction"` / `"vela/0.2.0"`. Confirm that this
/// difference is intended.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Extraction {
    /// Extraction method label; `"llm_extraction"` when the key is absent.
    #[serde(default = "default_extraction_method")]
    pub method: String,
    /// Model name, if one was recorded.
    pub model: Option<String>,
    /// Model version, if one was recorded.
    pub model_version: Option<String>,
    /// Extraction time as a string; empty when the key is absent.
    #[serde(default)]
    pub extracted_at: String,
    /// Extractor version label; `"vela/0.2.0"` when the key is absent.
    #[serde(default = "default_extractor_version")]
    pub extractor_version: String,
}
1810
/// Serde default for `Extraction::method`.
fn default_extraction_method() -> String {
    String::from("llm_extraction")
}
/// Serde default for `Extraction::extractor_version`.
fn default_extractor_version() -> String {
    String::from("vela/0.2.0")
}
1817
/// Review status carried inside a [`Provenance`] record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Review {
    /// Whether a review took place; `false` when the key is absent.
    #[serde(default)]
    pub reviewed: bool,
    /// Reviewer identifier, if recorded.
    pub reviewer: Option<String>,
    /// Review time as a string, if recorded (format is not enforced here).
    pub reviewed_at: Option<String>,
    /// Free-form JSON values describing corrections; empty when absent.
    #[serde(default)]
    pub corrections: Vec<serde_json::Value>,
}
1827
/// One author entry of a source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
    /// Author name as given by the source (required).
    pub name: String,
    /// ORCID identifier, if known.
    pub orcid: Option<String>,
}
1833
/// Bibliographic and processing provenance of a finding's source.
///
/// `doi`, `pmid` and `title` also feed `FindingBundle::content_address`, which
/// uses the first available of them (in that order) as the source key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
    /// Source category; `"published_paper"` when the key is absent. This type
    /// does not validate the value against `VALID_PROVENANCE_SOURCE_TYPES`.
    #[serde(default = "default_source_type")]
    pub source_type: String,
    /// DOI of the source, if known.
    pub doi: Option<String>,
    /// PubMed identifier, if known.
    pub pmid: Option<String>,
    /// `pmc` identifier (presumably PubMed Central), if known.
    pub pmc: Option<String>,
    /// OpenAlex identifier, if known.
    pub openalex_id: Option<String>,
    /// URL of the source; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
    /// Title of the source; empty when the key is absent.
    #[serde(default)]
    pub title: String,
    /// Author list; empty when the key is absent.
    #[serde(default)]
    pub authors: Vec<Author>,
    /// Publication year, if known.
    pub year: Option<i32>,
    /// Journal name, if known.
    pub journal: Option<String>,
    /// License string; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Publisher name; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub publisher: Option<String>,
    /// Funder names; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub funders: Vec<String>,
    /// Extraction metadata; `Extraction::default()` when the key is absent.
    #[serde(default)]
    pub extraction: Extraction,
    /// Review status, if any.
    pub review: Option<Review>,
    /// Citation count, if known.
    #[serde(default)]
    pub citation_count: Option<u64>,
}
1869
/// Serde default for `Provenance::source_type`.
fn default_source_type() -> String {
    String::from("published_paper")
}
1873
/// Review outcome recorded on a finding's flags.
///
/// Serialized in `snake_case` (`"accepted"`, `"contested"`,
/// `"needs_revision"`, `"rejected"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReviewState {
    /// The only state for which `implies_contested` returns `false`.
    Accepted,
    /// The finding is contested.
    Contested,
    /// The finding needs revision; counts as contested.
    NeedsRevision,
    /// The finding was rejected; counts as contested.
    Rejected,
}
1891
1892impl ReviewState {
1893 #[must_use]
1896 pub fn implies_contested(&self) -> bool {
1897 matches!(
1898 self,
1899 ReviewState::Contested | ReviewState::NeedsRevision | ReviewState::Rejected
1900 )
1901 }
1902}
1903
/// Boolean and optional markers attached to a finding.
///
/// Every field defaults when its key is absent, and the derived `Default`
/// yields all-`false` / all-`None`. The meaning of most markers is defined by
/// their producers and is not visible in this type.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Flags {
    /// `gap` marker.
    #[serde(default)]
    pub gap: bool,
    /// `negative_space` marker.
    #[serde(default)]
    pub negative_space: bool,
    /// Contested marker; `recompute_all_confidence` passes it on to the
    /// confidence computation.
    #[serde(default)]
    pub contested: bool,
    /// `retracted` marker.
    #[serde(default)]
    pub retracted: bool,
    /// `declining` marker.
    #[serde(default)]
    pub declining: bool,
    /// `gravity_well` marker.
    #[serde(default)]
    pub gravity_well: bool,
    /// Review outcome, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub review_state: Option<ReviewState>,
    /// `superseded` marker; omitted from output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub superseded: bool,
    /// Signature threshold, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature_threshold: Option<u32>,
    /// `jointly_accepted` marker; omitted from output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub jointly_accepted: bool,
}
1949
/// Kind of causal claim an assertion makes.
///
/// Serialized in `snake_case`; the wire names are listed in
/// `VALID_CAUSAL_CLAIMS`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CausalClaim {
    /// Serialized as `"correlation"`.
    Correlation,
    /// Serialized as `"mediation"`.
    Mediation,
    /// Serialized as `"intervention"`.
    Intervention,
}
1973
/// Grade of the evidence behind a causal claim.
///
/// Serialized in `snake_case`; the wire names are listed in
/// `VALID_CAUSAL_EVIDENCE_GRADES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CausalEvidenceGrade {
    /// Serialized as `"rct"`.
    Rct,
    /// Serialized as `"quasi_experimental"`.
    QuasiExperimental,
    /// Serialized as `"observational"`.
    Observational,
    /// Serialized as `"theoretical"`.
    Theoretical,
}
1993
/// Wire names of the [`CausalClaim`] variants, in declaration order.
pub const VALID_CAUSAL_CLAIMS: &[&str] = &["correlation", "mediation", "intervention"];
1997
/// Wire names of the [`CausalEvidenceGrade`] variants, in declaration order.
pub const VALID_CAUSAL_EVIDENCE_GRADES: &[&str] =
    &["rct", "quasi_experimental", "observational", "theoretical"];
2001
/// The claim a finding makes.
///
/// `text` and `assertion_type` are part of the finding's content address (see
/// `FindingBundle::content_address`); the other fields are not.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Assertion {
    /// Claim text.
    pub text: String,
    /// Assertion category, stored under the JSON key `"type"`. This type does
    /// not validate the value against `VALID_ASSERTION_TYPES`.
    #[serde(rename = "type")]
    pub assertion_type: String,
    /// Entities mentioned by the claim; empty when the key is absent.
    #[serde(default)]
    pub entities: Vec<Entity>,
    /// Relation label, if any.
    pub relation: Option<String>,
    /// Direction label, if any.
    pub direction: Option<String>,
    /// Causal claim kind, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_claim: Option<CausalClaim>,
    /// Causal evidence grade, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_evidence_grade: Option<CausalEvidenceGrade>,
}
2023
/// A typed edge from one finding to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Link {
    /// Target identifier (presumably in the syntax accepted by
    /// `LinkRef::parse`; this type does not validate it).
    pub target: String,
    /// Link category, stored under the JSON key `"type"`. This type does not
    /// validate the value against `VALID_LINK_TYPES`.
    #[serde(rename = "type")]
    pub link_type: String,
    /// Free-text note; empty when the key is absent.
    #[serde(default)]
    pub note: String,
    /// Who or what inferred the link; `"compiler"` when the key is absent.
    #[serde(default = "default_compiler")]
    pub inferred_by: String,
    /// Creation time as a string; empty when the key is absent.
    #[serde(default)]
    pub created_at: String,
    /// Optional response model for the link; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mechanism: Option<Mechanism>,
}
2045
/// Serde default for `Link::inferred_by`.
fn default_compiler() -> String {
    String::from("compiler")
}
2049
/// Response model attached to a link; see `Mechanism::apply` for the formulas.
///
/// Serialized as an internally tagged object whose `"kind"` key holds the
/// `snake_case` variant name.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Mechanism {
    /// Response proportional to the input change: `sign * slope * delta_x`.
    Linear {
        sign: MechanismSign,
        slope: f64,
    },
    /// Response follows the input change, limited to a magnitude of 1.
    Monotonic {
        sign: MechanismSign,
    },
    /// Unit-magnitude response once `|delta_x|` reaches `threshold`.
    Threshold {
        sign: MechanismSign,
        threshold: f64,
    },
    /// Response `delta_x / (|delta_x| + half_max)`, so its magnitude stays
    /// below 1.
    Saturating {
        sign: MechanismSign,
        half_max: f64,
    },
    /// No model; `apply` returns `None`.
    Unknown,
}
2096
/// Direction of a mechanism's effect.
///
/// Serialized in lowercase (`"positive"` / `"negative"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum MechanismSign {
    /// Maps to `1.0` in `as_f64`.
    Positive,
    /// Maps to `-1.0` in `as_f64`.
    Negative,
}
2107
2108impl MechanismSign {
2109 #[must_use]
2110 pub fn as_f64(self) -> f64 {
2111 match self {
2112 Self::Positive => 1.0,
2113 Self::Negative => -1.0,
2114 }
2115 }
2116}
2117
2118impl Mechanism {
2119 #[must_use]
2123 pub fn apply(&self, delta_x: f64) -> Option<f64> {
2124 match *self {
2125 Self::Linear { sign, slope } => Some(sign.as_f64() * slope * delta_x),
2126 Self::Monotonic { sign } => {
2127 Some(sign.as_f64() * delta_x.signum() * delta_x.abs().min(1.0))
2128 }
2129 Self::Threshold { sign, threshold } => {
2130 if delta_x.abs() >= threshold {
2131 Some(sign.as_f64() * delta_x.signum())
2132 } else {
2133 Some(0.0)
2134 }
2135 }
2136 Self::Saturating { sign, half_max } => {
2137 let denom = delta_x.abs() + half_max.max(1e-9);
2139 Some(sign.as_f64() * delta_x / denom)
2140 }
2141 Self::Unknown => None,
2142 }
2143 }
2144}
2145
/// Parsed form of a link target string; see `LinkRef::parse`.
///
/// Both fields keep their prefixes (`vf_…`, `vfr_…`) exactly as they appeared
/// in the input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkRef {
    /// Target written as a bare `vf_…` id.
    Local { vf_id: String },
    /// Target written as `vf_…@vfr_…`.
    Cross { vf_id: String, vfr_id: String },
}
2162
/// Reasons `LinkRef::parse` rejects a link target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkParseError {
    /// The input string is empty.
    Empty,
    /// The part before `@` does not start with `vf_`.
    BadVfPrefix,
    /// The part after `@` does not start with `vfr_`.
    BadVfrPrefix,
    /// Nothing follows the `vf_` prefix.
    EmptyVfId,
    /// Nothing follows the `vfr_` prefix.
    EmptyVfrId,
    /// The input contains more than one `@`.
    TooManyAtSigns,
}
2172
2173impl std::fmt::Display for LinkParseError {
2174 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2175 match self {
2176 LinkParseError::Empty => write!(f, "empty link target"),
2177 LinkParseError::BadVfPrefix => write!(f, "link target must start with 'vf_'"),
2178 LinkParseError::BadVfrPrefix => {
2179 write!(f, "cross-frontier suffix must start with 'vfr_'")
2180 }
2181 LinkParseError::EmptyVfId => write!(f, "link target's vf_ id is empty"),
2182 LinkParseError::EmptyVfrId => write!(f, "cross-frontier vfr_ id is empty"),
2183 LinkParseError::TooManyAtSigns => {
2184 write!(f, "link target has more than one '@' separator")
2185 }
2186 }
2187 }
2188}
2189
// Empty impl: the trait's default methods are used, so no source error is reported.
impl std::error::Error for LinkParseError {}
2191
2192impl LinkRef {
2193 pub fn parse(s: &str) -> Result<Self, LinkParseError> {
2198 if s.is_empty() {
2199 return Err(LinkParseError::Empty);
2200 }
2201 let mut parts = s.split('@');
2202 let local = parts.next().ok_or(LinkParseError::Empty)?;
2203 let remote = parts.next();
2204 if parts.next().is_some() {
2205 return Err(LinkParseError::TooManyAtSigns);
2206 }
2207 let vf_id = local
2208 .strip_prefix("vf_")
2209 .ok_or(LinkParseError::BadVfPrefix)?;
2210 if vf_id.is_empty() {
2211 return Err(LinkParseError::EmptyVfId);
2212 }
2213 match remote {
2214 None => Ok(LinkRef::Local {
2215 vf_id: local.to_string(),
2216 }),
2217 Some(r) => {
2218 let vfr_id = r.strip_prefix("vfr_").ok_or(LinkParseError::BadVfrPrefix)?;
2219 if vfr_id.is_empty() {
2220 return Err(LinkParseError::EmptyVfrId);
2221 }
2222 Ok(LinkRef::Cross {
2223 vf_id: local.to_string(),
2224 vfr_id: r.to_string(),
2225 })
2226 }
2227 }
2228 }
2229
2230 pub fn format(&self) -> String {
2232 match self {
2233 LinkRef::Local { vf_id } => vf_id.clone(),
2234 LinkRef::Cross { vf_id, vfr_id } => format!("{vf_id}@{vfr_id}"),
2235 }
2236 }
2237
2238 pub fn is_cross_frontier(&self) -> bool {
2240 matches!(self, LinkRef::Cross { .. })
2241 }
2242}
2243
#[cfg(test)]
mod link_ref_tests {
    use super::*;

    /// Parses `input` and returns the error, failing the test if parsing succeeds.
    fn rejection(input: &str) -> LinkParseError {
        LinkRef::parse(input).expect_err("input should be rejected")
    }

    #[test]
    fn parses_local_vf_id() {
        let parsed = LinkRef::parse("vf_abc123").expect("bare vf_ id should parse");
        let expected = LinkRef::Local {
            vf_id: String::from("vf_abc123"),
        };
        assert_eq!(parsed, expected);
        assert_eq!(parsed.format(), "vf_abc123");
        assert!(!parsed.is_cross_frontier());
    }

    #[test]
    fn parses_cross_frontier_target() {
        let parsed = LinkRef::parse("vf_abc@vfr_def").expect("cross-frontier id should parse");
        let expected = LinkRef::Cross {
            vf_id: String::from("vf_abc"),
            vfr_id: String::from("vfr_def"),
        };
        assert_eq!(parsed, expected);
        assert_eq!(parsed.format(), "vf_abc@vfr_def");
        assert!(parsed.is_cross_frontier());
    }

    #[test]
    fn rejects_empty() {
        assert_eq!(rejection(""), LinkParseError::Empty);
    }

    #[test]
    fn rejects_missing_vf_prefix() {
        assert_eq!(rejection("xx_abc"), LinkParseError::BadVfPrefix);
    }

    #[test]
    fn rejects_empty_vf_id() {
        assert_eq!(rejection("vf_"), LinkParseError::EmptyVfId);
    }

    #[test]
    fn rejects_missing_vfr_prefix_after_at() {
        assert_eq!(rejection("vf_abc@xxx_def"), LinkParseError::BadVfrPrefix);
    }

    #[test]
    fn rejects_empty_vfr_id() {
        assert_eq!(rejection("vf_abc@vfr_"), LinkParseError::EmptyVfrId);
    }

    #[test]
    fn rejects_double_at() {
        assert_eq!(rejection("vf_abc@vfr_def@x"), LinkParseError::TooManyAtSigns);
    }

    #[test]
    fn round_trips_real_ids() {
        let samples = [
            "vf_d0a962d3251133dd",
            "vf_d0a962d3251133dd@vfr_7344e96c0f2669d5",
        ];
        for sample in samples.iter() {
            let rendered = LinkRef::parse(sample).unwrap().format();
            assert_eq!(rendered, *sample);
        }
    }
}
2324
/// A note attached to a finding (see `FindingBundle::annotations`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Annotation {
    /// Annotation identifier.
    pub id: String,
    /// Annotation text.
    pub text: String,
    /// Author of the annotation.
    pub author: String,
    /// Time of the annotation as a string (format is not enforced here).
    pub timestamp: String,
    /// Optional source reference; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provenance: Option<ProvenanceRef>,
}
2344
/// Lightweight source reference used by [`Annotation`].
///
/// Every field is optional and omitted from output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProvenanceRef {
    /// DOI of the referenced source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub doi: Option<String>,
    /// PubMed identifier of the referenced source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pmid: Option<String>,
    /// Title of the referenced source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// Span within the source (presumably a quoted passage or locator — TODO
    /// confirm). Not counted by `has_identifier`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span: Option<String>,
}
2362
2363impl ProvenanceRef {
2364 #[must_use]
2367 pub fn has_identifier(&self) -> bool {
2368 self.doi.is_some() || self.pmid.is_some() || self.title.is_some()
2369 }
2370}
2371
/// A file attached to a finding (see `FindingBundle::attachments`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Attachment {
    /// File name.
    pub filename: String,
    /// Optional display label.
    pub label: Option<String>,
    /// Location of the file (what the path is relative to is not visible here).
    pub path: String,
    /// File size in bytes.
    pub size_bytes: u64,
    /// MIME type, if known.
    pub mime_type: Option<String>,
    /// Attachment time as a string (format is not enforced here).
    pub attached_at: String,
    /// Who attached the file, if recorded.
    pub attached_by: Option<String>,
}
2383
/// A single review action recorded against a finding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewEvent {
    /// Event identifier.
    pub id: String,
    /// Workspace label, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace: Option<String>,
    /// Identifier of the reviewed finding.
    pub finding_id: String,
    /// Reviewer identifier.
    pub reviewer: String,
    /// Review time as a string (format is not enforced here).
    pub reviewed_at: String,
    /// Scope label, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scope: Option<String>,
    /// Status label, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub status: Option<String>,
    /// What the reviewer did.
    pub action: ReviewAction,
    /// Free-text justification; empty when the key is absent.
    #[serde(default)]
    pub reason: String,
    /// Findings the reviewer considered; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub evidence_considered: Vec<ReviewEvidence>,
    /// Free-form JSON describing a resulting state change; omitted from output
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub state_change: Option<serde_json::Value>,
}
2419
/// One finding referenced in `ReviewEvent::evidence_considered`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ReviewEvidence {
    /// Identifier of the referenced finding.
    pub finding_id: String,
    /// Role label, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// Free-text note, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}
2428
/// The action taken in a [`ReviewEvent`].
///
/// Serialized as an internally tagged object whose `"type"` key holds the
/// `snake_case` variant name.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReviewAction {
    /// Approved without further data.
    Approved,
    /// Qualified, with a `target` string.
    Qualified { target: String },
    /// A field value was corrected.
    Corrected {
        /// Name of the corrected field.
        field: String,
        original: String,
        corrected: String,
    },
    /// Flagged, with a `flag_type` label.
    Flagged { flag_type: String },
    /// Disputed with counter-evidence.
    Disputed {
        /// Description of the counter-evidence.
        counter_evidence: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        counter_doi: Option<String>,
    },
}
2452
/// Record of one change to a finding's confidence score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceUpdate {
    /// Identifier of the affected finding.
    pub finding_id: String,
    /// Score before the update.
    pub previous_score: f64,
    /// Score after the update.
    pub new_score: f64,
    /// Free-text explanation of the new score.
    pub basis: String,
    /// Who or what made the update.
    pub updated_by: String,
    /// Update time as a string (format is not enforced here).
    pub updated_at: String,
}
2468
/// A finding together with its evidence, conditions, confidence, provenance,
/// flags, links and attachments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FindingBundle {
    /// Finding identifier; `FindingBundle::new` sets it to the content address.
    pub id: String,
    /// Version number; `1` when the key is absent.
    #[serde(default = "default_version")]
    pub version: u32,
    /// Reference to the previous version, if any.
    pub previous_version: Option<String>,
    /// The claim being made.
    pub assertion: Assertion,
    /// Evidence backing the claim.
    pub evidence: Evidence,
    /// Conditions under which the claim was made.
    pub conditions: Conditions,
    /// Current confidence in the claim.
    pub confidence: Confidence,
    /// Where the claim came from.
    pub provenance: Provenance,
    /// Status markers.
    pub flags: Flags,
    /// Outgoing links; empty when the key is absent.
    #[serde(default)]
    pub links: Vec<Link>,
    /// Annotations; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub annotations: Vec<Annotation>,
    /// Attachments; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub attachments: Vec<Attachment>,
    /// Creation time; `FindingBundle::new` stores an RFC 3339 timestamp.
    pub created: String,
    /// Time of the last update, if any.
    pub updated: Option<String>,
    /// Access tier; omitted from output when it is `Public`.
    #[serde(default, skip_serializing_if = "is_public_tier")]
    pub access_tier: crate::access_tier::AccessTier,
}
2498
2499fn is_public_tier(tier: &crate::access_tier::AccessTier) -> bool {
2500 matches!(tier, crate::access_tier::AccessTier::Public)
2501}
2502
/// Serde default for `FindingBundle::version`.
fn default_version() -> u32 {
    const INITIAL_VERSION: u32 = 1;
    INITIAL_VERSION
}
2506
2507impl FindingBundle {
2508 pub fn normalize_text(s: &str) -> String {
2514 let lower = s.to_lowercase();
2515 let collapsed: String = lower.split_whitespace().collect::<Vec<_>>().join(" ");
2517 collapsed
2519 .trim_end_matches(['.', ';', ':', '!', '?'])
2520 .to_string()
2521 }
2522
2523 pub fn content_address(assertion: &Assertion, provenance: &Provenance) -> String {
2527 let norm_text = Self::normalize_text(&assertion.text);
2528 let prov_id = provenance
2529 .doi
2530 .as_deref()
2531 .or(provenance.pmid.as_deref())
2532 .unwrap_or(&provenance.title);
2533 let preimage = format!("{}|{}|{}", norm_text, assertion.assertion_type, prov_id);
2534 let hash = Sha256::digest(preimage.as_bytes());
2535 format!("vf_{}", &hex::encode(hash)[..16])
2536 }
2537
2538 pub fn new(
2539 assertion: Assertion,
2540 evidence: Evidence,
2541 conditions: Conditions,
2542 confidence: Confidence,
2543 provenance: Provenance,
2544 flags: Flags,
2545 ) -> Self {
2546 let now = Utc::now().to_rfc3339();
2547 let id = Self::content_address(&assertion, &provenance);
2548
2549 Self {
2550 id,
2551 version: 1,
2552 previous_version: None,
2553 assertion,
2554 evidence,
2555 conditions,
2556 confidence,
2557 provenance,
2558 flags,
2559 links: Vec::new(),
2560 annotations: Vec::new(),
2561 attachments: Vec::new(),
2562 created: now,
2563 updated: None,
2564 access_tier: crate::access_tier::AccessTier::Public,
2565 }
2566 }
2567
2568 pub fn add_link(&mut self, target_id: &str, link_type: &str, note: &str) {
2569 self.links.push(Link {
2570 target: target_id.to_string(),
2571 link_type: link_type.to_string(),
2572 note: note.to_string(),
2573 inferred_by: "compiler".to_string(),
2574 created_at: Utc::now().to_rfc3339(),
2575 mechanism: None,
2576 });
2577 }
2578
2579 pub fn add_link_with_source(
2580 &mut self,
2581 target_id: &str,
2582 link_type: &str,
2583 note: &str,
2584 inferred_by: &str,
2585 ) {
2586 self.links.push(Link {
2587 target: target_id.to_string(),
2588 link_type: link_type.to_string(),
2589 note: note.to_string(),
2590 inferred_by: inferred_by.to_string(),
2591 created_at: Utc::now().to_rfc3339(),
2592 mechanism: None,
2593 });
2594 }
2595}
2596
2597#[cfg(test)]
2598mod tests {
2599 use super::*;
2600
    /// Fixture: a `mechanism` assertion ("NLRP3 activates IL-1B") with a single
    /// protein entity and no causal annotations.
    fn sample_assertion() -> Assertion {
        Assertion {
            text: "NLRP3 activates IL-1B".into(),
            assertion_type: "mechanism".into(),
            entities: vec![Entity {
                name: "NLRP3".into(),
                entity_type: "protein".into(),
                identifiers: serde_json::Map::new(),
                canonical_id: None,
                candidates: vec![],
                aliases: vec![],
                resolution_provenance: None,
                resolution_confidence: 1.0,
                resolution_method: None,
                species_context: None,
                needs_review: false,
            }],
            relation: Some("activates".into()),
            direction: Some("positive".into()),
            causal_claim: None,
            causal_evidence_grade: None,
        }
    }
2624
    /// Fixture: replicated experimental mouse evidence with a replication
    /// count of 3.
    fn sample_evidence() -> Evidence {
        Evidence {
            evidence_type: "experimental".into(),
            model_system: "mouse".into(),
            species: Some("Mus musculus".into()),
            method: "Western blot".into(),
            sample_size: Some("n=30".into()),
            effect_size: None,
            p_value: Some("p<0.05".into()),
            replicated: true,
            replication_count: Some(3),
            evidence_spans: vec![],
        }
    }
2639
    /// Fixture: in-vitro mouse microglia conditions with no human or clinical
    /// data.
    fn sample_conditions() -> Conditions {
        Conditions {
            text: "In vitro, mouse microglia".into(),
            species_verified: vec!["Mus musculus".into()],
            species_unverified: vec![],
            in_vitro: true,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: Some("microglia".into()),
        }
    }
2655
    /// Fixture: an LLM-initial confidence of 0.85 with no component breakdown.
    fn sample_confidence() -> Confidence {
        Confidence {
            kind: ConfidenceKind::FrontierEpistemic,
            score: 0.85,
            basis: "Experimental with replication".into(),
            method: ConfidenceMethod::LlmInitial,
            components: None,
            extraction_confidence: 0.9,
        }
    }
2666
2667 fn sample_provenance() -> Provenance {
2668 Provenance {
2669 source_type: "published_paper".into(),
2670 doi: Some("10.1234/test".into()),
2671 pmid: None,
2672 pmc: None,
2673 openalex_id: None,
2674 url: None,
2675 title: "Test Paper".into(),
2676 authors: vec![Author {
2677 name: "Smith J".into(),
2678 orcid: None,
2679 }],
2680 year: Some(2024),
2681 journal: Some("Nature".into()),
2682 license: None,
2683 publisher: None,
2684 funders: vec![],
2685 extraction: Extraction::default(),
2686 review: None,
2687 citation_count: Some(100),
2688 }
2689 }
2690
2691 fn sample_flags() -> Flags {
2692 Flags {
2693 gap: false,
2694 negative_space: false,
2695 contested: false,
2696 retracted: false,
2697 declining: false,
2698 gravity_well: false,
2699 review_state: None,
2700 superseded: false,
2701 signature_threshold: None,
2702 jointly_accepted: false,
2703 }
2704 }
2705
2706 #[test]
2709 fn same_content_same_id() {
2710 let b1 = FindingBundle::new(
2711 sample_assertion(),
2712 sample_evidence(),
2713 sample_conditions(),
2714 sample_confidence(),
2715 sample_provenance(),
2716 sample_flags(),
2717 );
2718 let b2 = FindingBundle::new(
2719 sample_assertion(),
2720 sample_evidence(),
2721 sample_conditions(),
2722 sample_confidence(),
2723 sample_provenance(),
2724 sample_flags(),
2725 );
2726 assert_eq!(b1.id, b2.id);
2727 }
2728
2729 #[test]
2730 fn different_content_different_id() {
2731 let b1 = FindingBundle::new(
2732 sample_assertion(),
2733 sample_evidence(),
2734 sample_conditions(),
2735 sample_confidence(),
2736 sample_provenance(),
2737 sample_flags(),
2738 );
2739 let mut different_assertion = sample_assertion();
2740 different_assertion.text = "Completely different claim".into();
2741 let b2 = FindingBundle::new(
2742 different_assertion,
2743 sample_evidence(),
2744 sample_conditions(),
2745 sample_confidence(),
2746 sample_provenance(),
2747 sample_flags(),
2748 );
2749 assert_ne!(b1.id, b2.id);
2750 }
2751
2752 #[test]
2753 fn id_starts_with_vf_prefix() {
2754 let b = FindingBundle::new(
2755 sample_assertion(),
2756 sample_evidence(),
2757 sample_conditions(),
2758 sample_confidence(),
2759 sample_provenance(),
2760 sample_flags(),
2761 );
2762 assert!(b.id.starts_with("vf_"));
2763 assert_eq!(b.id.len(), 3 + 16); }
2765
2766 #[test]
2767 fn new_bundle_version_is_one() {
2768 let b = FindingBundle::new(
2769 sample_assertion(),
2770 sample_evidence(),
2771 sample_conditions(),
2772 sample_confidence(),
2773 sample_provenance(),
2774 sample_flags(),
2775 );
2776 assert_eq!(b.version, 1);
2777 assert!(b.previous_version.is_none());
2778 }
2779
2780 #[test]
2781 fn new_bundle_has_no_links() {
2782 let b = FindingBundle::new(
2783 sample_assertion(),
2784 sample_evidence(),
2785 sample_conditions(),
2786 sample_confidence(),
2787 sample_provenance(),
2788 sample_flags(),
2789 );
2790 assert!(b.links.is_empty());
2791 }
2792
2793 #[test]
2794 fn new_bundle_has_created_timestamp() {
2795 let b = FindingBundle::new(
2796 sample_assertion(),
2797 sample_evidence(),
2798 sample_conditions(),
2799 sample_confidence(),
2800 sample_provenance(),
2801 sample_flags(),
2802 );
2803 assert!(!b.created.is_empty());
2804 assert!(b.updated.is_none());
2805 }
2806
2807 #[test]
2810 fn add_link_works() {
2811 let mut b = FindingBundle::new(
2812 sample_assertion(),
2813 sample_evidence(),
2814 sample_conditions(),
2815 sample_confidence(),
2816 sample_provenance(),
2817 sample_flags(),
2818 );
2819 b.add_link("target_id", "extends", "shared entity");
2820 assert_eq!(b.links.len(), 1);
2821 assert_eq!(b.links[0].target, "target_id");
2822 assert_eq!(b.links[0].link_type, "extends");
2823 assert_eq!(b.links[0].note, "shared entity");
2824 assert_eq!(b.links[0].inferred_by, "compiler");
2825 }
2826
2827 #[test]
2828 fn add_link_with_source_works() {
2829 let mut b = FindingBundle::new(
2830 sample_assertion(),
2831 sample_evidence(),
2832 sample_conditions(),
2833 sample_confidence(),
2834 sample_provenance(),
2835 sample_flags(),
2836 );
2837 b.add_link_with_source(
2838 "target_id",
2839 "contradicts",
2840 "opposite direction",
2841 "entity_overlap",
2842 );
2843 assert_eq!(b.links.len(), 1);
2844 assert_eq!(b.links[0].inferred_by, "entity_overlap");
2845 }
2846
2847 #[test]
2848 fn multiple_links_accumulate() {
2849 let mut b = FindingBundle::new(
2850 sample_assertion(),
2851 sample_evidence(),
2852 sample_conditions(),
2853 sample_confidence(),
2854 sample_provenance(),
2855 sample_flags(),
2856 );
2857 b.add_link("t1", "extends", "note1");
2858 b.add_link("t2", "contradicts", "note2");
2859 b.add_link("t3", "supports", "note3");
2860 assert_eq!(b.links.len(), 3);
2861 }
2862
2863 #[test]
2866 fn review_event_creation() {
2867 let event = ReviewEvent {
2868 id: "rev_abc123".into(),
2869 workspace: None,
2870 finding_id: "vf_abc".into(),
2871 reviewer: "0000-0001-2345-6789".into(),
2872 reviewed_at: "2024-01-01T00:00:00Z".into(),
2873 scope: None,
2874 status: None,
2875 action: ReviewAction::Approved,
2876 reason: "Looks correct".into(),
2877 evidence_considered: vec![],
2878 state_change: None,
2879 };
2880 assert_eq!(event.finding_id, "vf_abc");
2881 assert_eq!(event.reviewer, "0000-0001-2345-6789");
2882 }
2883
2884 #[test]
2885 fn review_action_corrected() {
2886 let action = ReviewAction::Corrected {
2887 field: "direction".into(),
2888 original: "positive".into(),
2889 corrected: "negative".into(),
2890 };
2891 if let ReviewAction::Corrected {
2892 field,
2893 original,
2894 corrected,
2895 } = action
2896 {
2897 assert_eq!(field, "direction");
2898 assert_eq!(original, "positive");
2899 assert_eq!(corrected, "negative");
2900 } else {
2901 panic!("Expected Corrected variant");
2902 }
2903 }
2904
2905 #[test]
2906 fn review_action_disputed() {
2907 let action = ReviewAction::Disputed {
2908 counter_evidence: "Later study contradicts".into(),
2909 counter_doi: Some("10.1234/counter".into()),
2910 };
2911 if let ReviewAction::Disputed {
2912 counter_evidence,
2913 counter_doi,
2914 } = action
2915 {
2916 assert_eq!(counter_evidence, "Later study contradicts");
2917 assert_eq!(counter_doi, Some("10.1234/counter".into()));
2918 } else {
2919 panic!("Expected Disputed variant");
2920 }
2921 }
2922
2923 #[test]
2926 fn confidence_update_creation() {
2927 let update = ConfidenceUpdate {
2928 finding_id: "vf_abc".into(),
2929 previous_score: 0.7,
2930 new_score: 0.85,
2931 basis: "grounded".into(),
2932 updated_by: "grounding_pass".into(),
2933 updated_at: "2024-01-01T00:00:00Z".into(),
2934 };
2935 assert_eq!(update.previous_score, 0.7);
2936 assert_eq!(update.new_score, 0.85);
2937 assert_eq!(update.updated_by, "grounding_pass");
2938 }
2939
2940 #[test]
2943 fn finding_serializes_and_deserializes() {
2944 let b = FindingBundle::new(
2945 sample_assertion(),
2946 sample_evidence(),
2947 sample_conditions(),
2948 sample_confidence(),
2949 sample_provenance(),
2950 sample_flags(),
2951 );
2952 let json = serde_json::to_string(&b).unwrap();
2953 let b2: FindingBundle = serde_json::from_str(&json).unwrap();
2954 assert_eq!(b.id, b2.id);
2955 assert_eq!(b.assertion.text, b2.assertion.text);
2956 assert_eq!(b.confidence.score, b2.confidence.score);
2957 }
2958
2959 #[test]
2960 fn valid_entity_types_list() {
2961 for t in ["gene", "protein", "compound", "other"] {
2963 assert!(VALID_ENTITY_TYPES.contains(&t), "missing {t}");
2964 }
2965 for t in ["particle", "instrument", "dataset", "quantity"] {
2967 assert!(VALID_ENTITY_TYPES.contains(&t), "missing {t}");
2968 }
2969 assert_eq!(VALID_ENTITY_TYPES.len(), 14);
2970 }
2971
2972 #[test]
2973 fn v0_10_assertion_and_source_extensions() {
2974 assert!(VALID_ASSERTION_TYPES.contains(&"measurement"));
2975 assert!(VALID_ASSERTION_TYPES.contains(&"exclusion"));
2976 assert!(VALID_PROVENANCE_SOURCE_TYPES.contains(&"data_release"));
2977 }
2978
2979 #[test]
2982 fn confidence_does_not_affect_id() {
2983 let b1 = FindingBundle::new(
2985 sample_assertion(),
2986 sample_evidence(),
2987 sample_conditions(),
2988 sample_confidence(),
2989 sample_provenance(),
2990 sample_flags(),
2991 );
2992 let mut conf2 = sample_confidence();
2993 conf2.score = 0.5;
2994 let b2 = FindingBundle::new(
2995 sample_assertion(),
2996 sample_evidence(),
2997 sample_conditions(),
2998 conf2,
2999 sample_provenance(),
3000 sample_flags(),
3001 );
3002 assert_eq!(b1.id, b2.id);
3003 }
3004
3005 #[test]
3006 fn flags_do_not_affect_id() {
3007 let b1 = FindingBundle::new(
3008 sample_assertion(),
3009 sample_evidence(),
3010 sample_conditions(),
3011 sample_confidence(),
3012 sample_provenance(),
3013 sample_flags(),
3014 );
3015 let mut flags2 = sample_flags();
3016 flags2.gap = true;
3017 flags2.contested = true;
3018 let b2 = FindingBundle::new(
3019 sample_assertion(),
3020 sample_evidence(),
3021 sample_conditions(),
3022 sample_confidence(),
3023 sample_provenance(),
3024 flags2,
3025 );
3026 assert_eq!(b1.id, b2.id);
3028 }
3029
3030 #[test]
3031 fn different_assertion_text_different_id() {
3032 let b1 = FindingBundle::new(
3033 sample_assertion(),
3034 sample_evidence(),
3035 sample_conditions(),
3036 sample_confidence(),
3037 sample_provenance(),
3038 sample_flags(),
3039 );
3040 let mut assertion2 = sample_assertion();
3041 assertion2.assertion_type = "therapeutic".into();
3042 let b2 = FindingBundle::new(
3043 assertion2,
3044 sample_evidence(),
3045 sample_conditions(),
3046 sample_confidence(),
3047 sample_provenance(),
3048 sample_flags(),
3049 );
3050 assert_ne!(b1.id, b2.id);
3051 }
3052
3053 #[test]
3054 fn different_doi_different_id() {
3055 let b1 = FindingBundle::new(
3056 sample_assertion(),
3057 sample_evidence(),
3058 sample_conditions(),
3059 sample_confidence(),
3060 sample_provenance(),
3061 sample_flags(),
3062 );
3063 let mut prov2 = sample_provenance();
3064 prov2.doi = Some("10.5678/other".into());
3065 let b2 = FindingBundle::new(
3066 sample_assertion(),
3067 sample_evidence(),
3068 sample_conditions(),
3069 sample_confidence(),
3070 prov2,
3071 sample_flags(),
3072 );
3073 assert_ne!(b1.id, b2.id);
3074 }
3075
3076 #[test]
3079 fn content_address_is_deterministic_across_runs() {
3080 let assertion1 = Assertion {
3083 text: "Mitochondrial dysfunction precedes amyloid plaque formation.".into(),
3084 assertion_type: "mechanism".into(),
3085 entities: vec![],
3086 relation: None,
3087 direction: None,
3088 causal_claim: None,
3089 causal_evidence_grade: None,
3090 };
3091 let prov1 = Provenance {
3092 source_type: "published_paper".into(),
3093 doi: Some("10.1038/s41586-023-06789-1".into()),
3094 pmid: None,
3095 pmc: None,
3096 openalex_id: None,
3097 url: None,
3098 title: "Mitochondria in AD".into(),
3099 authors: vec![],
3100 year: Some(2023),
3101 journal: None,
3102 license: None,
3103 publisher: None,
3104 funders: vec![],
3105 extraction: Extraction::default(),
3106 review: None,
3107 citation_count: None,
3108 };
3109
3110 let assertion2 = Assertion {
3112 text: "Mitochondrial dysfunction precedes amyloid plaque formation.".into(),
3113 assertion_type: "mechanism".into(),
3114 entities: vec![Entity {
3115 name: "mitochondria".into(),
3116 entity_type: "anatomical_structure".into(),
3117 identifiers: serde_json::Map::new(),
3118 canonical_id: None,
3119 candidates: vec![],
3120 aliases: vec![],
3121 resolution_provenance: None,
3122 resolution_confidence: 1.0,
3123 resolution_method: None,
3124 species_context: None,
3125 needs_review: false,
3126 }],
3127 relation: Some("precedes".into()),
3128 direction: Some("positive".into()),
3129 causal_claim: None,
3130 causal_evidence_grade: None,
3131 };
3132 let prov2 = Provenance {
3133 source_type: "published_paper".into(),
3134 doi: Some("10.1038/s41586-023-06789-1".into()),
3135 pmid: Some("37654321".into()),
3136 pmc: None,
3137 openalex_id: None,
3138 url: None,
3139 title: "Different title".into(),
3140 authors: vec![Author {
3141 name: "Jones A".into(),
3142 orcid: None,
3143 }],
3144 year: Some(2023),
3145 journal: Some("Nature".into()),
3146 license: None,
3147 publisher: None,
3148 funders: vec![],
3149 extraction: Extraction::default(),
3150 review: None,
3151 citation_count: Some(50),
3152 };
3153
3154 let id1 = FindingBundle::content_address(&assertion1, &prov1);
3155 let id2 = FindingBundle::content_address(&assertion2, &prov2);
3156 assert_eq!(
3157 id1, id2,
3158 "Same assertion text + type + DOI must produce same ID"
3159 );
3160 }
3161
3162 #[test]
3163 fn content_address_normalizes_whitespace_and_punctuation() {
3164 let assertion1 = Assertion {
3165 text: " NLRP3 activates IL-1B. ".into(),
3166 assertion_type: "mechanism".into(),
3167 entities: vec![],
3168 relation: None,
3169 direction: None,
3170 causal_claim: None,
3171 causal_evidence_grade: None,
3172 };
3173 let assertion2 = Assertion {
3174 text: "NLRP3 activates IL-1B".into(),
3175 assertion_type: "mechanism".into(),
3176 entities: vec![],
3177 relation: None,
3178 direction: None,
3179 causal_claim: None,
3180 causal_evidence_grade: None,
3181 };
3182 let prov = sample_provenance();
3183 let id1 = FindingBundle::content_address(&assertion1, &prov);
3184 let id2 = FindingBundle::content_address(&assertion2, &prov);
3185 assert_eq!(
3186 id1, id2,
3187 "Whitespace and trailing punctuation should be normalized away"
3188 );
3189 }
3190
3191 #[test]
3192 fn content_address_falls_back_to_title_when_no_doi_or_pmid() {
3193 let assertion = sample_assertion();
3194 let mut prov = sample_provenance();
3195 prov.doi = None;
3196 prov.pmid = None;
3197 prov.title = "Fallback Title".into();
3198 let id = FindingBundle::content_address(&assertion, &prov);
3199 assert!(id.starts_with("vf_"));
3200 assert_eq!(id.len(), 19); let mut prov2 = sample_provenance();
3204 prov2.doi = None;
3205 prov2.pmid = None;
3206 prov2.title = "Fallback Title".into();
3207 let id2 = FindingBundle::content_address(&assertion, &prov2);
3208 assert_eq!(id, id2);
3209 }
3210
3211 #[test]
3212 fn content_address_prefers_doi_over_pmid_over_title() {
3213 let assertion = sample_assertion();
3214
3215 let mut prov_doi = sample_provenance();
3216 prov_doi.doi = Some("10.1234/test".into());
3217 prov_doi.pmid = Some("12345".into());
3218 prov_doi.title = "Title".into();
3219
3220 let mut prov_pmid = sample_provenance();
3221 prov_pmid.doi = None;
3222 prov_pmid.pmid = Some("12345".into());
3223 prov_pmid.title = "Title".into();
3224
3225 let mut prov_title = sample_provenance();
3226 prov_title.doi = None;
3227 prov_title.pmid = None;
3228 prov_title.title = "Title".into();
3229
3230 let id_doi = FindingBundle::content_address(&assertion, &prov_doi);
3231 let id_pmid = FindingBundle::content_address(&assertion, &prov_pmid);
3232 let id_title = FindingBundle::content_address(&assertion, &prov_title);
3233
3234 assert_ne!(id_doi, id_pmid, "DOI vs PMID should differ");
3236 assert_ne!(id_pmid, id_title, "PMID vs title should differ");
3237 assert_ne!(id_doi, id_title, "DOI vs title should differ");
3238 }
3239
3240 #[test]
3243 fn compute_confidence_meta_analysis_human() {
3244 let evidence = Evidence {
3245 evidence_type: "meta_analysis".into(),
3246 model_system: "human cohorts".into(),
3247 species: Some("Homo sapiens".into()),
3248 method: "meta-analysis".into(),
3249 sample_size: Some("n=5000".into()),
3250 effect_size: None,
3251 p_value: None,
3252 replicated: true,
3253 replication_count: Some(5),
3254 evidence_spans: vec![],
3255 };
3256 let conditions = Conditions {
3257 text: String::new(),
3258 species_verified: vec![],
3259 species_unverified: vec![],
3260 in_vitro: false,
3261 in_vivo: false,
3262 human_data: true,
3263 clinical_trial: false,
3264 concentration_range: None,
3265 duration: None,
3266 age_group: None,
3267 cell_type: None,
3268 };
3269 let conf = compute_confidence(&evidence, &conditions, false);
3270 assert_eq!(conf.method, ConfidenceMethod::Computed);
3271 assert_eq!(conf.kind, ConfidenceKind::FrontierEpistemic);
3272 assert!(conf.components.is_some());
3273 let c = conf.components.unwrap();
3274 assert!((c.evidence_strength - 0.95).abs() < 0.001);
3275 assert!((c.replication_strength - 1.0).abs() < 0.001); assert!((c.sample_strength - 1.0).abs() < 0.001); assert!((c.model_relevance - 1.0).abs() < 0.001); assert!((c.review_penalty - 0.0).abs() < 0.001);
3279 assert!((c.calibration_adjustment - 0.0).abs() < 0.001);
3280 assert!((conf.score - 0.95).abs() < 0.001);
3282 }
3283
3284 #[test]
3285 fn compute_confidence_theoretical_no_replication() {
3286 let evidence = Evidence {
3287 evidence_type: "theoretical".into(),
3288 model_system: "computational".into(),
3289 species: None,
3290 method: "simulation".into(),
3291 sample_size: None,
3292 effect_size: None,
3293 p_value: None,
3294 replicated: false,
3295 replication_count: None,
3296 evidence_spans: vec![],
3297 };
3298 let conditions = Conditions {
3299 text: String::new(),
3300 species_verified: vec![],
3301 species_unverified: vec![],
3302 in_vitro: false,
3303 in_vivo: false,
3304 human_data: false,
3305 clinical_trial: false,
3306 concentration_range: None,
3307 duration: None,
3308 age_group: None,
3309 cell_type: None,
3310 };
3311 let conf = compute_confidence(&evidence, &conditions, false);
3312 let c = conf.components.unwrap();
3313 assert!((c.evidence_strength - 0.30).abs() < 0.001);
3314 assert!((c.replication_strength - 0.70).abs() < 0.001);
3315 assert!((c.sample_strength - 0.60).abs() < 0.001);
3316 assert!((c.model_relevance - 0.50).abs() < 0.001);
3317 assert!((conf.score - 0.063).abs() < 0.001);
3319 }
3320
3321 #[test]
3322 fn compute_confidence_contested_penalty() {
3323 let evidence = Evidence {
3324 evidence_type: "experimental".into(),
3325 model_system: "mouse".into(),
3326 species: Some("Mus musculus".into()),
3327 method: "Western blot".into(),
3328 sample_size: Some("n=30".into()),
3329 effect_size: None,
3330 p_value: None,
3331 replicated: false,
3332 replication_count: None,
3333 evidence_spans: vec![],
3334 };
3335 let conditions = Conditions {
3336 text: String::new(),
3337 species_verified: vec![],
3338 species_unverified: vec![],
3339 in_vitro: false,
3340 in_vivo: true,
3341 human_data: false,
3342 clinical_trial: false,
3343 concentration_range: None,
3344 duration: None,
3345 age_group: None,
3346 cell_type: None,
3347 };
3348 let uncontested = compute_confidence(&evidence, &conditions, false);
3349 let contested = compute_confidence(&evidence, &conditions, true);
3350 assert!((contested.score - (uncontested.score - 0.15)).abs() < 0.001);
3351 }
3352
3353 #[test]
3354 fn compute_confidence_sample_size_parsing() {
3355 assert_eq!(parse_sample_size("n=30"), Some(30));
3356 assert_eq!(parse_sample_size("n = 120"), Some(120));
3357 assert_eq!(parse_sample_size("3 cohorts of 20"), Some(20));
3358 assert_eq!(parse_sample_size("500"), Some(500));
3359 assert_eq!(parse_sample_size(""), None);
3360 }
3361
3362 #[test]
3363 fn compute_confidence_v010_deserialize_compat() {
3364 let json = r#"{"score": 0.75, "basis": "legacy seeded confidence", "extraction_confidence": 0.85}"#;
3366 let conf: Confidence = serde_json::from_str(json).unwrap();
3367 assert!((conf.score - 0.75).abs() < 0.001);
3368 assert_eq!(conf.kind, ConfidenceKind::FrontierEpistemic);
3369 assert_eq!(conf.method, ConfidenceMethod::LlmInitial); assert!(conf.components.is_none());
3371 }
3372
3373 #[test]
3374 fn compute_confidence_components_deserialize_legacy_names() {
3375 let json = r#"{
3376 "score": 0.75,
3377 "basis": "legacy components",
3378 "method": "computed",
3379 "components": {
3380 "evidence_grade": 0.8,
3381 "replication_factor": 0.7,
3382 "sample_strength": 0.6,
3383 "species_relevance": 0.8,
3384 "contradiction_penalty": 0.15
3385 },
3386 "extraction_confidence": 0.85
3387 }"#;
3388 let conf: Confidence = serde_json::from_str(json).unwrap();
3389 let components = conf.components.unwrap();
3390 assert!((components.evidence_strength - 0.8).abs() < 0.001);
3391 assert!((components.replication_strength - 0.7).abs() < 0.001);
3392 assert!((components.sample_strength - 0.6).abs() < 0.001);
3393 assert!((components.model_relevance - 0.8).abs() < 0.001);
3394 assert!((components.review_penalty - 0.15).abs() < 0.001);
3395 assert!((components.calibration_adjustment - 0.0).abs() < 0.001);
3396 }
3397
3398 #[test]
3399 fn compute_confidence_serializes_new_component_names_and_kind() {
3400 let conf = compute_confidence(&sample_evidence(), &sample_conditions(), false);
3401 let value = serde_json::to_value(&conf).unwrap();
3402 assert_eq!(value["kind"], "frontier_epistemic");
3403 let components = &value["components"];
3404 assert!(components.get("evidence_strength").is_some());
3405 assert!(components.get("replication_strength").is_some());
3406 assert!(components.get("model_relevance").is_some());
3407 assert!(components.get("review_penalty").is_some());
3408 assert!(components.get("calibration_adjustment").is_some());
3409 assert!(components.get("evidence_grade").is_none());
3410 assert!(components.get("replication_factor").is_none());
3411 assert!(components.get("species_relevance").is_none());
3412 assert!(components.get("contradiction_penalty").is_none());
3413 }
3414
3415 #[test]
3416 fn recompute_all_updates_findings() {
3417 let mut b = FindingBundle::new(
3418 sample_assertion(),
3419 sample_evidence(),
3420 sample_conditions(),
3421 sample_confidence(),
3422 sample_provenance(),
3423 sample_flags(),
3424 );
3425 let old_score = b.confidence.score;
3427 assert!((old_score - 0.85).abs() < 0.001);
3428 let changed = recompute_all_confidence(std::slice::from_mut(&mut b), &[]);
3429 assert_eq!(b.confidence.method, ConfidenceMethod::Computed);
3430 assert!(b.confidence.components.is_some());
3431 assert!((b.confidence.score - 0.336).abs() < 0.001);
3434 assert_eq!(changed, 1);
3435 }
3436
3437 #[test]
3440 fn causal_multiplier_neutral_when_either_field_none() {
3441 assert!((causal_consistency_multiplier(None, None) - 1.0).abs() < 1e-12);
3442 assert!(
3443 (causal_consistency_multiplier(Some(CausalClaim::Intervention), None) - 1.0).abs()
3444 < 1e-12
3445 );
3446 assert!(
3447 (causal_consistency_multiplier(None, Some(CausalEvidenceGrade::Rct)) - 1.0).abs()
3448 < 1e-12
3449 );
3450 }
3451
3452 #[test]
3453 fn rct_grade_bumps_any_claim() {
3454 for c in [
3455 CausalClaim::Correlation,
3456 CausalClaim::Mediation,
3457 CausalClaim::Intervention,
3458 ] {
3459 assert!(
3460 (causal_consistency_multiplier(Some(c), Some(CausalEvidenceGrade::Rct)) - 1.10)
3461 .abs()
3462 < 1e-12,
3463 "RCT should bump claim {c:?}"
3464 );
3465 }
3466 }
3467
3468 #[test]
3469 fn observational_intervention_gets_strong_penalty() {
3470 let m = causal_consistency_multiplier(
3471 Some(CausalClaim::Intervention),
3472 Some(CausalEvidenceGrade::Observational),
3473 );
3474 assert!(
3475 (m - 0.65).abs() < 1e-12,
3476 "intervention from observational should be 0.65, got {m}"
3477 );
3478 }
3479
3480 #[test]
3481 fn correlation_neutral_under_any_grade() {
3482 for g in [
3483 CausalEvidenceGrade::QuasiExperimental,
3484 CausalEvidenceGrade::Observational,
3485 CausalEvidenceGrade::Theoretical,
3486 ] {
3487 let m = causal_consistency_multiplier(Some(CausalClaim::Correlation), Some(g));
3488 assert!(
3489 (m - 1.0).abs() < 1e-12,
3490 "correlation should be neutral for grade {g:?}, got {m}"
3491 );
3492 }
3493 }
3494
3495 #[test]
3496 fn confidence_score_unchanged_for_pre_v0_38_findings() {
3497 let mut e = sample_evidence();
3501 e.replicated = false;
3502 e.replication_count = None;
3503 let c = sample_conditions();
3504 let score_legacy_path = compute_confidence(&e, &c, false).score;
3505 let score_kernel_path =
3506 compute_confidence_from_components(&e, &c, false, 0, 0, 0, None, None).score;
3507 assert!((score_legacy_path - score_kernel_path).abs() < 1e-12);
3508 let conf = compute_confidence_from_components(&e, &c, false, 0, 0, 0, None, None);
3510 let cc = conf.components.unwrap().causal_consistency;
3511 assert!((cc - 1.0).abs() < 1e-12);
3512 }
3513
3514 #[test]
3515 fn intervention_from_observational_drops_score_meaningfully() {
3516 let e = sample_evidence();
3519 let c = sample_conditions();
3520 let neutral = compute_confidence_from_components(&e, &c, false, 0, 0, 0, None, None);
3521 let observational_intervention = compute_confidence_from_components(
3522 &e,
3523 &c,
3524 false,
3525 0,
3526 0,
3527 0,
3528 Some(CausalClaim::Intervention),
3529 Some(CausalEvidenceGrade::Observational),
3530 );
3531 let drop = neutral.score - observational_intervention.score;
3532 assert!(
3533 drop > 0.05,
3534 "observational-intervention should drop score noticeably; got {drop}"
3535 );
3536 }
3537
3538 #[test]
3539 fn parses_bbb_review_event_with_richer_schema() {
3540 let raw = include_str!("../embedded/tests/fixtures/legacy/rev_001_bbb_correction.json");
3541 let review: ReviewEvent = serde_json::from_str(raw).unwrap();
3542
3543 assert_eq!(review.id, "rev_001_bbb_correction");
3544 assert_eq!(review.workspace.as_deref(), Some("projects/bbb-flagship"));
3545 assert_eq!(review.scope.as_deref(), Some("bbb_opening_trusted_subset"));
3546 assert_eq!(review.status.as_deref(), Some("accepted"));
3547 assert!(matches!(
3548 review.action,
3549 ReviewAction::Qualified { ref target } if target == "trusted_interpretation"
3550 ));
3551 assert_eq!(review.evidence_considered.len(), 3);
3552 assert_eq!(
3553 review.evidence_considered[0].role.as_deref(),
3554 Some("qualifier")
3555 );
3556 assert_eq!(
3557 review
3558 .state_change
3559 .as_ref()
3560 .and_then(|value| value.get("assumption_retired"))
3561 .and_then(|value| value.as_str()),
3562 Some("safe opening implies therapeutic efficacy")
3563 );
3564 }
3565
3566 #[test]
3567 fn artifact_requires_sha256_and_stable_kind() {
3568 let artifact = Artifact::new(
3569 "clinical_trial_record",
3570 "AHEAD 3-45",
3571 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
3572 Some(42),
3573 Some("application/json".into()),
3574 "local_blob",
3575 Some(".vela/artifact-blobs/sha256/aaaaaaaa".into()),
3576 Some("https://clinicaltrials.gov/study/NCT04468659".into()),
3577 Some("ClinicalTrials.gov public record".into()),
3578 vec!["vf_demo".into()],
3579 sample_provenance(),
3580 BTreeMap::new(),
3581 crate::access_tier::AccessTier::Public,
3582 )
3583 .unwrap();
3584
3585 assert!(artifact.id.starts_with("va_"));
3586 assert_eq!(
3587 artifact.content_hash,
3588 "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3589 );
3590 assert_eq!(artifact.kind, "clinical_trial_record");
3591 }
3592}