1use std::collections::HashSet;
4use std::fs;
5use std::path::{Path, PathBuf};
6
7use serde::{Deserialize, Serialize};
8
9use crate::project::Project;
10
/// Schema tag expected in the decision-brief projection file.
pub const DECISION_BRIEF_SCHEMA: &str = "vela.decision-brief.v1";
/// Schema tag expected in the trial-outcomes projection file.
pub const TRIAL_OUTCOMES_SCHEMA: &str = "vela.trial-outcomes.v1";
/// Schema tag expected in the source-verification projection file.
pub const SOURCE_VERIFICATION_SCHEMA: &str = "vela.source-verification.v1";
/// Schema tag expected in the source-ingest-plan projection file.
pub const SOURCE_INGEST_PLAN_SCHEMA: &str = "vela.source-ingest-plan.v1";

// File names looked up inside the projection directories
// (`decision/` for the first three, `ingest/` for the plan).
const DECISION_BRIEF_FILE: &str = "decision-brief.v1.json";
const TRIAL_OUTCOMES_FILE: &str = "trial-outcomes.v1.json";
const SOURCE_VERIFICATION_FILE: &str = "source-verification.v1.json";
const SOURCE_INGEST_PLAN_FILE: &str = "source-ingest-plan.v1.json";

// The fixed set of decision-question ids a brief must cover; validation
// requires exactly this many questions and flags ids outside this list.
const KNOWN_QUESTION_IDS: &[&str] = &[
    "clinical-benefit",
    "biomarkers-vs-cognition",
    "bace-failures",
    "aria-apoe4-risk",
    "delivery-constraints",
    "next-discriminating-evidence",
];
29
/// Top-level decision-brief projection, parsed from
/// `decision-brief.v1.json`. Validated by [`validate_decision_brief`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionBrief {
    /// Must equal [`DECISION_BRIEF_SCHEMA`]; checked during validation.
    pub schema: String,
    /// Frontier this brief was reviewed against; validation requires it
    /// to be pinned and to match the project's current frontier id.
    pub frontier_id: Option<String>,
    pub updated_at: String,
    pub source_frontier: String,
    /// One entry per id in `KNOWN_QUESTION_IDS` (count is validated).
    pub questions: Vec<DecisionQuestion>,
}
38
/// A single answered decision question inside a [`DecisionBrief`].
///
/// The `*_findings`, `artifact_ids`, and `correction_paths` references
/// are cross-checked against the frontier during validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionQuestion {
    /// Must be one of `KNOWN_QUESTION_IDS` and unique within the brief.
    pub id: String,
    pub title: String,
    pub short_answer: String,
    pub caveat: String,
    pub confidence: String,
    /// Finding ids backing the answer; at least one is required.
    pub supporting_findings: Vec<String>,
    /// Finding ids in tension with the answer; defaults to empty.
    #[serde(default)]
    pub tension_findings: Vec<String>,
    /// Finding ids naming open gaps; defaults to empty.
    #[serde(default)]
    pub gap_findings: Vec<String>,
    #[serde(default)]
    pub artifact_ids: Vec<String>,
    pub what_would_change_this_answer: String,
    #[serde(default)]
    pub correction_paths: Vec<DecisionCorrectionPath>,
    #[serde(default)]
    pub tags: Vec<String>,
}
59
/// A correction path attached to a [`DecisionQuestion`]: a finding plus
/// the events/artifacts that would revise the answer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionCorrectionPath {
    /// Must resolve to a finding in the frontier.
    pub finding_id: String,
    pub summary: String,
    /// Event ids; each must resolve in the frontier. Defaults to empty.
    #[serde(default)]
    pub event_ids: Vec<String>,
    /// Artifact ids; each must resolve in the frontier. Defaults to empty.
    #[serde(default)]
    pub artifact_ids: Vec<String>,
    pub status: String,
}
70
/// Trial-outcomes projection, parsed from `trial-outcomes.v1.json`.
/// Validated by [`validate_trial_outcomes`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrialOutcomes {
    /// Must equal [`TRIAL_OUTCOMES_SCHEMA`]; checked during validation.
    pub schema: String,
    /// Frontier this table was reviewed against; must be pinned.
    pub frontier_id: Option<String>,
    pub updated_at: String,
    /// At least one row is required by validation.
    pub rows: Vec<TrialOutcomeRow>,
}
78
/// One trial row in [`TrialOutcomes`]. Free-text descriptive fields plus
/// reference lists that validation resolves against the frontier.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrialOutcomeRow {
    /// Non-empty and unique within the table.
    pub id: String,
    pub program: String,
    pub drug: String,
    pub mechanism: String,
    pub phase: String,
    #[serde(default)]
    pub nct_ids: Vec<String>,
    pub population: String,
    pub disease_stage: String,
    pub amyloid_confirmation: String,
    pub duration: String,
    pub primary_endpoint: String,
    pub cognitive_result: String,
    pub biomarker_result: String,
    pub aria_or_safety_result: String,
    pub regulatory_status: String,
    /// At least one locator is required and each must pass
    /// `usable_source_locator` (https/doi/pmid/NCT).
    #[serde(default)]
    pub source_locators: Vec<String>,
    /// At least one finding reference is required; each must resolve.
    #[serde(default)]
    pub finding_ids: Vec<String>,
    #[serde(default)]
    pub artifact_ids: Vec<String>,
    #[serde(default)]
    pub tags: Vec<String>,
}
106
/// Source-verification projection, parsed from
/// `source-verification.v1.json`. Validated by
/// [`validate_source_verification`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceVerification {
    /// Must equal [`SOURCE_VERIFICATION_SCHEMA`].
    pub schema: String,
    /// Frontier this verification was reviewed against; must be pinned.
    pub frontier_id: Option<String>,
    pub verified_at: String,
    #[serde(default)]
    pub notes: Vec<String>,
    /// At least one verified source is required by validation.
    pub sources: Vec<VerifiedSource>,
}
116
/// Source-ingest-plan projection, parsed from
/// `source-ingest-plan.v1.json` inside the `ingest/` directory.
/// Validated by [`validate_source_ingest_plan`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceIngestPlan {
    /// Must equal [`SOURCE_INGEST_PLAN_SCHEMA`].
    pub schema: String,
    /// Frontier this plan was reviewed against; must be pinned.
    pub frontier_id: Option<String>,
    pub name: String,
    pub verified_at: String,
    /// Free-form policy blob; not interpreted by validation here.
    #[serde(default)]
    pub policy: serde_json::Value,
    /// At least one entry is required by validation.
    pub entries: Vec<SourceIngestEntry>,
}
127
/// One planned source in a [`SourceIngestPlan`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceIngestEntry {
    /// Non-empty and unique within the plan.
    pub id: String,
    pub name: String,
    /// Validation requires the plan as a whole to cover a fixed set of
    /// categories (registry, regulatory, dataset, code, literature).
    pub category: String,
    /// Must be "P0", "P1", or "P2".
    pub priority: String,
    pub representation: String,
    pub source_type: String,
    /// Must pass `usable_source_locator` (https/doi/pmid/NCT).
    pub locator: String,
    /// One of "ingested" | "pointer_only" | "candidate" | "excluded".
    pub ingest_status: String,
    /// Required iff `ingest_status == "ingested"`; must resolve to a
    /// frontier artifact.
    pub current_frontier_artifact_id: Option<String>,
    pub access_terms: String,
    pub license_note: String,
    /// At least one is required; each must resolve to a frontier finding.
    #[serde(default)]
    pub target_findings: Vec<String>,
    pub target_use: String,
}
145
/// One verified external source in a [`SourceVerification`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerifiedSource {
    /// Non-empty and unique within the verification record.
    pub id: String,
    pub title: String,
    /// Must pass `usable_source_locator`.
    pub url: String,
    pub agency: String,
    pub current_status: String,
}
154
/// A single validation problem: where it was found (`path`, a dotted
/// JSON-ish locator such as `entries[0].id`) and what is wrong.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectionIssue {
    pub path: String,
    pub message: String,
}
160
/// Result envelope for loading + validating a projection file.
///
/// `available` means the file existed and was readable (parsing may
/// still have failed); `ok` means it was loaded AND produced no
/// validation issues. `projection` is `Some` only on a successful parse.
#[derive(Debug, Clone, Serialize)]
pub struct ProjectionLoad<T>
where
    T: Serialize,
{
    pub ok: bool,
    pub available: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub projection: Option<T>,
    pub issues: Vec<ProjectionIssue>,
    /// Load-level failure (missing directory/file, parse error); `None`
    /// once validation ran.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
174
/// Locate the `decision/` projection directory for `source`.
///
/// When `source` is itself a directory, `source/decision` is used;
/// otherwise the `decision/` sibling of the file (inside its parent
/// directory). Returns `None` when the directory does not exist.
pub fn decision_projection_dir(source: &Path) -> Option<PathBuf> {
    let base = if source.is_dir() {
        source.to_path_buf()
    } else {
        source.parent()?.to_path_buf()
    };
    let candidate = base.join("decision");
    if candidate.exists() {
        Some(candidate)
    } else {
        None
    }
}
185
186pub fn load_decision_brief(source: &Path, project: &Project) -> ProjectionLoad<DecisionBrief> {
187 load_projection(
188 source,
189 project,
190 DECISION_BRIEF_FILE,
191 validate_decision_brief,
192 )
193}
194
195pub fn load_trial_outcomes(source: &Path, project: &Project) -> ProjectionLoad<TrialOutcomes> {
196 load_projection(
197 source,
198 project,
199 TRIAL_OUTCOMES_FILE,
200 validate_trial_outcomes,
201 )
202}
203
204pub fn load_source_verification(
205 source: &Path,
206 project: &Project,
207) -> ProjectionLoad<SourceVerification> {
208 load_projection(
209 source,
210 project,
211 SOURCE_VERIFICATION_FILE,
212 validate_source_verification,
213 )
214}
215
216pub fn load_source_ingest_plan(
217 source: &Path,
218 project: &Project,
219) -> ProjectionLoad<SourceIngestPlan> {
220 let Some(dir) = source_ingest_projection_dir(source) else {
221 return ProjectionLoad {
222 ok: false,
223 available: false,
224 projection: None,
225 issues: vec![],
226 error: Some("source ingest plan directory not found".to_string()),
227 };
228 };
229 load_projection_from_dir(
230 &dir,
231 project,
232 SOURCE_INGEST_PLAN_FILE,
233 validate_source_ingest_plan,
234 )
235}
236
237fn load_projection<T, F>(
238 source: &Path,
239 project: &Project,
240 file_name: &str,
241 validate: F,
242) -> ProjectionLoad<T>
243where
244 T: for<'de> Deserialize<'de> + Serialize,
245 F: Fn(&T, &Project) -> Vec<ProjectionIssue>,
246{
247 let Some(dir) = decision_projection_dir(source) else {
248 return ProjectionLoad {
249 ok: false,
250 available: false,
251 projection: None,
252 issues: vec![],
253 error: Some("decision projection directory not found".to_string()),
254 };
255 };
256 load_projection_from_dir(&dir, project, file_name, validate)
257}
258
259fn load_projection_from_dir<T, F>(
260 dir: &Path,
261 project: &Project,
262 file_name: &str,
263 validate: F,
264) -> ProjectionLoad<T>
265where
266 T: for<'de> Deserialize<'de> + Serialize,
267 F: Fn(&T, &Project) -> Vec<ProjectionIssue>,
268{
269 let path = dir.join(file_name);
270 let Ok(bytes) = fs::read_to_string(&path) else {
271 return ProjectionLoad {
272 ok: false,
273 available: false,
274 projection: None,
275 issues: vec![],
276 error: Some(format!("projection file not found: {}", path.display())),
277 };
278 };
279 let projection = match serde_json::from_str::<T>(&bytes) {
280 Ok(projection) => projection,
281 Err(error) => {
282 return ProjectionLoad {
283 ok: false,
284 available: true,
285 projection: None,
286 issues: vec![],
287 error: Some(format!("parse {}: {error}", path.display())),
288 };
289 }
290 };
291 let issues = validate(&projection, project);
292 ProjectionLoad {
293 ok: issues.is_empty(),
294 available: true,
295 projection: Some(projection),
296 issues,
297 error: None,
298 }
299}
300
/// Locate the `ingest/` projection directory for `source`.
///
/// Mirrors `decision_projection_dir` but for the ingest plan: a
/// directory source looks inside itself, a file source looks in its
/// parent. Returns `None` when the directory does not exist.
pub fn source_ingest_projection_dir(source: &Path) -> Option<PathBuf> {
    let base = if source.is_dir() {
        source.to_path_buf()
    } else {
        source.parent()?.to_path_buf()
    };
    let candidate = base.join("ingest");
    candidate.exists().then_some(candidate)
}
311
312pub fn validate_decision_brief(brief: &DecisionBrief, project: &Project) -> Vec<ProjectionIssue> {
313 let finding_ids = project
314 .findings
315 .iter()
316 .map(|finding| finding.id.as_str())
317 .collect::<HashSet<_>>();
318 let artifact_ids = project
319 .artifacts
320 .iter()
321 .map(|artifact| artifact.id.as_str())
322 .collect::<HashSet<_>>();
323 let event_ids = project
324 .events
325 .iter()
326 .map(|event| event.id.as_str())
327 .collect::<HashSet<_>>();
328 let mut issues = validate_projection_frontier_id(brief.frontier_id.as_deref(), project);
329 issues.extend(validate_decision_brief_against_sets(
330 brief,
331 &finding_ids,
332 &artifact_ids,
333 &event_ids,
334 ));
335 issues
336}
337
338pub fn validate_trial_outcomes(
339 outcomes: &TrialOutcomes,
340 project: &Project,
341) -> Vec<ProjectionIssue> {
342 let finding_ids = project
343 .findings
344 .iter()
345 .map(|finding| finding.id.as_str())
346 .collect::<HashSet<_>>();
347 let artifact_ids = project
348 .artifacts
349 .iter()
350 .map(|artifact| artifact.id.as_str())
351 .collect::<HashSet<_>>();
352 let mut issues = validate_projection_frontier_id(outcomes.frontier_id.as_deref(), project);
353 issues.extend(validate_trial_outcomes_against_sets(
354 outcomes,
355 &finding_ids,
356 &artifact_ids,
357 ));
358 issues
359}
360
361pub fn validate_source_verification(
362 verification: &SourceVerification,
363 project: &Project,
364) -> Vec<ProjectionIssue> {
365 let mut issues = validate_projection_frontier_id(verification.frontier_id.as_deref(), project);
366 issues.extend(validate_source_verification_shape(verification));
367 issues
368}
369
370pub fn validate_source_ingest_plan(
371 plan: &SourceIngestPlan,
372 project: &Project,
373) -> Vec<ProjectionIssue> {
374 let finding_ids = project
375 .findings
376 .iter()
377 .map(|finding| finding.id.as_str())
378 .collect::<HashSet<_>>();
379 let artifact_ids = project
380 .artifacts
381 .iter()
382 .map(|artifact| artifact.id.as_str())
383 .collect::<HashSet<_>>();
384 let mut issues = validate_projection_frontier_id(plan.frontier_id.as_deref(), project);
385 issues.extend(validate_source_ingest_plan_against_sets(
386 plan,
387 &finding_ids,
388 &artifact_ids,
389 ));
390 issues
391}
392
393fn validate_projection_frontier_id(
394 projected_frontier_id: Option<&str>,
395 project: &Project,
396) -> Vec<ProjectionIssue> {
397 let mut issues = Vec::new();
398 let actual = project.frontier_id();
399 match projected_frontier_id {
400 Some(id) if id == actual => {}
401 Some(id) => push_issue(
402 &mut issues,
403 "frontier_id",
404 format!("projection frontier_id '{id}' does not match frontier '{actual}'"),
405 ),
406 None => push_issue(
407 &mut issues,
408 "frontier_id",
409 "projection must pin the frontier_id it was reviewed against",
410 ),
411 }
412 issues
413}
414
415fn validate_source_ingest_plan_against_sets(
416 plan: &SourceIngestPlan,
417 finding_ids: &HashSet<&str>,
418 artifact_ids: &HashSet<&str>,
419) -> Vec<ProjectionIssue> {
420 let mut issues = Vec::new();
421 if plan.schema != SOURCE_INGEST_PLAN_SCHEMA {
422 push_issue(
423 &mut issues,
424 "schema",
425 format!("expected {SOURCE_INGEST_PLAN_SCHEMA}"),
426 );
427 }
428 require_non_empty(&mut issues, "name", &plan.name);
429 require_non_empty(&mut issues, "verified_at", &plan.verified_at);
430 if plan.entries.is_empty() {
431 push_issue(
432 &mut issues,
433 "entries",
434 "at least one source entry is required",
435 );
436 }
437 let mut seen = HashSet::new();
438 let mut categories = HashSet::new();
439 let mut priorities = HashSet::new();
440 let mut ingested = 0usize;
441 for (idx, entry) in plan.entries.iter().enumerate() {
442 let path = format!("entries[{idx}]");
443 require_non_empty(&mut issues, &format!("{path}.id"), &entry.id);
444 if !entry.id.trim().is_empty() && !seen.insert(entry.id.as_str()) {
445 push_issue(
446 &mut issues,
447 format!("{path}.id"),
448 format!("duplicate source entry id '{}'", entry.id),
449 );
450 }
451 require_non_empty(&mut issues, &format!("{path}.name"), &entry.name);
452 require_non_empty(&mut issues, &format!("{path}.category"), &entry.category);
453 require_non_empty(&mut issues, &format!("{path}.priority"), &entry.priority);
454 require_non_empty(
455 &mut issues,
456 &format!("{path}.representation"),
457 &entry.representation,
458 );
459 require_non_empty(
460 &mut issues,
461 &format!("{path}.source_type"),
462 &entry.source_type,
463 );
464 require_non_empty(
465 &mut issues,
466 &format!("{path}.ingest_status"),
467 &entry.ingest_status,
468 );
469 require_non_empty(
470 &mut issues,
471 &format!("{path}.access_terms"),
472 &entry.access_terms,
473 );
474 require_non_empty(
475 &mut issues,
476 &format!("{path}.license_note"),
477 &entry.license_note,
478 );
479 require_non_empty(
480 &mut issues,
481 &format!("{path}.target_use"),
482 &entry.target_use,
483 );
484 if !usable_source_locator(&entry.locator) {
485 push_issue(
486 &mut issues,
487 format!("{path}.locator"),
488 format!("source locator '{}' is not usable", entry.locator),
489 );
490 }
491 categories.insert(entry.category.as_str());
492 priorities.insert(entry.priority.as_str());
493 if !matches!(entry.priority.as_str(), "P0" | "P1" | "P2") {
494 push_issue(
495 &mut issues,
496 format!("{path}.priority"),
497 "priority must be P0, P1, or P2",
498 );
499 }
500 if !matches!(
501 entry.ingest_status.as_str(),
502 "ingested" | "pointer_only" | "candidate" | "excluded"
503 ) {
504 push_issue(
505 &mut issues,
506 format!("{path}.ingest_status"),
507 "unknown ingest status",
508 );
509 }
510 match entry.ingest_status.as_str() {
511 "ingested" => {
512 ingested += 1;
513 let Some(id) = entry.current_frontier_artifact_id.as_deref() else {
514 push_issue(
515 &mut issues,
516 format!("{path}.current_frontier_artifact_id"),
517 "ingested entries must name a frontier artifact",
518 );
519 continue;
520 };
521 if !artifact_ids.contains(id) {
522 push_issue(
523 &mut issues,
524 format!("{path}.current_frontier_artifact_id"),
525 format!("artifact '{id}' does not resolve in frontier"),
526 );
527 }
528 }
529 _ => {
530 if entry.current_frontier_artifact_id.is_some() {
531 push_issue(
532 &mut issues,
533 format!("{path}.current_frontier_artifact_id"),
534 "only ingested entries may name a frontier artifact",
535 );
536 }
537 }
538 }
539 if entry.target_findings.is_empty() {
540 push_issue(
541 &mut issues,
542 format!("{path}.target_findings"),
543 "at least one target finding is required",
544 );
545 }
546 for id in &entry.target_findings {
547 if !finding_ids.contains(id.as_str()) {
548 push_issue(
549 &mut issues,
550 format!("{path}.target_findings"),
551 format!("finding '{id}' does not resolve in frontier"),
552 );
553 }
554 }
555 }
556 for required in [
557 "clinical_trial_registry",
558 "regulatory",
559 "dataset_or_registry",
560 "code_or_tool",
561 "literature_or_table",
562 ] {
563 if !categories.contains(required) {
564 push_issue(
565 &mut issues,
566 "entries.category",
567 format!("missing source category '{required}'"),
568 );
569 }
570 }
571 for required in ["P0", "P1"] {
572 if !priorities.contains(required) {
573 push_issue(
574 &mut issues,
575 "entries.priority",
576 format!("missing source priority '{required}'"),
577 );
578 }
579 }
580 if ingested == 0 {
581 push_issue(
582 &mut issues,
583 "entries.ingest_status",
584 "at least one source entry must be ingested",
585 );
586 }
587 issues
588}
589
/// Structural + referential validation of a [`DecisionBrief`] against
/// pre-built frontier id sets (findings, artifacts, events).
///
/// Checks, in order: schema tag, exact question count, per-question id
/// membership in `KNOWN_QUESTION_IDS` and uniqueness, required text
/// fields, at least one supporting finding, resolution of every
/// finding/artifact reference, and the same for each correction path.
fn validate_decision_brief_against_sets(
    brief: &DecisionBrief,
    finding_ids: &HashSet<&str>,
    artifact_ids: &HashSet<&str>,
    event_ids: &HashSet<&str>,
) -> Vec<ProjectionIssue> {
    let mut issues = Vec::new();
    if brief.schema != DECISION_BRIEF_SCHEMA {
        push_issue(
            &mut issues,
            "schema",
            format!("expected {DECISION_BRIEF_SCHEMA}"),
        );
    }
    // The brief must answer exactly the known question set; combined
    // with the per-id checks below this enforces a 1:1 mapping.
    if brief.questions.len() != KNOWN_QUESTION_IDS.len() {
        push_issue(
            &mut issues,
            "questions",
            format!("expected {} decision questions", KNOWN_QUESTION_IDS.len()),
        );
    }
    let mut seen = HashSet::new();
    for (idx, question) in brief.questions.iter().enumerate() {
        let path = format!("questions[{idx}]");
        if !KNOWN_QUESTION_IDS.contains(&question.id.as_str()) {
            push_issue(
                &mut issues,
                format!("{path}.id"),
                format!("unknown decision question id '{}'", question.id),
            );
        }
        if !seen.insert(question.id.as_str()) {
            push_issue(
                &mut issues,
                format!("{path}.id"),
                format!("duplicate decision question id '{}'", question.id),
            );
        }
        require_non_empty(&mut issues, &format!("{path}.title"), &question.title);
        require_non_empty(
            &mut issues,
            &format!("{path}.short_answer"),
            &question.short_answer,
        );
        require_non_empty(&mut issues, &format!("{path}.caveat"), &question.caveat);
        require_non_empty(
            &mut issues,
            &format!("{path}.what_would_change_this_answer"),
            &question.what_would_change_this_answer,
        );
        if question.supporting_findings.is_empty() {
            push_issue(
                &mut issues,
                format!("{path}.supporting_findings"),
                "at least one supporting finding is required",
            );
        }
        // All three finding lists share one resolution check; the issue
        // path deliberately collapses them to "finding_refs".
        for id in question
            .supporting_findings
            .iter()
            .chain(question.tension_findings.iter())
            .chain(question.gap_findings.iter())
        {
            if !finding_ids.contains(id.as_str()) {
                push_issue(
                    &mut issues,
                    format!("{path}.finding_refs"),
                    format!("finding '{id}' does not resolve in frontier"),
                );
            }
        }
        for id in &question.artifact_ids {
            if !artifact_ids.contains(id.as_str()) {
                push_issue(
                    &mut issues,
                    format!("{path}.artifact_ids"),
                    format!("artifact '{id}' does not resolve in frontier"),
                );
            }
        }
        for (path_idx, correction_path) in question.correction_paths.iter().enumerate() {
            let correction_path_path = format!("{path}.correction_paths[{path_idx}]");
            require_non_empty(
                &mut issues,
                &format!("{correction_path_path}.summary"),
                &correction_path.summary,
            );
            require_non_empty(
                &mut issues,
                &format!("{correction_path_path}.status"),
                &correction_path.status,
            );
            if !finding_ids.contains(correction_path.finding_id.as_str()) {
                push_issue(
                    &mut issues,
                    format!("{correction_path_path}.finding_id"),
                    format!(
                        "finding '{}' does not resolve in frontier",
                        correction_path.finding_id
                    ),
                );
            }
            for id in &correction_path.event_ids {
                if !event_ids.contains(id.as_str()) {
                    push_issue(
                        &mut issues,
                        format!("{correction_path_path}.event_ids"),
                        format!("event '{id}' does not resolve in frontier"),
                    );
                }
            }
            for id in &correction_path.artifact_ids {
                if !artifact_ids.contains(id.as_str()) {
                    push_issue(
                        &mut issues,
                        format!("{correction_path_path}.artifact_ids"),
                        format!("artifact '{id}' does not resolve in frontier"),
                    );
                }
            }
        }
    }
    issues
}
714
/// Structural + referential validation of a [`TrialOutcomes`] table
/// against pre-built frontier id sets.
///
/// Checks: schema tag, at least one row, per-row required fields and
/// unique ids, usable source locators, at least one finding reference,
/// and resolution of all finding/artifact references.
fn validate_trial_outcomes_against_sets(
    outcomes: &TrialOutcomes,
    finding_ids: &HashSet<&str>,
    artifact_ids: &HashSet<&str>,
) -> Vec<ProjectionIssue> {
    let mut issues = Vec::new();
    if outcomes.schema != TRIAL_OUTCOMES_SCHEMA {
        push_issue(
            &mut issues,
            "schema",
            format!("expected {TRIAL_OUTCOMES_SCHEMA}"),
        );
    }
    if outcomes.rows.is_empty() {
        push_issue(&mut issues, "rows", "at least one trial row is required");
    }
    let mut seen = HashSet::new();
    for (idx, row) in outcomes.rows.iter().enumerate() {
        let path = format!("rows[{idx}]");
        require_non_empty(&mut issues, &format!("{path}.id"), &row.id);
        // Only flag duplicates for non-blank ids; blank ids already got
        // the non-empty issue above.
        if !row.id.trim().is_empty() && !seen.insert(row.id.as_str()) {
            push_issue(
                &mut issues,
                format!("{path}.id"),
                format!("duplicate trial row id '{}'", row.id),
            );
        }
        require_non_empty(&mut issues, &format!("{path}.program"), &row.program);
        require_non_empty(&mut issues, &format!("{path}.drug"), &row.drug);
        require_non_empty(
            &mut issues,
            &format!("{path}.primary_endpoint"),
            &row.primary_endpoint,
        );
        require_non_empty(
            &mut issues,
            &format!("{path}.regulatory_status"),
            &row.regulatory_status,
        );
        if row.source_locators.is_empty() {
            push_issue(
                &mut issues,
                format!("{path}.source_locators"),
                "at least one source locator is required",
            );
        }
        for locator in &row.source_locators {
            if !usable_source_locator(locator) {
                push_issue(
                    &mut issues,
                    format!("{path}.source_locators"),
                    format!("source locator '{locator}' is not usable"),
                );
            }
        }
        if row.finding_ids.is_empty() {
            push_issue(
                &mut issues,
                format!("{path}.finding_ids"),
                "at least one finding reference is required",
            );
        }
        for id in &row.finding_ids {
            if !finding_ids.contains(id.as_str()) {
                push_issue(
                    &mut issues,
                    format!("{path}.finding_ids"),
                    format!("finding '{id}' does not resolve in frontier"),
                );
            }
        }
        for id in &row.artifact_ids {
            if !artifact_ids.contains(id.as_str()) {
                push_issue(
                    &mut issues,
                    format!("{path}.artifact_ids"),
                    format!("artifact '{id}' does not resolve in frontier"),
                );
            }
        }
    }
    issues
}
798
799fn validate_source_verification_shape(verification: &SourceVerification) -> Vec<ProjectionIssue> {
800 let mut issues = Vec::new();
801 if verification.schema != SOURCE_VERIFICATION_SCHEMA {
802 push_issue(
803 &mut issues,
804 "schema",
805 format!("expected {SOURCE_VERIFICATION_SCHEMA}"),
806 );
807 }
808 require_non_empty(&mut issues, "verified_at", &verification.verified_at);
809 if verification.sources.is_empty() {
810 push_issue(
811 &mut issues,
812 "sources",
813 "at least one verified source is required",
814 );
815 }
816 let mut seen = HashSet::new();
817 for (idx, source) in verification.sources.iter().enumerate() {
818 let path = format!("sources[{idx}]");
819 require_non_empty(&mut issues, &format!("{path}.id"), &source.id);
820 if !source.id.trim().is_empty() && !seen.insert(source.id.as_str()) {
821 push_issue(
822 &mut issues,
823 format!("{path}.id"),
824 format!("duplicate source verification id '{}'", source.id),
825 );
826 }
827 require_non_empty(&mut issues, &format!("{path}.title"), &source.title);
828 require_non_empty(&mut issues, &format!("{path}.agency"), &source.agency);
829 require_non_empty(
830 &mut issues,
831 &format!("{path}.current_status"),
832 &source.current_status,
833 );
834 if !usable_source_locator(&source.url) {
835 push_issue(
836 &mut issues,
837 format!("{path}.url"),
838 format!("source url '{}' is not usable", source.url),
839 );
840 }
841 }
842 issues
843}
844
/// A locator is usable when, after trimming whitespace, it is an https
/// URL, a DOI, a PubMed id, or a ClinicalTrials.gov NCT number.
fn usable_source_locator(locator: &str) -> bool {
    const ACCEPTED_PREFIXES: [&str; 4] = ["https://", "doi:", "pmid:", "NCT"];
    let candidate = locator.trim();
    ACCEPTED_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(prefix))
}
852
853fn require_non_empty(issues: &mut Vec<ProjectionIssue>, path: &str, value: &str) {
854 if value.trim().is_empty() {
855 push_issue(issues, path, "field must be non-empty");
856 }
857}
858
859fn push_issue(
860 issues: &mut Vec<ProjectionIssue>,
861 path: impl Into<String>,
862 message: impl Into<String>,
863) {
864 issues.push(ProjectionIssue {
865 path: path.into(),
866 message: message.into(),
867 });
868}
869
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: finding ids treated as resolvable in the frontier.
    fn finding_ids() -> HashSet<&'static str> {
        HashSet::from(["vf_known", "vf_tension", "vf_gap"])
    }

    // Fixture: artifact ids treated as resolvable in the frontier.
    fn artifact_ids() -> HashSet<&'static str> {
        HashSet::from(["va_known"])
    }

    // Fixture: event ids treated as resolvable in the frontier.
    fn event_ids() -> HashSet<&'static str> {
        HashSet::from(["vev_known"])
    }

    // A fully-populated question whose references all resolve against
    // the fixtures above.
    fn valid_question(id: &str) -> DecisionQuestion {
        DecisionQuestion {
            id: id.to_string(),
            title: "Question".to_string(),
            short_answer: "Bounded answer.".to_string(),
            caveat: "Scoped caveat.".to_string(),
            confidence: "medium".to_string(),
            supporting_findings: vec!["vf_known".to_string()],
            tension_findings: vec!["vf_tension".to_string()],
            gap_findings: vec!["vf_gap".to_string()],
            artifact_ids: vec!["va_known".to_string()],
            what_would_change_this_answer: "A prospective readout.".to_string(),
            correction_paths: vec![DecisionCorrectionPath {
                finding_id: "vf_known".to_string(),
                summary: "Reviewed and caveated for proof use.".to_string(),
                event_ids: vec!["vev_known".to_string()],
                artifact_ids: vec!["va_known".to_string()],
                status: "reviewed".to_string(),
            }],
            tags: vec![],
        }
    }

    // A brief answering every known question id exactly once.
    fn valid_brief() -> DecisionBrief {
        DecisionBrief {
            schema: DECISION_BRIEF_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            updated_at: "2026-05-06T00:00:00Z".to_string(),
            source_frontier: "Test frontier".to_string(),
            questions: KNOWN_QUESTION_IDS
                .iter()
                .map(|id| valid_question(id))
                .collect(),
        }
    }

    // Happy path: a fully-valid brief yields no issues.
    #[test]
    fn decision_brief_validates_all_references() {
        let issues = validate_decision_brief_against_sets(
            &valid_brief(),
            &finding_ids(),
            &artifact_ids(),
            &event_ids(),
        );
        assert!(issues.is_empty(), "{issues:?}");
    }

    // Unknown question id plus dangling finding/artifact/event refs
    // must each surface a distinct issue message.
    #[test]
    fn decision_brief_reports_unknown_question_and_missing_refs() {
        let mut brief = valid_brief();
        brief.questions[0].id = "treatment-advice".to_string();
        brief.questions[0].supporting_findings = vec!["vf_missing".to_string()];
        brief.questions[0].artifact_ids = vec!["va_missing".to_string()];
        brief.questions[0].correction_paths[0].event_ids = vec!["vev_missing".to_string()];

        let issues = validate_decision_brief_against_sets(
            &brief,
            &finding_ids(),
            &artifact_ids(),
            &event_ids(),
        );
        let messages = issues
            .iter()
            .map(|issue| issue.message.as_str())
            .collect::<Vec<_>>()
            .join("\n");
        assert!(messages.contains("unknown decision question id"));
        assert!(messages.contains("finding 'vf_missing' does not resolve"));
        assert!(messages.contains("artifact 'va_missing' does not resolve"));
        assert!(messages.contains("event 'vev_missing' does not resolve"));
    }

    // Rows with an unusable (ftp) locator and dangling refs must be
    // flagged even when all descriptive fields are populated.
    #[test]
    fn trial_summary_requires_source_locator_and_refs() {
        let outcomes = TrialOutcomes {
            schema: TRIAL_OUTCOMES_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            updated_at: "2026-05-06T00:00:00Z".to_string(),
            rows: vec![TrialOutcomeRow {
                id: "clarity-ad".to_string(),
                program: "CLARITY AD".to_string(),
                drug: "lecanemab".to_string(),
                mechanism: "anti-protofibril amyloid beta antibody".to_string(),
                phase: "Phase 3".to_string(),
                nct_ids: vec!["NCT03887455".to_string()],
                population: "Early symptomatic AD".to_string(),
                disease_stage: "MCI or mild dementia".to_string(),
                amyloid_confirmation: "Required".to_string(),
                duration: "18 months".to_string(),
                primary_endpoint: "CDR-SB".to_string(),
                cognitive_result: "Positive, modest absolute effect.".to_string(),
                biomarker_result: "Amyloid reduced.".to_string(),
                aria_or_safety_result: "ARIA risk requires monitoring.".to_string(),
                regulatory_status: "FDA traditional approval.".to_string(),
                source_locators: vec!["ftp://not-accepted".to_string()],
                finding_ids: vec!["vf_missing".to_string()],
                artifact_ids: vec!["va_missing".to_string()],
                tags: vec![],
            }],
        };

        let issues =
            validate_trial_outcomes_against_sets(&outcomes, &finding_ids(), &artifact_ids());
        let messages = issues
            .iter()
            .map(|issue| issue.message.as_str())
            .collect::<Vec<_>>()
            .join("\n");
        assert!(messages.contains("source locator 'ftp://not-accepted' is not usable"));
        assert!(messages.contains("finding 'vf_missing' does not resolve"));
        assert!(messages.contains("artifact 'va_missing' does not resolve"));
    }

    // Happy path: a well-formed verification record yields no issues.
    #[test]
    fn source_verification_requires_current_source_records() {
        let verification = SourceVerification {
            schema: SOURCE_VERIFICATION_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            verified_at: "2026-05-06T00:00:00Z".to_string(),
            notes: vec![],
            sources: vec![VerifiedSource {
                id: "fda-label".to_string(),
                title: "FDA label".to_string(),
                url: "https://www.accessdata.fda.gov/example.pdf".to_string(),
                agency: "FDA".to_string(),
                current_status: "Current label checked for the demo frontier.".to_string(),
            }],
        };

        let issues = validate_source_verification_shape(&verification);
        assert!(issues.is_empty(), "{issues:?}");
    }

    // Empty timestamp/status and a non-https url must both be flagged.
    #[test]
    fn source_verification_reports_unusable_urls_and_missing_status() {
        let verification = SourceVerification {
            schema: SOURCE_VERIFICATION_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            verified_at: "".to_string(),
            notes: vec![],
            sources: vec![VerifiedSource {
                id: "cms".to_string(),
                title: "CMS record".to_string(),
                url: "ftp://not-supported".to_string(),
                agency: "CMS".to_string(),
                current_status: "".to_string(),
            }],
        };

        let issues = validate_source_verification_shape(&verification);
        let messages = issues
            .iter()
            .map(|issue| issue.message.as_str())
            .collect::<Vec<_>>()
            .join("\n");
        assert!(messages.contains("field must be non-empty"));
        assert!(messages.contains("source url 'ftp://not-supported' is not usable"));
    }

    // Happy path: a plan covering every required category/priority with
    // one ingested entry yields no issues.
    #[test]
    fn source_ingest_plan_requires_artifacts_and_target_findings() {
        let plan = SourceIngestPlan {
            schema: SOURCE_INGEST_PLAN_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            name: "Focused source plan".to_string(),
            verified_at: "2026-05-06T00:00:00Z".to_string(),
            policy: serde_json::json!({}),
            entries: vec![
                SourceIngestEntry {
                    id: "ct-primary".to_string(),
                    name: "Primary trial registry".to_string(),
                    category: "clinical_trial_registry".to_string(),
                    priority: "P0".to_string(),
                    representation: "clinical_trial_record".to_string(),
                    source_type: "registry_record".to_string(),
                    locator: "https://clinicaltrials.gov/study/NCT03887455".to_string(),
                    ingest_status: "ingested".to_string(),
                    current_frontier_artifact_id: Some("va_known".to_string()),
                    access_terms: "Public registry metadata".to_string(),
                    license_note: "Registry terms apply".to_string(),
                    target_findings: vec!["vf_known".to_string()],
                    target_use: "Anchor the trial result".to_string(),
                },
                SourceIngestEntry {
                    id: "reg-label".to_string(),
                    name: "Regulatory label".to_string(),
                    category: "regulatory".to_string(),
                    priority: "P1".to_string(),
                    representation: "registry_record".to_string(),
                    source_type: "regulatory_record".to_string(),
                    locator: "https://www.fda.gov/example".to_string(),
                    ingest_status: "candidate".to_string(),
                    current_frontier_artifact_id: None,
                    access_terms: "Public locator".to_string(),
                    license_note: "Regulatory terms apply".to_string(),
                    target_findings: vec!["vf_known".to_string()],
                    target_use: "Track current label status".to_string(),
                },
                SourceIngestEntry {
                    id: "dataset-access".to_string(),
                    name: "Dataset access record".to_string(),
                    category: "dataset_or_registry".to_string(),
                    priority: "P1".to_string(),
                    representation: "dataset".to_string(),
                    source_type: "dataset_access_record".to_string(),
                    locator: "https://adni.loni.usc.edu/data-samples/access-data/".to_string(),
                    ingest_status: "candidate".to_string(),
                    current_frontier_artifact_id: None,
                    access_terms: "Registration required".to_string(),
                    license_note: "Do not mirror participant data".to_string(),
                    target_findings: vec!["vf_known".to_string()],
                    target_use: "Represent longitudinal biomarker access".to_string(),
                },
                SourceIngestEntry {
                    id: "code-gate".to_string(),
                    name: "Release gate".to_string(),
                    category: "code_or_tool".to_string(),
                    priority: "P1".to_string(),
                    representation: "code".to_string(),
                    source_type: "repository_code".to_string(),
                    locator: "https://github.com/vela-science/vela".to_string(),
                    ingest_status: "candidate".to_string(),
                    current_frontier_artifact_id: None,
                    access_terms: "Repository code".to_string(),
                    license_note: "Repository terms apply".to_string(),
                    target_findings: vec!["vf_known".to_string()],
                    target_use: "Make validation executable".to_string(),
                },
                SourceIngestEntry {
                    id: "decision-table".to_string(),
                    name: "Decision table".to_string(),
                    category: "literature_or_table".to_string(),
                    priority: "P1".to_string(),
                    representation: "table".to_string(),
                    source_type: "frontier_projection".to_string(),
                    locator: "https://vela-site.fly.dev/workbench".to_string(),
                    ingest_status: "candidate".to_string(),
                    current_frontier_artifact_id: None,
                    access_terms: "Public metadata".to_string(),
                    license_note: "Source terms apply".to_string(),
                    target_findings: vec!["vf_known".to_string()],
                    target_use: "Serve decision projection".to_string(),
                },
            ],
        };

        let issues =
            validate_source_ingest_plan_against_sets(&plan, &finding_ids(), &artifact_ids());
        assert!(issues.is_empty(), "{issues:?}");
    }

    // A single bad entry (unusable locator, invalid priority, dangling
    // artifact and finding refs) must surface all four issue messages.
    #[test]
    fn source_ingest_plan_reports_unresolved_ingested_entries() {
        let plan = SourceIngestPlan {
            schema: SOURCE_INGEST_PLAN_SCHEMA.to_string(),
            frontier_id: Some("vfr_test".to_string()),
            name: "Focused source plan".to_string(),
            verified_at: "2026-05-06T00:00:00Z".to_string(),
            policy: serde_json::json!({}),
            entries: vec![SourceIngestEntry {
                id: "ct-primary".to_string(),
                name: "Primary trial registry".to_string(),
                category: "clinical_trial_registry".to_string(),
                priority: "urgent".to_string(),
                representation: "clinical_trial_record".to_string(),
                source_type: "registry_record".to_string(),
                locator: "ftp://not-usable".to_string(),
                ingest_status: "ingested".to_string(),
                current_frontier_artifact_id: Some("va_missing".to_string()),
                access_terms: "Public registry metadata".to_string(),
                license_note: "Registry terms apply".to_string(),
                target_findings: vec!["vf_missing".to_string()],
                target_use: "Anchor the trial result".to_string(),
            }],
        };

        let issues =
            validate_source_ingest_plan_against_sets(&plan, &finding_ids(), &artifact_ids());
        let messages = issues
            .iter()
            .map(|issue| issue.message.as_str())
            .collect::<Vec<_>>()
            .join("\n");
        assert!(messages.contains("source locator 'ftp://not-usable' is not usable"));
        assert!(messages.contains("priority must be P0, P1, or P2"));
        assert!(messages.contains("artifact 'va_missing' does not resolve"));
        assert!(messages.contains("finding 'vf_missing' does not resolve"));
    }
}