// vela_protocol/decision.rs

1//! Validated frontier-owned decision projections.
2
3use std::collections::HashSet;
4use std::fs;
5use std::path::{Path, PathBuf};
6
7use serde::{Deserialize, Serialize};
8
9use crate::project::Project;
10
11pub const DECISION_BRIEF_SCHEMA: &str = "vela.decision-brief.v1";
12pub const TRIAL_OUTCOMES_SCHEMA: &str = "vela.trial-outcomes.v1";
13pub const SOURCE_VERIFICATION_SCHEMA: &str = "vela.source-verification.v1";
14pub const SOURCE_INGEST_PLAN_SCHEMA: &str = "vela.source-ingest-plan.v1";
15
16const DECISION_BRIEF_FILE: &str = "decision-brief.v1.json";
17const TRIAL_OUTCOMES_FILE: &str = "trial-outcomes.v1.json";
18const SOURCE_VERIFICATION_FILE: &str = "source-verification.v1.json";
19const SOURCE_INGEST_PLAN_FILE: &str = "source-ingest-plan.v1.json";
20
21const KNOWN_QUESTION_IDS: &[&str] = &[
22    "clinical-benefit",
23    "biomarkers-vs-cognition",
24    "bace-failures",
25    "aria-apoe4-risk",
26    "delivery-constraints",
27    "next-discriminating-evidence",
28];
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct DecisionBrief {
32    pub schema: String,
33    pub frontier_id: Option<String>,
34    pub updated_at: String,
35    pub source_frontier: String,
36    pub questions: Vec<DecisionQuestion>,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct DecisionQuestion {
41    pub id: String,
42    pub title: String,
43    pub short_answer: String,
44    pub caveat: String,
45    pub confidence: String,
46    pub supporting_findings: Vec<String>,
47    #[serde(default)]
48    pub tension_findings: Vec<String>,
49    #[serde(default)]
50    pub gap_findings: Vec<String>,
51    #[serde(default)]
52    pub artifact_ids: Vec<String>,
53    pub what_would_change_this_answer: String,
54    #[serde(default)]
55    pub correction_paths: Vec<DecisionCorrectionPath>,
56    #[serde(default)]
57    pub tags: Vec<String>,
58}
59
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct DecisionCorrectionPath {
62    pub finding_id: String,
63    pub summary: String,
64    #[serde(default)]
65    pub event_ids: Vec<String>,
66    #[serde(default)]
67    pub artifact_ids: Vec<String>,
68    pub status: String,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct TrialOutcomes {
73    pub schema: String,
74    pub frontier_id: Option<String>,
75    pub updated_at: String,
76    pub rows: Vec<TrialOutcomeRow>,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct TrialOutcomeRow {
81    pub id: String,
82    pub program: String,
83    pub drug: String,
84    pub mechanism: String,
85    pub phase: String,
86    #[serde(default)]
87    pub nct_ids: Vec<String>,
88    pub population: String,
89    pub disease_stage: String,
90    pub amyloid_confirmation: String,
91    pub duration: String,
92    pub primary_endpoint: String,
93    pub cognitive_result: String,
94    pub biomarker_result: String,
95    pub aria_or_safety_result: String,
96    pub regulatory_status: String,
97    #[serde(default)]
98    pub source_locators: Vec<String>,
99    #[serde(default)]
100    pub finding_ids: Vec<String>,
101    #[serde(default)]
102    pub artifact_ids: Vec<String>,
103    #[serde(default)]
104    pub tags: Vec<String>,
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct SourceVerification {
109    pub schema: String,
110    pub frontier_id: Option<String>,
111    pub verified_at: String,
112    #[serde(default)]
113    pub notes: Vec<String>,
114    pub sources: Vec<VerifiedSource>,
115}
116
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct SourceIngestPlan {
119    pub schema: String,
120    pub frontier_id: Option<String>,
121    pub name: String,
122    pub verified_at: String,
123    #[serde(default)]
124    pub policy: serde_json::Value,
125    pub entries: Vec<SourceIngestEntry>,
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct SourceIngestEntry {
130    pub id: String,
131    pub name: String,
132    pub category: String,
133    pub priority: String,
134    pub representation: String,
135    pub source_type: String,
136    pub locator: String,
137    pub ingest_status: String,
138    pub current_frontier_artifact_id: Option<String>,
139    pub access_terms: String,
140    pub license_note: String,
141    #[serde(default)]
142    pub target_findings: Vec<String>,
143    pub target_use: String,
144}
145
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct VerifiedSource {
148    pub id: String,
149    pub title: String,
150    pub url: String,
151    pub agency: String,
152    pub current_status: String,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct ProjectionIssue {
157    pub path: String,
158    pub message: String,
159}
160
161#[derive(Debug, Clone, Serialize)]
162pub struct ProjectionLoad<T>
163where
164    T: Serialize,
165{
166    pub ok: bool,
167    pub available: bool,
168    #[serde(skip_serializing_if = "Option::is_none")]
169    pub projection: Option<T>,
170    pub issues: Vec<ProjectionIssue>,
171    #[serde(skip_serializing_if = "Option::is_none")]
172    pub error: Option<String>,
173}
174
175pub fn decision_projection_dir(source: &Path) -> Option<PathBuf> {
176    if source.is_dir() {
177        let dir = source.join("decision");
178        return dir.exists().then_some(dir);
179    }
180    source
181        .parent()
182        .map(|parent| parent.join("decision"))
183        .filter(|dir| dir.exists())
184}
185
186pub fn load_decision_brief(source: &Path, project: &Project) -> ProjectionLoad<DecisionBrief> {
187    load_projection(
188        source,
189        project,
190        DECISION_BRIEF_FILE,
191        validate_decision_brief,
192    )
193}
194
195pub fn load_trial_outcomes(source: &Path, project: &Project) -> ProjectionLoad<TrialOutcomes> {
196    load_projection(
197        source,
198        project,
199        TRIAL_OUTCOMES_FILE,
200        validate_trial_outcomes,
201    )
202}
203
204pub fn load_source_verification(
205    source: &Path,
206    project: &Project,
207) -> ProjectionLoad<SourceVerification> {
208    load_projection(
209        source,
210        project,
211        SOURCE_VERIFICATION_FILE,
212        validate_source_verification,
213    )
214}
215
216pub fn load_source_ingest_plan(
217    source: &Path,
218    project: &Project,
219) -> ProjectionLoad<SourceIngestPlan> {
220    let Some(dir) = source_ingest_projection_dir(source) else {
221        return ProjectionLoad {
222            ok: false,
223            available: false,
224            projection: None,
225            issues: vec![],
226            error: Some("source ingest plan directory not found".to_string()),
227        };
228    };
229    load_projection_from_dir(
230        &dir,
231        project,
232        SOURCE_INGEST_PLAN_FILE,
233        validate_source_ingest_plan,
234    )
235}
236
237fn load_projection<T, F>(
238    source: &Path,
239    project: &Project,
240    file_name: &str,
241    validate: F,
242) -> ProjectionLoad<T>
243where
244    T: for<'de> Deserialize<'de> + Serialize,
245    F: Fn(&T, &Project) -> Vec<ProjectionIssue>,
246{
247    let Some(dir) = decision_projection_dir(source) else {
248        return ProjectionLoad {
249            ok: false,
250            available: false,
251            projection: None,
252            issues: vec![],
253            error: Some("decision projection directory not found".to_string()),
254        };
255    };
256    load_projection_from_dir(&dir, project, file_name, validate)
257}
258
259fn load_projection_from_dir<T, F>(
260    dir: &Path,
261    project: &Project,
262    file_name: &str,
263    validate: F,
264) -> ProjectionLoad<T>
265where
266    T: for<'de> Deserialize<'de> + Serialize,
267    F: Fn(&T, &Project) -> Vec<ProjectionIssue>,
268{
269    let path = dir.join(file_name);
270    let Ok(bytes) = fs::read_to_string(&path) else {
271        return ProjectionLoad {
272            ok: false,
273            available: false,
274            projection: None,
275            issues: vec![],
276            error: Some(format!("projection file not found: {}", path.display())),
277        };
278    };
279    let projection = match serde_json::from_str::<T>(&bytes) {
280        Ok(projection) => projection,
281        Err(error) => {
282            return ProjectionLoad {
283                ok: false,
284                available: true,
285                projection: None,
286                issues: vec![],
287                error: Some(format!("parse {}: {error}", path.display())),
288            };
289        }
290    };
291    let issues = validate(&projection, project);
292    ProjectionLoad {
293        ok: issues.is_empty(),
294        available: true,
295        projection: Some(projection),
296        issues,
297        error: None,
298    }
299}
300
301pub fn source_ingest_projection_dir(source: &Path) -> Option<PathBuf> {
302    if source.is_dir() {
303        let dir = source.join("ingest");
304        return dir.exists().then_some(dir);
305    }
306    source
307        .parent()
308        .map(|parent| parent.join("ingest"))
309        .filter(|dir| dir.exists())
310}
311
312pub fn validate_decision_brief(brief: &DecisionBrief, project: &Project) -> Vec<ProjectionIssue> {
313    let finding_ids = project
314        .findings
315        .iter()
316        .map(|finding| finding.id.as_str())
317        .collect::<HashSet<_>>();
318    let artifact_ids = project
319        .artifacts
320        .iter()
321        .map(|artifact| artifact.id.as_str())
322        .collect::<HashSet<_>>();
323    let event_ids = project
324        .events
325        .iter()
326        .map(|event| event.id.as_str())
327        .collect::<HashSet<_>>();
328    let mut issues = validate_projection_frontier_id(brief.frontier_id.as_deref(), project);
329    issues.extend(validate_decision_brief_against_sets(
330        brief,
331        &finding_ids,
332        &artifact_ids,
333        &event_ids,
334    ));
335    issues
336}
337
338pub fn validate_trial_outcomes(
339    outcomes: &TrialOutcomes,
340    project: &Project,
341) -> Vec<ProjectionIssue> {
342    let finding_ids = project
343        .findings
344        .iter()
345        .map(|finding| finding.id.as_str())
346        .collect::<HashSet<_>>();
347    let artifact_ids = project
348        .artifacts
349        .iter()
350        .map(|artifact| artifact.id.as_str())
351        .collect::<HashSet<_>>();
352    let mut issues = validate_projection_frontier_id(outcomes.frontier_id.as_deref(), project);
353    issues.extend(validate_trial_outcomes_against_sets(
354        outcomes,
355        &finding_ids,
356        &artifact_ids,
357    ));
358    issues
359}
360
361pub fn validate_source_verification(
362    verification: &SourceVerification,
363    project: &Project,
364) -> Vec<ProjectionIssue> {
365    let mut issues = validate_projection_frontier_id(verification.frontier_id.as_deref(), project);
366    issues.extend(validate_source_verification_shape(verification));
367    issues
368}
369
370pub fn validate_source_ingest_plan(
371    plan: &SourceIngestPlan,
372    project: &Project,
373) -> Vec<ProjectionIssue> {
374    let finding_ids = project
375        .findings
376        .iter()
377        .map(|finding| finding.id.as_str())
378        .collect::<HashSet<_>>();
379    let artifact_ids = project
380        .artifacts
381        .iter()
382        .map(|artifact| artifact.id.as_str())
383        .collect::<HashSet<_>>();
384    let mut issues = validate_projection_frontier_id(plan.frontier_id.as_deref(), project);
385    issues.extend(validate_source_ingest_plan_against_sets(
386        plan,
387        &finding_ids,
388        &artifact_ids,
389    ));
390    issues
391}
392
393fn validate_projection_frontier_id(
394    projected_frontier_id: Option<&str>,
395    project: &Project,
396) -> Vec<ProjectionIssue> {
397    let mut issues = Vec::new();
398    let actual = project.frontier_id();
399    match projected_frontier_id {
400        Some(id) if id == actual => {}
401        Some(id) => push_issue(
402            &mut issues,
403            "frontier_id",
404            format!("projection frontier_id '{id}' does not match frontier '{actual}'"),
405        ),
406        None => push_issue(
407            &mut issues,
408            "frontier_id",
409            "projection must pin the frontier_id it was reviewed against",
410        ),
411    }
412    issues
413}
414
415fn validate_source_ingest_plan_against_sets(
416    plan: &SourceIngestPlan,
417    finding_ids: &HashSet<&str>,
418    artifact_ids: &HashSet<&str>,
419) -> Vec<ProjectionIssue> {
420    let mut issues = Vec::new();
421    if plan.schema != SOURCE_INGEST_PLAN_SCHEMA {
422        push_issue(
423            &mut issues,
424            "schema",
425            format!("expected {SOURCE_INGEST_PLAN_SCHEMA}"),
426        );
427    }
428    require_non_empty(&mut issues, "name", &plan.name);
429    require_non_empty(&mut issues, "verified_at", &plan.verified_at);
430    if plan.entries.is_empty() {
431        push_issue(
432            &mut issues,
433            "entries",
434            "at least one source entry is required",
435        );
436    }
437    let mut seen = HashSet::new();
438    let mut categories = HashSet::new();
439    let mut priorities = HashSet::new();
440    let mut ingested = 0usize;
441    for (idx, entry) in plan.entries.iter().enumerate() {
442        let path = format!("entries[{idx}]");
443        require_non_empty(&mut issues, &format!("{path}.id"), &entry.id);
444        if !entry.id.trim().is_empty() && !seen.insert(entry.id.as_str()) {
445            push_issue(
446                &mut issues,
447                format!("{path}.id"),
448                format!("duplicate source entry id '{}'", entry.id),
449            );
450        }
451        require_non_empty(&mut issues, &format!("{path}.name"), &entry.name);
452        require_non_empty(&mut issues, &format!("{path}.category"), &entry.category);
453        require_non_empty(&mut issues, &format!("{path}.priority"), &entry.priority);
454        require_non_empty(
455            &mut issues,
456            &format!("{path}.representation"),
457            &entry.representation,
458        );
459        require_non_empty(
460            &mut issues,
461            &format!("{path}.source_type"),
462            &entry.source_type,
463        );
464        require_non_empty(
465            &mut issues,
466            &format!("{path}.ingest_status"),
467            &entry.ingest_status,
468        );
469        require_non_empty(
470            &mut issues,
471            &format!("{path}.access_terms"),
472            &entry.access_terms,
473        );
474        require_non_empty(
475            &mut issues,
476            &format!("{path}.license_note"),
477            &entry.license_note,
478        );
479        require_non_empty(
480            &mut issues,
481            &format!("{path}.target_use"),
482            &entry.target_use,
483        );
484        if !usable_source_locator(&entry.locator) {
485            push_issue(
486                &mut issues,
487                format!("{path}.locator"),
488                format!("source locator '{}' is not usable", entry.locator),
489            );
490        }
491        categories.insert(entry.category.as_str());
492        priorities.insert(entry.priority.as_str());
493        if !matches!(entry.priority.as_str(), "P0" | "P1" | "P2") {
494            push_issue(
495                &mut issues,
496                format!("{path}.priority"),
497                "priority must be P0, P1, or P2",
498            );
499        }
500        if !matches!(
501            entry.ingest_status.as_str(),
502            "ingested" | "pointer_only" | "candidate" | "excluded"
503        ) {
504            push_issue(
505                &mut issues,
506                format!("{path}.ingest_status"),
507                "unknown ingest status",
508            );
509        }
510        match entry.ingest_status.as_str() {
511            "ingested" => {
512                ingested += 1;
513                let Some(id) = entry.current_frontier_artifact_id.as_deref() else {
514                    push_issue(
515                        &mut issues,
516                        format!("{path}.current_frontier_artifact_id"),
517                        "ingested entries must name a frontier artifact",
518                    );
519                    continue;
520                };
521                if !artifact_ids.contains(id) {
522                    push_issue(
523                        &mut issues,
524                        format!("{path}.current_frontier_artifact_id"),
525                        format!("artifact '{id}' does not resolve in frontier"),
526                    );
527                }
528            }
529            _ => {
530                if entry.current_frontier_artifact_id.is_some() {
531                    push_issue(
532                        &mut issues,
533                        format!("{path}.current_frontier_artifact_id"),
534                        "only ingested entries may name a frontier artifact",
535                    );
536                }
537            }
538        }
539        if entry.target_findings.is_empty() {
540            push_issue(
541                &mut issues,
542                format!("{path}.target_findings"),
543                "at least one target finding is required",
544            );
545        }
546        for id in &entry.target_findings {
547            if !finding_ids.contains(id.as_str()) {
548                push_issue(
549                    &mut issues,
550                    format!("{path}.target_findings"),
551                    format!("finding '{id}' does not resolve in frontier"),
552                );
553            }
554        }
555    }
556    for required in [
557        "clinical_trial_registry",
558        "regulatory",
559        "dataset_or_registry",
560        "code_or_tool",
561        "literature_or_table",
562    ] {
563        if !categories.contains(required) {
564            push_issue(
565                &mut issues,
566                "entries.category",
567                format!("missing source category '{required}'"),
568            );
569        }
570    }
571    for required in ["P0", "P1"] {
572        if !priorities.contains(required) {
573            push_issue(
574                &mut issues,
575                "entries.priority",
576                format!("missing source priority '{required}'"),
577            );
578        }
579    }
580    if ingested == 0 {
581        push_issue(
582            &mut issues,
583            "entries.ingest_status",
584            "at least one source entry must be ingested",
585        );
586    }
587    issues
588}
589
590fn validate_decision_brief_against_sets(
591    brief: &DecisionBrief,
592    finding_ids: &HashSet<&str>,
593    artifact_ids: &HashSet<&str>,
594    event_ids: &HashSet<&str>,
595) -> Vec<ProjectionIssue> {
596    let mut issues = Vec::new();
597    if brief.schema != DECISION_BRIEF_SCHEMA {
598        push_issue(
599            &mut issues,
600            "schema",
601            format!("expected {DECISION_BRIEF_SCHEMA}"),
602        );
603    }
604    if brief.questions.len() != KNOWN_QUESTION_IDS.len() {
605        push_issue(
606            &mut issues,
607            "questions",
608            format!("expected {} decision questions", KNOWN_QUESTION_IDS.len()),
609        );
610    }
611    let mut seen = HashSet::new();
612    for (idx, question) in brief.questions.iter().enumerate() {
613        let path = format!("questions[{idx}]");
614        if !KNOWN_QUESTION_IDS.contains(&question.id.as_str()) {
615            push_issue(
616                &mut issues,
617                format!("{path}.id"),
618                format!("unknown decision question id '{}'", question.id),
619            );
620        }
621        if !seen.insert(question.id.as_str()) {
622            push_issue(
623                &mut issues,
624                format!("{path}.id"),
625                format!("duplicate decision question id '{}'", question.id),
626            );
627        }
628        require_non_empty(&mut issues, &format!("{path}.title"), &question.title);
629        require_non_empty(
630            &mut issues,
631            &format!("{path}.short_answer"),
632            &question.short_answer,
633        );
634        require_non_empty(&mut issues, &format!("{path}.caveat"), &question.caveat);
635        require_non_empty(
636            &mut issues,
637            &format!("{path}.what_would_change_this_answer"),
638            &question.what_would_change_this_answer,
639        );
640        if question.supporting_findings.is_empty() {
641            push_issue(
642                &mut issues,
643                format!("{path}.supporting_findings"),
644                "at least one supporting finding is required",
645            );
646        }
647        for id in question
648            .supporting_findings
649            .iter()
650            .chain(question.tension_findings.iter())
651            .chain(question.gap_findings.iter())
652        {
653            if !finding_ids.contains(id.as_str()) {
654                push_issue(
655                    &mut issues,
656                    format!("{path}.finding_refs"),
657                    format!("finding '{id}' does not resolve in frontier"),
658                );
659            }
660        }
661        for id in &question.artifact_ids {
662            if !artifact_ids.contains(id.as_str()) {
663                push_issue(
664                    &mut issues,
665                    format!("{path}.artifact_ids"),
666                    format!("artifact '{id}' does not resolve in frontier"),
667                );
668            }
669        }
670        for (path_idx, correction_path) in question.correction_paths.iter().enumerate() {
671            let correction_path_path = format!("{path}.correction_paths[{path_idx}]");
672            require_non_empty(
673                &mut issues,
674                &format!("{correction_path_path}.summary"),
675                &correction_path.summary,
676            );
677            require_non_empty(
678                &mut issues,
679                &format!("{correction_path_path}.status"),
680                &correction_path.status,
681            );
682            if !finding_ids.contains(correction_path.finding_id.as_str()) {
683                push_issue(
684                    &mut issues,
685                    format!("{correction_path_path}.finding_id"),
686                    format!(
687                        "finding '{}' does not resolve in frontier",
688                        correction_path.finding_id
689                    ),
690                );
691            }
692            for id in &correction_path.event_ids {
693                if !event_ids.contains(id.as_str()) {
694                    push_issue(
695                        &mut issues,
696                        format!("{correction_path_path}.event_ids"),
697                        format!("event '{id}' does not resolve in frontier"),
698                    );
699                }
700            }
701            for id in &correction_path.artifact_ids {
702                if !artifact_ids.contains(id.as_str()) {
703                    push_issue(
704                        &mut issues,
705                        format!("{correction_path_path}.artifact_ids"),
706                        format!("artifact '{id}' does not resolve in frontier"),
707                    );
708                }
709            }
710        }
711    }
712    issues
713}
714
715fn validate_trial_outcomes_against_sets(
716    outcomes: &TrialOutcomes,
717    finding_ids: &HashSet<&str>,
718    artifact_ids: &HashSet<&str>,
719) -> Vec<ProjectionIssue> {
720    let mut issues = Vec::new();
721    if outcomes.schema != TRIAL_OUTCOMES_SCHEMA {
722        push_issue(
723            &mut issues,
724            "schema",
725            format!("expected {TRIAL_OUTCOMES_SCHEMA}"),
726        );
727    }
728    if outcomes.rows.is_empty() {
729        push_issue(&mut issues, "rows", "at least one trial row is required");
730    }
731    let mut seen = HashSet::new();
732    for (idx, row) in outcomes.rows.iter().enumerate() {
733        let path = format!("rows[{idx}]");
734        require_non_empty(&mut issues, &format!("{path}.id"), &row.id);
735        if !row.id.trim().is_empty() && !seen.insert(row.id.as_str()) {
736            push_issue(
737                &mut issues,
738                format!("{path}.id"),
739                format!("duplicate trial row id '{}'", row.id),
740            );
741        }
742        require_non_empty(&mut issues, &format!("{path}.program"), &row.program);
743        require_non_empty(&mut issues, &format!("{path}.drug"), &row.drug);
744        require_non_empty(
745            &mut issues,
746            &format!("{path}.primary_endpoint"),
747            &row.primary_endpoint,
748        );
749        require_non_empty(
750            &mut issues,
751            &format!("{path}.regulatory_status"),
752            &row.regulatory_status,
753        );
754        if row.source_locators.is_empty() {
755            push_issue(
756                &mut issues,
757                format!("{path}.source_locators"),
758                "at least one source locator is required",
759            );
760        }
761        for locator in &row.source_locators {
762            if !usable_source_locator(locator) {
763                push_issue(
764                    &mut issues,
765                    format!("{path}.source_locators"),
766                    format!("source locator '{locator}' is not usable"),
767                );
768            }
769        }
770        if row.finding_ids.is_empty() {
771            push_issue(
772                &mut issues,
773                format!("{path}.finding_ids"),
774                "at least one finding reference is required",
775            );
776        }
777        for id in &row.finding_ids {
778            if !finding_ids.contains(id.as_str()) {
779                push_issue(
780                    &mut issues,
781                    format!("{path}.finding_ids"),
782                    format!("finding '{id}' does not resolve in frontier"),
783                );
784            }
785        }
786        for id in &row.artifact_ids {
787            if !artifact_ids.contains(id.as_str()) {
788                push_issue(
789                    &mut issues,
790                    format!("{path}.artifact_ids"),
791                    format!("artifact '{id}' does not resolve in frontier"),
792                );
793            }
794        }
795    }
796    issues
797}
798
799fn validate_source_verification_shape(verification: &SourceVerification) -> Vec<ProjectionIssue> {
800    let mut issues = Vec::new();
801    if verification.schema != SOURCE_VERIFICATION_SCHEMA {
802        push_issue(
803            &mut issues,
804            "schema",
805            format!("expected {SOURCE_VERIFICATION_SCHEMA}"),
806        );
807    }
808    require_non_empty(&mut issues, "verified_at", &verification.verified_at);
809    if verification.sources.is_empty() {
810        push_issue(
811            &mut issues,
812            "sources",
813            "at least one verified source is required",
814        );
815    }
816    let mut seen = HashSet::new();
817    for (idx, source) in verification.sources.iter().enumerate() {
818        let path = format!("sources[{idx}]");
819        require_non_empty(&mut issues, &format!("{path}.id"), &source.id);
820        if !source.id.trim().is_empty() && !seen.insert(source.id.as_str()) {
821            push_issue(
822                &mut issues,
823                format!("{path}.id"),
824                format!("duplicate source verification id '{}'", source.id),
825            );
826        }
827        require_non_empty(&mut issues, &format!("{path}.title"), &source.title);
828        require_non_empty(&mut issues, &format!("{path}.agency"), &source.agency);
829        require_non_empty(
830            &mut issues,
831            &format!("{path}.current_status"),
832            &source.current_status,
833        );
834        if !usable_source_locator(&source.url) {
835            push_issue(
836                &mut issues,
837                format!("{path}.url"),
838                format!("source url '{}' is not usable", source.url),
839            );
840        }
841    }
842    issues
843}
844
845fn usable_source_locator(locator: &str) -> bool {
846    let trimmed = locator.trim();
847    trimmed.starts_with("https://")
848        || trimmed.starts_with("doi:")
849        || trimmed.starts_with("pmid:")
850        || trimmed.starts_with("NCT")
851}
852
853fn require_non_empty(issues: &mut Vec<ProjectionIssue>, path: &str, value: &str) {
854    if value.trim().is_empty() {
855        push_issue(issues, path, "field must be non-empty");
856    }
857}
858
859fn push_issue(
860    issues: &mut Vec<ProjectionIssue>,
861    path: impl Into<String>,
862    message: impl Into<String>,
863) {
864    issues.push(ProjectionIssue {
865        path: path.into(),
866        message: message.into(),
867    });
868}
869
870#[cfg(test)]
871mod tests {
872    use super::*;
873
874    fn finding_ids() -> HashSet<&'static str> {
875        HashSet::from(["vf_known", "vf_tension", "vf_gap"])
876    }
877
878    fn artifact_ids() -> HashSet<&'static str> {
879        HashSet::from(["va_known"])
880    }
881
882    fn event_ids() -> HashSet<&'static str> {
883        HashSet::from(["vev_known"])
884    }
885
886    fn valid_question(id: &str) -> DecisionQuestion {
887        DecisionQuestion {
888            id: id.to_string(),
889            title: "Question".to_string(),
890            short_answer: "Bounded answer.".to_string(),
891            caveat: "Scoped caveat.".to_string(),
892            confidence: "medium".to_string(),
893            supporting_findings: vec!["vf_known".to_string()],
894            tension_findings: vec!["vf_tension".to_string()],
895            gap_findings: vec!["vf_gap".to_string()],
896            artifact_ids: vec!["va_known".to_string()],
897            what_would_change_this_answer: "A prospective readout.".to_string(),
898            correction_paths: vec![DecisionCorrectionPath {
899                finding_id: "vf_known".to_string(),
900                summary: "Reviewed and caveated for proof use.".to_string(),
901                event_ids: vec!["vev_known".to_string()],
902                artifact_ids: vec!["va_known".to_string()],
903                status: "reviewed".to_string(),
904            }],
905            tags: vec![],
906        }
907    }
908
909    fn valid_brief() -> DecisionBrief {
910        DecisionBrief {
911            schema: DECISION_BRIEF_SCHEMA.to_string(),
912            frontier_id: Some("vfr_test".to_string()),
913            updated_at: "2026-05-06T00:00:00Z".to_string(),
914            source_frontier: "Test frontier".to_string(),
915            questions: KNOWN_QUESTION_IDS
916                .iter()
917                .map(|id| valid_question(id))
918                .collect(),
919        }
920    }
921
922    #[test]
923    fn decision_brief_validates_all_references() {
924        let issues = validate_decision_brief_against_sets(
925            &valid_brief(),
926            &finding_ids(),
927            &artifact_ids(),
928            &event_ids(),
929        );
930        assert!(issues.is_empty(), "{issues:?}");
931    }
932
933    #[test]
934    fn decision_brief_reports_unknown_question_and_missing_refs() {
935        let mut brief = valid_brief();
936        brief.questions[0].id = "treatment-advice".to_string();
937        brief.questions[0].supporting_findings = vec!["vf_missing".to_string()];
938        brief.questions[0].artifact_ids = vec!["va_missing".to_string()];
939        brief.questions[0].correction_paths[0].event_ids = vec!["vev_missing".to_string()];
940
941        let issues = validate_decision_brief_against_sets(
942            &brief,
943            &finding_ids(),
944            &artifact_ids(),
945            &event_ids(),
946        );
947        let messages = issues
948            .iter()
949            .map(|issue| issue.message.as_str())
950            .collect::<Vec<_>>()
951            .join("\n");
952        assert!(messages.contains("unknown decision question id"));
953        assert!(messages.contains("finding 'vf_missing' does not resolve"));
954        assert!(messages.contains("artifact 'va_missing' does not resolve"));
955        assert!(messages.contains("event 'vev_missing' does not resolve"));
956    }
957
958    #[test]
959    fn trial_summary_requires_source_locator_and_refs() {
960        let outcomes = TrialOutcomes {
961            schema: TRIAL_OUTCOMES_SCHEMA.to_string(),
962            frontier_id: Some("vfr_test".to_string()),
963            updated_at: "2026-05-06T00:00:00Z".to_string(),
964            rows: vec![TrialOutcomeRow {
965                id: "clarity-ad".to_string(),
966                program: "CLARITY AD".to_string(),
967                drug: "lecanemab".to_string(),
968                mechanism: "anti-protofibril amyloid beta antibody".to_string(),
969                phase: "Phase 3".to_string(),
970                nct_ids: vec!["NCT03887455".to_string()],
971                population: "Early symptomatic AD".to_string(),
972                disease_stage: "MCI or mild dementia".to_string(),
973                amyloid_confirmation: "Required".to_string(),
974                duration: "18 months".to_string(),
975                primary_endpoint: "CDR-SB".to_string(),
976                cognitive_result: "Positive, modest absolute effect.".to_string(),
977                biomarker_result: "Amyloid reduced.".to_string(),
978                aria_or_safety_result: "ARIA risk requires monitoring.".to_string(),
979                regulatory_status: "FDA traditional approval.".to_string(),
980                source_locators: vec!["ftp://not-accepted".to_string()],
981                finding_ids: vec!["vf_missing".to_string()],
982                artifact_ids: vec!["va_missing".to_string()],
983                tags: vec![],
984            }],
985        };
986
987        let issues =
988            validate_trial_outcomes_against_sets(&outcomes, &finding_ids(), &artifact_ids());
989        let messages = issues
990            .iter()
991            .map(|issue| issue.message.as_str())
992            .collect::<Vec<_>>()
993            .join("\n");
994        assert!(messages.contains("source locator 'ftp://not-accepted' is not usable"));
995        assert!(messages.contains("finding 'vf_missing' does not resolve"));
996        assert!(messages.contains("artifact 'va_missing' does not resolve"));
997    }
998
999    #[test]
1000    fn source_verification_requires_current_source_records() {
1001        let verification = SourceVerification {
1002            schema: SOURCE_VERIFICATION_SCHEMA.to_string(),
1003            frontier_id: Some("vfr_test".to_string()),
1004            verified_at: "2026-05-06T00:00:00Z".to_string(),
1005            notes: vec![],
1006            sources: vec![VerifiedSource {
1007                id: "fda-label".to_string(),
1008                title: "FDA label".to_string(),
1009                url: "https://www.accessdata.fda.gov/example.pdf".to_string(),
1010                agency: "FDA".to_string(),
1011                current_status: "Current label checked for the demo frontier.".to_string(),
1012            }],
1013        };
1014
1015        let issues = validate_source_verification_shape(&verification);
1016        assert!(issues.is_empty(), "{issues:?}");
1017    }
1018
1019    #[test]
1020    fn source_verification_reports_unusable_urls_and_missing_status() {
1021        let verification = SourceVerification {
1022            schema: SOURCE_VERIFICATION_SCHEMA.to_string(),
1023            frontier_id: Some("vfr_test".to_string()),
1024            verified_at: "".to_string(),
1025            notes: vec![],
1026            sources: vec![VerifiedSource {
1027                id: "cms".to_string(),
1028                title: "CMS record".to_string(),
1029                url: "ftp://not-supported".to_string(),
1030                agency: "CMS".to_string(),
1031                current_status: "".to_string(),
1032            }],
1033        };
1034
1035        let issues = validate_source_verification_shape(&verification);
1036        let messages = issues
1037            .iter()
1038            .map(|issue| issue.message.as_str())
1039            .collect::<Vec<_>>()
1040            .join("\n");
1041        assert!(messages.contains("field must be non-empty"));
1042        assert!(messages.contains("source url 'ftp://not-supported' is not usable"));
1043    }
1044
1045    #[test]
1046    fn source_ingest_plan_requires_artifacts_and_target_findings() {
1047        let plan = SourceIngestPlan {
1048            schema: SOURCE_INGEST_PLAN_SCHEMA.to_string(),
1049            frontier_id: Some("vfr_test".to_string()),
1050            name: "Focused source plan".to_string(),
1051            verified_at: "2026-05-06T00:00:00Z".to_string(),
1052            policy: serde_json::json!({}),
1053            entries: vec![
1054                SourceIngestEntry {
1055                    id: "ct-primary".to_string(),
1056                    name: "Primary trial registry".to_string(),
1057                    category: "clinical_trial_registry".to_string(),
1058                    priority: "P0".to_string(),
1059                    representation: "clinical_trial_record".to_string(),
1060                    source_type: "registry_record".to_string(),
1061                    locator: "https://clinicaltrials.gov/study/NCT03887455".to_string(),
1062                    ingest_status: "ingested".to_string(),
1063                    current_frontier_artifact_id: Some("va_known".to_string()),
1064                    access_terms: "Public registry metadata".to_string(),
1065                    license_note: "Registry terms apply".to_string(),
1066                    target_findings: vec!["vf_known".to_string()],
1067                    target_use: "Anchor the trial result".to_string(),
1068                },
1069                SourceIngestEntry {
1070                    id: "reg-label".to_string(),
1071                    name: "Regulatory label".to_string(),
1072                    category: "regulatory".to_string(),
1073                    priority: "P1".to_string(),
1074                    representation: "registry_record".to_string(),
1075                    source_type: "regulatory_record".to_string(),
1076                    locator: "https://www.fda.gov/example".to_string(),
1077                    ingest_status: "candidate".to_string(),
1078                    current_frontier_artifact_id: None,
1079                    access_terms: "Public locator".to_string(),
1080                    license_note: "Regulatory terms apply".to_string(),
1081                    target_findings: vec!["vf_known".to_string()],
1082                    target_use: "Track current label status".to_string(),
1083                },
1084                SourceIngestEntry {
1085                    id: "dataset-access".to_string(),
1086                    name: "Dataset access record".to_string(),
1087                    category: "dataset_or_registry".to_string(),
1088                    priority: "P1".to_string(),
1089                    representation: "dataset".to_string(),
1090                    source_type: "dataset_access_record".to_string(),
1091                    locator: "https://adni.loni.usc.edu/data-samples/access-data/".to_string(),
1092                    ingest_status: "candidate".to_string(),
1093                    current_frontier_artifact_id: None,
1094                    access_terms: "Registration required".to_string(),
1095                    license_note: "Do not mirror participant data".to_string(),
1096                    target_findings: vec!["vf_known".to_string()],
1097                    target_use: "Represent longitudinal biomarker access".to_string(),
1098                },
1099                SourceIngestEntry {
1100                    id: "code-gate".to_string(),
1101                    name: "Release gate".to_string(),
1102                    category: "code_or_tool".to_string(),
1103                    priority: "P1".to_string(),
1104                    representation: "code".to_string(),
1105                    source_type: "repository_code".to_string(),
1106                    locator: "https://github.com/vela-science/vela".to_string(),
1107                    ingest_status: "candidate".to_string(),
1108                    current_frontier_artifact_id: None,
1109                    access_terms: "Repository code".to_string(),
1110                    license_note: "Repository terms apply".to_string(),
1111                    target_findings: vec!["vf_known".to_string()],
1112                    target_use: "Make validation executable".to_string(),
1113                },
1114                SourceIngestEntry {
1115                    id: "decision-table".to_string(),
1116                    name: "Decision table".to_string(),
1117                    category: "literature_or_table".to_string(),
1118                    priority: "P1".to_string(),
1119                    representation: "table".to_string(),
1120                    source_type: "frontier_projection".to_string(),
1121                    locator: "https://vela-site.fly.dev/workbench".to_string(),
1122                    ingest_status: "candidate".to_string(),
1123                    current_frontier_artifact_id: None,
1124                    access_terms: "Public metadata".to_string(),
1125                    license_note: "Source terms apply".to_string(),
1126                    target_findings: vec!["vf_known".to_string()],
1127                    target_use: "Serve decision projection".to_string(),
1128                },
1129            ],
1130        };
1131
1132        let issues =
1133            validate_source_ingest_plan_against_sets(&plan, &finding_ids(), &artifact_ids());
1134        assert!(issues.is_empty(), "{issues:?}");
1135    }
1136
1137    #[test]
1138    fn source_ingest_plan_reports_unresolved_ingested_entries() {
1139        let plan = SourceIngestPlan {
1140            schema: SOURCE_INGEST_PLAN_SCHEMA.to_string(),
1141            frontier_id: Some("vfr_test".to_string()),
1142            name: "Focused source plan".to_string(),
1143            verified_at: "2026-05-06T00:00:00Z".to_string(),
1144            policy: serde_json::json!({}),
1145            entries: vec![SourceIngestEntry {
1146                id: "ct-primary".to_string(),
1147                name: "Primary trial registry".to_string(),
1148                category: "clinical_trial_registry".to_string(),
1149                priority: "urgent".to_string(),
1150                representation: "clinical_trial_record".to_string(),
1151                source_type: "registry_record".to_string(),
1152                locator: "ftp://not-usable".to_string(),
1153                ingest_status: "ingested".to_string(),
1154                current_frontier_artifact_id: Some("va_missing".to_string()),
1155                access_terms: "Public registry metadata".to_string(),
1156                license_note: "Registry terms apply".to_string(),
1157                target_findings: vec!["vf_missing".to_string()],
1158                target_use: "Anchor the trial result".to_string(),
1159            }],
1160        };
1161
1162        let issues =
1163            validate_source_ingest_plan_against_sets(&plan, &finding_ids(), &artifact_ids());
1164        let messages = issues
1165            .iter()
1166            .map(|issue| issue.message.as_str())
1167            .collect::<Vec<_>>()
1168            .join("\n");
1169        assert!(messages.contains("source locator 'ftp://not-usable' is not usable"));
1170        assert!(messages.contains("priority must be P0, P1, or P2"));
1171        assert!(messages.contains("artifact 'va_missing' does not resolve"));
1172        assert!(messages.contains("finding 'vf_missing' does not resolve"));
1173    }
1174}