Skip to main content

vela_protocol/
validate.rs

1//! Schema validation for finding bundles in a frontier or VelaRepo.
2
3use std::collections::HashSet;
4use std::path::Path;
5
6use chrono::DateTime;
7use colored::Colorize;
8
9use crate::cli_style as style;
10use serde::{Deserialize, Serialize};
11
12use crate::bundle::{
13    ConfidenceMethod, FindingBundle, VALID_ASSERTION_TYPES, VALID_ENTITY_TYPES,
14    VALID_EVIDENCE_TYPES, VALID_LINK_TYPES, VALID_PROVENANCE_SOURCE_TYPES,
15};
16use crate::lint;
17use crate::normalize;
18use crate::packet;
19use crate::repo;
20
/// Accepted values for `provenance.extraction.method` on a finding bundle.
/// Order is presentation order in error messages only; membership is what
/// `validate_finding` enforces.
const VALID_EXTRACT_METHODS: &[&str] = &[
    "llm_extraction",
    "manual_curation",
    "database_import",
    "hybrid",
    // v0.30: agent-specific extraction tags. Distinguished from generic
    // `llm_extraction` because they carry the agent's identity
    // (notes-compiler vs scout vs reviewer) — useful for downstream
    // provenance audits that want "all proposals from compile-notes."
    "notes_compiler_via_claude_cli",
    "scout_via_claude_cli",
    "artifact_to_state_import",
];
34
35const VALID_LINK_INFERRED_BY: &[&str] = &["compiler", "reviewer", "author"];
36
/// A single validation error.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValidationError {
    /// Label for where the error occurred: a finding id (`vf_…`) for
    /// per-finding errors, or the source path for frontier-level errors.
    pub file: String,
    /// Human-readable description of the problem.
    pub error: String,
}
43
/// Summary of a validation run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValidationReport {
    /// Number of findings checked (0 when the source failed to load).
    pub total_files: usize,
    /// Findings with no errors attributed to them.
    pub valid: usize,
    /// Count of distinct error labels (see `validate` for how this is derived).
    pub invalid: usize,
    /// Every error encountered, in emission order.
    pub errors: Vec<ValidationError>,
}
52
/// How a quality diagnostic can be repaired. Serialized in snake_case
/// (`safe`, `manual_review`, `not_fixable`) for JSON consumers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Fixability {
    /// A deterministic repair exists (see the repair plan).
    Safe,
    /// A repair exists but requires a human decision.
    ManualReview,
    /// No known automated remediation.
    NotFixable,
}
60
/// Which check families `quality_report` should run. Each flag enables one
/// section (or, for `repair_plan`, the repair-plan computation).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QualityCheckOptions {
    /// Run schema validation (`validate`).
    pub schema: bool,
    /// Run statistical lint diagnostics.
    pub lint: bool,
    /// Run graph (frontier-structure) diagnostics.
    pub graph: bool,
    /// Compute the deterministic normalization repair plan.
    pub repair_plan: bool,
}
68
impl Default for QualityCheckOptions {
    /// All checks enabled.
    fn default() -> Self {
        Self {
            schema: true,
            lint: true,
            graph: true,
            repair_plan: true,
        }
    }
}
79
/// One diagnostic emitted by a check section.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityDiagnostic {
    /// Id of the section that produced this diagnostic ("schema", "lint", …).
    pub check_id: String,
    /// "error", "warning", or "info" — these are the values the summary tallies.
    pub severity: String,
    /// Stable rule identifier (e.g. "schema.entity_type", "L003").
    pub rule_id: String,
    /// The affected finding, when attributable to one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finding_id: Option<String>,
    /// Source file label, when attributable to one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file: Option<String>,
    /// JSON-path-like location within the finding, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
    /// Suggested remediation, when one exists.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suggestion: Option<String>,
    /// Whether/how this diagnostic can be repaired.
    pub fixability: Fixability,
}
96
/// One section of the quality report (schema, lint, graph, or load).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityCheckSection {
    /// Section identifier ("schema", "lint", "graph", "load").
    pub id: String,
    /// "pass", "warn", or "fail".
    pub status: String,
    /// How many items this section examined.
    pub checked: usize,
    /// How many items failed.
    pub failed: usize,
    /// All diagnostics emitted by this section.
    pub diagnostics: Vec<QualityDiagnostic>,
}
105
/// Roll-up of all sections for the top of the report.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QualitySummary {
    /// "pass" (no diagnostics), "warn" (warnings/info only), or "fail" (errors).
    pub status: String,
    pub checked_findings: usize,
    pub valid_findings: usize,
    pub invalid_findings: usize,
    /// Count of diagnostics with severity "error" across all sections.
    pub errors: usize,
    /// Count of diagnostics with severity "warning".
    pub warnings: usize,
    /// Count of diagnostics with severity "info".
    pub info: usize,
    /// Number of repair-plan items flagged safe to apply automatically.
    pub safe_repairs: usize,
}
117
/// One deterministic normalization change proposed by the repair plan.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepairPlanItem {
    /// Sequential identifier of the form `repair_NNNN`.
    pub id: String,
    /// Finding this change applies to.
    pub finding_id: String,
    /// Location of the changed value within the finding.
    pub path: String,
    /// Human-readable description of the change.
    pub action: String,
    /// Value before the change.
    pub before: serde_json::Value,
    /// Value after the change.
    pub after: serde_json::Value,
    /// Whether the change can be applied without human review.
    pub safe: bool,
}
128
/// The full set of proposed normalization repairs.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepairPlan {
    /// Always true today: the plan is produced by a deterministic pass.
    pub deterministic: bool,
    /// Number of `items` with `safe == true`.
    pub safe_items: usize,
    pub items: Vec<RepairPlanItem>,
}
135
/// Top-level JSON payload for `vela check --json`-style consumers.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityCheckReport {
    /// True when no error-severity diagnostics were produced.
    pub ok: bool,
    /// Always "check".
    pub command: String,
    /// Schema version of the vela project format.
    pub schema_version: String,
    /// Display form of the source path that was checked.
    pub source: String,
    /// "project_file", "vela_repo", "packet_dir", or "unknown".
    pub source_kind: String,
    pub summary: QualitySummary,
    pub checks: Vec<QualityCheckSection>,
    pub repair_plan: RepairPlan,
}
147
148/// Reusable report API for `vela check --json` style consumers.
149///
150/// The report combines schema validation, statistical lint diagnostics, graph
151/// diagnostics, and deterministic safe normalization repairs.
152pub fn quality_report(source_path: &Path, options: QualityCheckOptions) -> QualityCheckReport {
153    let source = source_path.display().to_string();
154    let source_kind = repo::detect(source_path)
155        .map(|s| source_kind(&s).to_string())
156        .unwrap_or_else(|_| "unknown".to_string());
157
158    let validation = if options.schema {
159        validate(source_path)
160    } else {
161        ValidationReport {
162            total_files: 0,
163            valid: 0,
164            invalid: 0,
165            errors: Vec::new(),
166        }
167    };
168
169    let mut checks = Vec::new();
170    if options.schema {
171        checks.push(schema_section(&validation));
172    }
173
174    let mut repair_items = Vec::new();
175    let mut loaded_findings = None;
176    if let Ok(frontier) = repo::load_from_path(source_path) {
177        loaded_findings = Some(frontier.findings.len());
178        if options.lint {
179            checks.push(lint_section("lint", lint::lint(&frontier, None, None)));
180        }
181        if options.graph {
182            checks.push(lint_section("graph", lint::lint_frontier(&frontier)));
183        }
184        if options.repair_plan {
185            repair_items = normalize::plan_project_changes(&frontier)
186                .into_iter()
187                .enumerate()
188                .map(|(idx, change)| RepairPlanItem {
189                    id: format!("repair_{:04}", idx + 1),
190                    finding_id: change.finding_id,
191                    path: change.path,
192                    action: change.description,
193                    before: change.before,
194                    after: change.after,
195                    safe: change.safe,
196                })
197                .collect();
198        }
199    } else if !options.schema {
200        checks.push(QualityCheckSection {
201            id: "load".to_string(),
202            status: "fail".to_string(),
203            checked: 0,
204            failed: 1,
205            diagnostics: vec![QualityDiagnostic {
206                check_id: "load".to_string(),
207                severity: "error".to_string(),
208                rule_id: "load".to_string(),
209                finding_id: None,
210                file: Some(source.clone()),
211                path: None,
212                message: "Failed to load frontier source".to_string(),
213                suggestion: Some(
214                    "Provide a frontier JSON file, VelaRepo, or packet directory".to_string(),
215                ),
216                fixability: Fixability::ManualReview,
217            }],
218        });
219    }
220
221    let errors = checks
222        .iter()
223        .flat_map(|c| c.diagnostics.iter())
224        .filter(|d| d.severity == "error")
225        .count();
226    let warnings = checks
227        .iter()
228        .flat_map(|c| c.diagnostics.iter())
229        .filter(|d| d.severity == "warning")
230        .count();
231    let info = checks
232        .iter()
233        .flat_map(|c| c.diagnostics.iter())
234        .filter(|d| d.severity == "info")
235        .count();
236    let status = if errors > 0 {
237        "fail"
238    } else if warnings > 0 || info > 0 {
239        "warn"
240    } else {
241        "pass"
242    };
243    let safe_repairs = repair_items.iter().filter(|item| item.safe).count();
244
245    QualityCheckReport {
246        ok: errors == 0,
247        command: "check".to_string(),
248        schema_version: crate::project::VELA_SCHEMA_VERSION.to_string(),
249        source,
250        source_kind,
251        summary: QualitySummary {
252            status: status.to_string(),
253            checked_findings: if options.schema {
254                validation.total_files
255            } else {
256                loaded_findings.unwrap_or(0)
257            },
258            valid_findings: if options.schema {
259                validation.valid
260            } else {
261                loaded_findings.unwrap_or(0)
262            },
263            invalid_findings: if options.schema {
264                validation.invalid
265            } else {
266                errors
267            },
268            errors,
269            warnings,
270            info,
271            safe_repairs,
272        },
273        checks,
274        repair_plan: RepairPlan {
275            deterministic: true,
276            safe_items: safe_repairs,
277            items: repair_items,
278        },
279    }
280}
281
282pub fn quality_report_json(
283    source_path: &Path,
284    options: QualityCheckOptions,
285) -> Result<String, serde_json::Error> {
286    serde_json::to_string_pretty(&quality_report(source_path, options))
287}
288
289fn schema_section(report: &ValidationReport) -> QualityCheckSection {
290    let diagnostics = report
291        .errors
292        .iter()
293        .map(|error| QualityDiagnostic {
294            check_id: "schema".to_string(),
295            severity: "error".to_string(),
296            rule_id: schema_rule_id(&error.error).to_string(),
297            finding_id: if error.file.starts_with("vf_") {
298                Some(error.file.clone())
299            } else {
300                None
301            },
302            file: Some(error.file.clone()),
303            path: None,
304            message: error.error.clone(),
305            suggestion: schema_suggestion(&error.error).map(str::to_string),
306            fixability: schema_fixability(&error.error),
307        })
308        .collect::<Vec<_>>();
309
310    QualityCheckSection {
311        id: "schema".to_string(),
312        status: if diagnostics.is_empty() {
313            "pass".to_string()
314        } else {
315            "fail".to_string()
316        },
317        checked: report.total_files,
318        failed: report.invalid,
319        diagnostics,
320    }
321}
322
323fn lint_section(id: &str, report: lint::LintReport) -> QualityCheckSection {
324    let failed = report
325        .diagnostics
326        .iter()
327        .filter(|d| d.severity == lint::Severity::Error)
328        .count();
329    let diagnostics = report
330        .diagnostics
331        .into_iter()
332        .map(|diagnostic| QualityDiagnostic {
333            check_id: id.to_string(),
334            severity: diagnostic.severity.to_string(),
335            rule_id: diagnostic.rule_id.clone(),
336            finding_id: Some(diagnostic.finding_id),
337            file: None,
338            path: None,
339            message: diagnostic.message,
340            suggestion: Some(diagnostic.suggestion),
341            fixability: lint_fixability(&diagnostic.rule_id),
342        })
343        .collect::<Vec<_>>();
344
345    QualityCheckSection {
346        id: id.to_string(),
347        status: if failed > 0 {
348            "fail".to_string()
349        } else if diagnostics.is_empty() {
350            "pass".to_string()
351        } else {
352            "warn".to_string()
353        },
354        checked: report.findings_checked,
355        failed,
356        diagnostics,
357    }
358}
359
/// Map a schema validation error message to a stable rule identifier.
///
/// Matching is ordered first-match-wins over message substrings, mirroring the
/// messages emitted by `validate`/`validate_finding`; unknown messages fall
/// back to the generic "schema" rule.
fn schema_rule_id(message: &str) -> &'static str {
    const RULES: &[(&str, &str)] = &[
        ("Invalid entity type", "schema.entity_type"),
        ("Invalid assertion type", "schema.assertion_type"),
        ("Invalid evidence type", "schema.evidence_type"),
        ("does not match content-address", "schema.content_address"),
        ("Duplicate finding ID", "schema.duplicate_id"),
        ("does not exist in frontier", "schema.link_target"),
        ("not RFC3339", "schema.timestamp"),
        ("Project stats.", "schema.project_stats"),
        ("Packet validation failed", "schema.packet"),
        ("Failed to load", "schema.load"),
    ];
    for &(needle, rule) in RULES {
        if message.contains(needle) {
            return rule;
        }
    }
    "schema"
}
385
/// Optional remediation hint for a schema validation error message.
///
/// Ordered first-match-wins over message substrings; messages with no known
/// remediation yield `None`.
fn schema_suggestion(message: &str) -> Option<&'static str> {
    const SUGGESTIONS: &[(&str, &str)] = &[
        (
            "Invalid entity type",
            "Run the normalization plan/apply API to map entity types to schema vocabulary",
        ),
        (
            "Project stats.",
            "Reassemble or resave the frontier after applying content changes",
        ),
        (
            "does not match content-address",
            "Recompute finding IDs and update dependent links only after reviewing the identity change",
        ),
        (
            "does not exist in frontier",
            "Remove the broken link or add the missing target finding",
        ),
    ];
    SUGGESTIONS
        .iter()
        .find(|&&(needle, _)| message.contains(needle))
        .map(|&(_, suggestion)| suggestion)
}
401
402fn schema_fixability(message: &str) -> Fixability {
403    if message.contains("Invalid entity type") {
404        Fixability::Safe
405    } else if message.contains("Packet validation failed") || message.contains("Failed to load") {
406        Fixability::NotFixable
407    } else {
408        Fixability::ManualReview
409    }
410}
411
412fn lint_fixability(rule_id: &str) -> Fixability {
413    match rule_id {
414        "orphan"
415        | "missing_crossref"
416        | "unresolved_contradiction"
417        | "critical_gap"
418        | "fragile_anchor"
419        | "stale_superseded"
420        | "L001"
421        | "L002"
422        | "L003"
423        | "L004"
424        | "L005"
425        | "L006"
426        | "L007"
427        | "L008"
428        | "L009"
429        | "L010" => Fixability::ManualReview,
430        _ => Fixability::NotFixable,
431    }
432}
433
/// Stable string tag for the detected source kind, used in the report's
/// `source_kind` field.
fn source_kind(source: &repo::VelaSource) -> &'static str {
    match source {
        repo::VelaSource::ProjectFile(_) => "project_file",
        repo::VelaSource::VelaRepo(_) => "vela_repo",
        repo::VelaSource::PacketDir(_) => "packet_dir",
    }
}
441
/// Validate all findings in a frontier against the schema.
///
/// Loads the frontier from `source_path` (frontier JSON, VelaRepo, or packet
/// directory), then checks — in order — packet layout (packet dirs only),
/// project metadata, cross-frontier dependency declarations, and every
/// finding bundle. A load failure short-circuits to a report with a single
/// error and zero counted files.
pub fn validate(source_path: &Path) -> ValidationReport {
    let source_label = source_path.display().to_string();
    let frontier = match repo::load_from_path(source_path) {
        Ok(c) => c,
        Err(e) => {
            return ValidationReport {
                total_files: 0,
                valid: 0,
                invalid: 0,
                errors: vec![ValidationError {
                    file: source_path.display().to_string(),
                    error: format!("Failed to load: {e}"),
                }],
            };
        }
    };

    let mut errors: Vec<ValidationError> = Vec::new();
    let mut seen_ids: HashSet<String> = HashSet::new();
    // Every finding id in the frontier — used to resolve local link targets.
    let all_ids: HashSet<String> = frontier.findings.iter().map(|f| f.id.clone()).collect();
    // v0.8: declared cross-frontier dependencies. Any link target of
    // the form `vf_X@vfr_Y` must reference a Y in this set.
    let declared_deps: HashSet<String> = frontier
        .cross_frontier_deps()
        .filter_map(|d| d.vfr_id.clone())
        .collect();

    // Packet-level structural checks apply only when the source is a packet
    // directory. These errors carry the source path (not a finding id) as
    // their label.
    if matches!(
        repo::detect(source_path),
        Ok(repo::VelaSource::PacketDir(_))
    ) && let Err(packet_err) = packet::validate(source_path)
    {
        errors.push(ValidationError {
            file: source_label.clone(),
            error: format!("Packet validation failed: {packet_err}"),
        });
    }

    validate_project_metadata(&frontier, source_path, &mut errors);

    // v0.8: every cross-frontier dep must declare both a locator and
    // a pinned snapshot hash. Without those the dep can be neither
    // fetched nor verified, so a strict reader rejects.
    for dep in frontier.cross_frontier_deps() {
        let Some(vfr) = &dep.vfr_id else { continue };
        if dep.locator.as_deref().unwrap_or("").is_empty() {
            errors.push(ValidationError {
                file: source_label.clone(),
                error: format!("Cross-frontier dependency '{vfr}' is missing 'locator'"),
            });
        }
        if dep.pinned_snapshot_hash.as_deref().unwrap_or("").is_empty() {
            errors.push(ValidationError {
                file: source_label.clone(),
                error: format!(
                    "Cross-frontier dependency '{vfr}' is missing 'pinned_snapshot_hash'"
                ),
            });
        }
    }

    for finding in &frontier.findings {
        let file_label = &finding.id;
        validate_finding(
            finding,
            file_label,
            &all_ids,
            &declared_deps,
            &mut seen_ids,
            &mut errors,
        );
    }

    // Invalid count = number of distinct error labels. NOTE(review):
    // frontier-level errors (packet/metadata/dep checks) are labelled with
    // the source path, so their presence adds one to `invalid` alongside
    // the per-finding labels — confirm that inflating `invalid` (and
    // shrinking `valid`) by one in that case is intended.
    let invalid_count = errors.iter().map(|e| &e.file).collect::<HashSet<_>>().len();
    let valid_count = frontier.findings.len().saturating_sub(invalid_count);

    ValidationReport {
        total_files: frontier.findings.len(),
        valid: valid_count,
        invalid: invalid_count,
        errors,
    }
}
526
/// Validate a single finding bundle, appending any problems to `errors`.
///
/// `file_label` becomes the `file` field on every emitted error (callers pass
/// the finding's id). `all_ids` is the set of all finding ids in the frontier
/// (for resolving local link targets); `declared_deps` is the set of declared
/// cross-frontier `vfr_` ids (for `vf_…@vfr_…` targets); `seen_ids`
/// accumulates ids across calls so duplicates are detected.
fn validate_finding(
    finding: &FindingBundle,
    file_label: &str,
    all_ids: &HashSet<String>,
    declared_deps: &HashSet<String>,
    seen_ids: &mut HashSet<String>,
    errors: &mut Vec<ValidationError>,
) {
    // Check ID pattern: vf_ + 16 hex chars. Slicing `[3..]` is safe because
    // `&&` short-circuits: it only runs after the prefix/length checks pass.
    let id_valid = finding.id.starts_with("vf_")
        && finding.id.len() == 19
        && finding.id[3..].chars().all(|c| c.is_ascii_hexdigit());
    if !id_valid {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid ID format '{}': expected vf_ + 16 hex chars",
                finding.id
            ),
        });
    }

    // Duplicate ID check (`insert` returns false if the id was already seen).
    if !seen_ids.insert(finding.id.clone()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Duplicate finding ID '{}'", finding.id),
        });
    }

    // Required fields presence (these are enforced by Rust types, but
    // check for empty strings which indicate missing data)
    if finding.assertion.text.is_empty() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Assertion text is empty".to_string(),
        });
    }

    if finding.created.is_empty() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Created timestamp is empty".to_string(),
        });
    }
    // Timestamp format checks only fire for non-empty values, so an empty
    // `created` yields exactly one error (above), not two.
    if !finding.created.is_empty() && DateTime::parse_from_rfc3339(&finding.created).is_err() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Created timestamp '{}' is not RFC3339", finding.created),
        });
    }
    if let Some(updated) = &finding.updated
        && !updated.is_empty()
        && DateTime::parse_from_rfc3339(updated).is_err()
    {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Updated timestamp '{}' is not RFC3339", updated),
        });
    }

    // The id must be the content-address of (assertion, provenance) — i.e.
    // re-deriving it from the content must reproduce the stored id.
    let expected_id = FindingBundle::content_address(&finding.assertion, &finding.provenance);
    if finding.id != expected_id {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Finding id '{}' does not match content-address '{}'",
                finding.id, expected_id
            ),
        });
    }

    // Confidence score range
    if !(0.0..=1.0).contains(&finding.confidence.score) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Confidence score {} is outside 0.0-1.0 range",
                finding.confidence.score
            ),
        });
    }

    // Assertion type validation
    if !VALID_ASSERTION_TYPES.contains(&finding.assertion.assertion_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid assertion type '{}'. Valid: {}",
                finding.assertion.assertion_type,
                VALID_ASSERTION_TYPES.join(", "),
            ),
        });
    }

    // Evidence type validation
    if !VALID_EVIDENCE_TYPES.contains(&finding.evidence.evidence_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid evidence type '{}'. Valid: {}",
                finding.evidence.evidence_type,
                VALID_EVIDENCE_TYPES.join(", "),
            ),
        });
    }

    // Every entity attached to the assertion must use a known entity type.
    for entity in &finding.assertion.entities {
        if !VALID_ENTITY_TYPES.contains(&entity.entity_type.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!(
                    "Invalid entity type '{}' for entity '{}'. Valid: {}",
                    entity.entity_type,
                    entity.name,
                    VALID_ENTITY_TYPES.join(", "),
                ),
            });
        }
    }

    if !VALID_PROVENANCE_SOURCE_TYPES.contains(&finding.provenance.source_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid source_type '{}'. Valid: {}",
                finding.provenance.source_type,
                VALID_PROVENANCE_SOURCE_TYPES.join(", "),
            ),
        });
    }

    if !VALID_EXTRACT_METHODS.contains(&finding.provenance.extraction.method.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid extraction method '{}'. Valid: {}",
                finding.provenance.extraction.method,
                VALID_EXTRACT_METHODS.join(", "),
            ),
        });
    }

    // A computed confidence without its component breakdown is unverifiable.
    if finding.confidence.method == ConfidenceMethod::Computed
        && finding.confidence.components.is_none()
    {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Computed confidence must include components".to_string(),
        });
    }

    // Link targets must either reference an existing in-frontier vf_id
    // (`vf_…`) or, in v0.8+, a vf_id in a declared cross-frontier dep
    // (`vf_…@vfr_…`).
    for link in &finding.links {
        match crate::bundle::LinkRef::parse(&link.target) {
            Err(e) => {
                errors.push(ValidationError {
                    file: file_label.to_string(),
                    error: format!("Invalid link target '{}': {e}", link.target),
                });
            }
            Ok(crate::bundle::LinkRef::Local { vf_id }) => {
                // Old shape: must be vf_ + 16 hex (19 chars total) and
                // exist in the current frontier.
                let id_well_formed =
                    vf_id.len() == 19 && vf_id[3..].chars().all(|c| c.is_ascii_hexdigit());
                if !id_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!("Invalid link target format '{}'", link.target),
                    });
                } else if !all_ids.contains(&vf_id) {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!("Link target '{}' does not exist in frontier", link.target),
                    });
                }
            }
            Ok(crate::bundle::LinkRef::Cross { vf_id, vfr_id }) => {
                // v0.8 cross-frontier link: well-formed ids, plus the
                // referenced vfr_id must appear in
                // `frontier.dependencies`. We don't verify the remote's
                // snapshot_hash here — that's the registry's job at
                // pull time. Validation only enforces declaration.
                let vf_well_formed =
                    vf_id.len() == 19 && vf_id[3..].chars().all(|c| c.is_ascii_hexdigit());
                let vfr_well_formed =
                    vfr_id.len() == 20 && vfr_id[4..].chars().all(|c| c.is_ascii_hexdigit());
                if !vf_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Invalid cross-frontier link target '{}': vf_ part must be 19 chars (vf_ + 16 hex)",
                            link.target
                        ),
                    });
                }
                if !vfr_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Invalid cross-frontier link target '{}': vfr_ part must be 20 chars (vfr_ + 16 hex)",
                            link.target
                        ),
                    });
                }
                // Only check declaration for well-formed vfr ids: a malformed
                // id already produced an error above.
                if vfr_well_formed && !declared_deps.contains(&vfr_id) {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Cross-frontier link target '{}' references undeclared dependency '{}'; add it via `vela frontier add-dep`",
                            link.target, vfr_id
                        ),
                    });
                }
            }
        }
        // Per-link metadata checks run regardless of target validity.
        if link.created_at.is_empty() {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Link created_at is empty for target '{}'", link.target),
            });
        } else if DateTime::parse_from_rfc3339(&link.created_at).is_err() {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Link created_at '{}' is not RFC3339", link.created_at),
            });
        }
        if !VALID_LINK_TYPES.contains(&link.link_type.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Invalid link type '{}'", link.link_type),
            });
        }
        if !VALID_LINK_INFERRED_BY.contains(&link.inferred_by.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Invalid link inferred_by '{}'", link.inferred_by),
            });
        }
    }
}
771
/// Validate frontier-level metadata (versions, schema URL, compiler stamp,
/// compiled_at, and the aggregate `stats` counters), appending any problems
/// to `errors` labelled with the source path.
fn validate_project_metadata(
    frontier: &crate::project::Project,
    source_path: &Path,
    errors: &mut Vec<ValidationError>,
) {
    // `vela_version` and `schema` are publisher-claimed, like the compiler
    // stamp. Pre-v0.10 frontiers (BBB at v0.8.0, the v0.8 conformance vector)
    // must continue to validate under newer binaries without recomputing
    // their content-addressed identity. v0.10's enum extensions are additive,
    // so any pre-v0.10 schema URL listed in `KNOWN_SCHEMA_URLS` validates
    // against the current code.
    const KNOWN_VELA_VERSIONS: &[&str] = &["0.8.0", "0.10.0"];
    const KNOWN_SCHEMA_URLS: &[&str] = &[
        "https://vela.science/schema/finding-bundle/v0.8.0",
        "https://vela.science/schema/finding-bundle/v0.10.0",
    ];
    if !KNOWN_VELA_VERSIONS.contains(&frontier.vela_version.as_str()) {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Unknown vela_version '{}': expected one of {}",
                frontier.vela_version,
                KNOWN_VELA_VERSIONS.join(", "),
            ),
        });
    }
    if !KNOWN_SCHEMA_URLS.contains(&frontier.schema.as_str()) {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Unknown schema '{}': expected one of {}",
                frontier.schema,
                KNOWN_SCHEMA_URLS.join(", "),
            ),
        });
    }
    // The compiler stamp is publisher-claimed — it records which binary
    // *produced* the canonical bytes, not which binary may validate them.
    // We require the `vela/X.Y.Z` shape (so it's still a structured field
    // and not free-form prose) but allow any version, current or older,
    // so frontiers compiled with a v0.7 binary continue to validate under
    // a v0.9 binary without churning their content-addressed identity.
    // (The length check rejects a bare "vela/" with nothing after it.)
    if !frontier.project.compiler.starts_with("vela/")
        || frontier.project.compiler.len() <= "vela/".len()
    {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Invalid compiler '{}': expected 'vela/X.Y.Z'",
                frontier.project.compiler,
            ),
        });
    }
    if frontier.project.compiled_at.is_empty() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: "Project compiled_at is empty".to_string(),
        });
    } else if DateTime::parse_from_rfc3339(&frontier.project.compiled_at).is_err() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project compiled_at '{}' is not RFC3339",
                frontier.project.compiled_at
            ),
        });
    }

    // The stored `stats` counters must agree with the actual content:
    // findings count and the sum of per-finding link counts.
    let expected_links: usize = frontier.findings.iter().map(|f| f.links.len()).sum();
    if frontier.stats.findings != frontier.findings.len() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project stats.findings {} does not match findings length {}",
                frontier.stats.findings,
                frontier.findings.len()
            ),
        });
    }
    if frontier.stats.links != expected_links {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project stats.links {} does not match aggregated links {}",
                frontier.stats.links, expected_links
            ),
        });
    }
}
861
862/// CLI entry point for `vela validate`.
863pub fn run(source: &Path) {
864    let report = validate(source);
865
866    println!();
867    println!("  {}", "VELA · VALIDATE".dimmed());
868    println!("  {}", style::tick_row(60));
869    println!("  total findings: {}", report.total_files);
870    println!(
871        "  valid:           {}",
872        style::moss(report.valid.to_string())
873    );
874    println!(
875        "  invalid:         {}",
876        if report.invalid > 0 {
877            style::madder(report.invalid.to_string()).to_string()
878        } else {
879            report.invalid.to_string()
880        }
881    );
882
883    if !report.errors.is_empty() {
884        println!();
885        println!("  {}", "ERRORS".dimmed());
886        for err in &report.errors {
887            println!(
888                "  {} {} · {}",
889                style::madder("-"),
890                err.file.dimmed(),
891                err.error
892            );
893        }
894    } else {
895        println!("\n  {} all findings valid.", style::ok("ok"));
896    }
897
898    if report.invalid > 0 {
899        std::process::exit(1);
900    }
901}
902
#[cfg(test)]
mod tests {
    //! Unit tests for frontier/project validation. The helpers below build
    //! a minimal finding that passes every schema check; each test then
    //! perturbs exactly one field and asserts that the matching error
    //! message appears in the resulting `ValidationReport`.
    use super::*;
    use crate::bundle::*;
    use crate::project;
    use chrono::Utc;
    use tempfile::TempDir;

    /// Builds a finding that passes all validation checks. `seed` is folded
    /// into the assertion text, DOI, and title so distinct seeds yield
    /// distinct content-addressed IDs; the ID itself is recomputed from the
    /// assertion + provenance at the end so the content-address check passes.
    fn make_valid_finding(seed: &str) -> FindingBundle {
        let assertion = Assertion {
            text: format!("Test assertion {}", seed),
            assertion_type: "mechanism".into(),
            entities: vec![],
            relation: None,
            direction: None,
            causal_claim: None,
            causal_evidence_grade: None,
        };
        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: Some(format!("10.0000/{}", seed)),
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: format!("Test {seed}"),
            authors: vec![],
            year: Some(2024),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction {
                method: "llm_extraction".into(),
                model: None,
                model_version: None,
                extracted_at: "1970-01-01T00:00:00Z".to_string(),
                extractor_version: "vela/0.2.0".to_string(),
            },
            review: None,
            citation_count: None,
        };
        let mut finding = FindingBundle::new(
            assertion,
            Evidence {
                evidence_type: "experimental".into(),
                model_system: String::new(),
                species: None,
                method: String::new(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            Confidence::raw(0.85, "test", 0.9),
            provenance,
            Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
        );
        // Recompute so the ID matches the content-address derived from
        // assertion + provenance, as the validator requires.
        finding.id = FindingBundle::content_address(&finding.assertion, &finding.provenance);
        finding
    }

    /// Assembles `findings` into a project via `project::assemble` and
    /// writes it to `<dir>/test.json`, returning the path for `validate`.
    fn write_frontier(dir: &Path, findings: Vec<FindingBundle>) -> std::path::PathBuf {
        let c = project::assemble("test", findings, 1, 0, "Test");
        let path = dir.join("test.json");
        let json = serde_json::to_string_pretty(&c).unwrap();
        std::fs::write(&path, json).unwrap();
        path
    }

    /// Serializes an already-built (possibly hand-mutated) project to
    /// `<dir>/test.json`. Used by tests that tweak project-level metadata
    /// after assembly.
    fn write_project(dir: &Path, frontier: &project::Project) -> std::path::PathBuf {
        let path = dir.join("test.json");
        let json = serde_json::to_string_pretty(frontier).unwrap();
        std::fs::write(&path, json).unwrap();
        path
    }

    // Baseline: a frontier of two well-formed findings validates cleanly,
    // and `total_files` counts findings.
    #[test]
    fn valid_frontier_passes() {
        let tmp = TempDir::new().unwrap();
        let path = write_frontier(
            tmp.path(),
            vec![
                make_valid_finding("vf_0000000000000001"),
                make_valid_finding("vf_0000000000000002"),
            ],
        );
        let report = validate(&path);
        assert_eq!(report.total_files, 2);
        assert_eq!(report.valid, 2);
        assert_eq!(report.invalid, 0);
        assert!(report.errors.is_empty());
    }

    // An unrecognized `vela_version` on the project is reported.
    #[test]
    fn project_metadata_validation() {
        let tmp = TempDir::new().unwrap();
        let mut c = project::assemble(
            "test",
            vec![make_valid_finding("vf_0000000000000001")],
            1,
            0,
            "Test",
        );
        c.vela_version = "0.1.0".into();
        let path = write_project(tmp.path(), &c);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Unknown vela_version"))
        );
    }

    // A source_type outside VALID_PROVENANCE_SOURCE_TYPES is rejected.
    #[test]
    fn invalid_provenance_source_type_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.provenance.source_type = "invalid_source".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid source_type"))
        );
    }

    // An extraction method outside VALID_EXTRACT_METHODS is rejected.
    #[test]
    fn invalid_extraction_method_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.provenance.extraction.method = "invalid_method".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid extraction method"))
        );
    }

    // A `Computed` confidence without its components breakdown is rejected.
    #[test]
    fn invalid_computed_confidence_components_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.confidence.method = ConfidenceMethod::Computed;
        f.confidence.components = None;
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(report.errors.iter().any(|e| {
            e.error
                .contains("Computed confidence must include components")
        }));
    }

    // An ID that is well-formed but doesn't match the content address
    // derived from assertion + provenance is rejected.
    #[test]
    fn invalid_content_address_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.id = "vf_0000000000000002".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("does not match content-address"))
        );
    }

    // A link whose type is outside VALID_LINK_TYPES is rejected; the link
    // targets the finding's own ID so only the type check can fire.
    #[test]
    fn invalid_link_type_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_link_type");
        let target = f.id.clone();
        f.links.push(Link {
            target,
            link_type: "bad_type".into(),
            note: String::new(),
            inferred_by: "compiler".into(),
            created_at: Utc::now().to_rfc3339(),
            mechanism: None,
        });
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid link type"))
        );
    }

    // An ID that doesn't follow the `vf_` + hex format both fails the
    // format check and marks the finding invalid.
    #[test]
    fn invalid_id_format_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("bad_id");
        f.id = "bad_id".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(report.invalid > 0);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid ID format"))
        );
    }

    // A confidence score above 1.0 is out of range.
    #[test]
    fn invalid_confidence_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.confidence.score = 1.5;
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Confidence score"))
        );
    }

    // An assertion type outside VALID_ASSERTION_TYPES is rejected.
    #[test]
    fn invalid_assertion_type_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.assertion.assertion_type = "bogus_type".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid assertion type"))
        );
    }

    // An evidence type outside VALID_EVIDENCE_TYPES is rejected.
    #[test]
    fn invalid_evidence_type_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.evidence.evidence_type = "anecdotal".into();
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("Invalid evidence type"))
        );
    }

    // A link to an ID not present in the frontier is reported as broken.
    #[test]
    fn broken_link_target_detected() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.links.push(Link {
            target: "vf_deadbeefdeadbeef".into(),
            link_type: "extends".into(),
            note: String::new(),
            inferred_by: "compiler".into(),
            created_at: Utc::now().to_rfc3339(),
            mechanism: None,
        });
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("does not exist"))
        );
    }

    // Two findings built from the same seed share a content-addressed ID,
    // which the validator reports as a duplicate.
    #[test]
    fn duplicate_id_detected() {
        let tmp = TempDir::new().unwrap();
        let f1 = make_valid_finding("vf_0000000000000001");
        let f2 = make_valid_finding("vf_0000000000000001");
        let path = write_frontier(tmp.path(), vec![f1, f2]);
        let report = validate(&path);
        assert!(report.errors.iter().any(|e| e.error.contains("Duplicate")));
    }

    // An entity type outside VALID_ENTITY_TYPES surfaces in the quality
    // report as a `schema.entity_type` diagnostic marked safely fixable,
    // and contributes to the repair plan's safe-item count.
    #[test]
    fn invalid_entity_type_detected_and_marked_fixable() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.assertion.entities.push(Entity {
            name: "BBB".into(),
            entity_type: "biological_barrier".into(),
            identifiers: serde_json::Map::new(),
            canonical_id: None,
            candidates: vec![],
            aliases: vec![],
            resolution_provenance: None,
            resolution_confidence: 1.0,
            resolution_method: None,
            species_context: None,
            needs_review: false,
        });
        // The entity changed the assertion, so the content address must
        // be recomputed to keep the ID check from firing instead.
        f.id = FindingBundle::content_address(&f.assertion, &f.provenance);
        let path = write_frontier(tmp.path(), vec![f]);

        let report = quality_report(&path, QualityCheckOptions::default());

        assert!(
            report
                .checks
                .iter()
                .flat_map(|check| check.diagnostics.iter())
                .any(|diagnostic| diagnostic.rule_id == "schema.entity_type"
                    && diagnostic.fixability == Fixability::Safe)
        );
        assert!(report.repair_plan.safe_items >= 2);
    }

    // The quality report carries all three check sections and emits both a
    // lint diagnostic (L001) and a graph diagnostic (orphan) for this fixture.
    #[test]
    fn quality_report_includes_schema_lint_and_graph_sections() {
        let tmp = TempDir::new().unwrap();
        let mut f = make_valid_finding("vf_0000000000000001");
        f.evidence.sample_size = Some("n=4".into());
        f.evidence.replicated = false;
        f.confidence.score = 0.9;
        f.id = FindingBundle::content_address(&f.assertion, &f.provenance);
        let path = write_frontier(tmp.path(), vec![f]);

        let report = quality_report(&path, QualityCheckOptions::default());

        assert!(report.checks.iter().any(|check| check.id == "schema"));
        assert!(report.checks.iter().any(|check| check.id == "lint"));
        assert!(report.checks.iter().any(|check| check.id == "graph"));
        assert!(
            report
                .checks
                .iter()
                .flat_map(|check| check.diagnostics.iter())
                .any(|diagnostic| diagnostic.rule_id == "L001")
        );
        assert!(
            report
                .checks
                .iter()
                .flat_map(|check| check.diagnostics.iter())
                .any(|diagnostic| diagnostic.rule_id == "orphan")
        );
    }

    // ── v0.8: cross-frontier link validation ──────────────────────────

    /// Like `make_valid_finding` but with a single `extends` link to
    /// `target`; used to exercise `vf_...@vfr_...` cross-frontier targets.
    fn make_finding_with_link(seed: &str, target: &str) -> FindingBundle {
        let mut f = make_valid_finding(seed);
        f.links = vec![Link {
            target: target.to_string(),
            link_type: "extends".to_string(),
            note: String::new(),
            inferred_by: "compiler".to_string(),
            created_at: "2024-01-01T00:00:00Z".to_string(),
            mechanism: None,
        }];
        f
    }

    // A cross-frontier link validates when the target vfr is declared as a
    // project dependency with both a locator and a pinned snapshot hash.
    #[test]
    fn cross_frontier_link_with_declared_dep_passes() {
        let tmp = TempDir::new().unwrap();
        let target_vfr = "vfr_0000000000000aaa";
        let f1 = make_valid_finding("vf_0000000000000001");
        let f2 = make_finding_with_link(
            "vf_0000000000000002",
            &format!("vf_0000000000000003@{target_vfr}"),
        );
        let mut c = project::assemble("test", vec![f1, f2], 1, 0, "Test");
        c.project.dependencies.push(project::ProjectDependency {
            name: "ext-frontier".into(),
            source: "vela.hub".into(),
            version: None,
            pinned_hash: None,
            vfr_id: Some(target_vfr.into()),
            locator: Some("https://example.test/ext.json".into()),
            pinned_snapshot_hash: Some("a".repeat(64)),
        });
        let path = write_project(tmp.path(), &c);
        let report = validate(&path);
        let cross_errors: Vec<_> = report
            .errors
            .iter()
            .filter(|e| e.error.contains("cross-frontier") || e.error.contains("undeclared"))
            .collect();
        assert!(
            cross_errors.is_empty(),
            "expected no cross-frontier errors, got: {cross_errors:?}",
        );
    }

    // A cross-frontier link to a vfr that is not declared as a dependency
    // is an undeclared-dependency error.
    #[test]
    fn cross_frontier_link_without_declared_dep_fails() {
        let tmp = TempDir::new().unwrap();
        let f = make_finding_with_link(
            "vf_0000000000000001",
            "vf_0000000000000002@vfr_0000000000000bbb",
        );
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("undeclared dependency")),
            "expected undeclared-dep error, got: {:?}",
            report.errors
        );
    }

    // A declared dependency with a vfr_id must also carry a locator and a
    // pinned snapshot hash; each omission is reported separately.
    #[test]
    fn cross_frontier_dep_without_locator_or_snapshot_fails() {
        let tmp = TempDir::new().unwrap();
        let mut c = project::assemble(
            "test",
            vec![make_valid_finding("vf_0000000000000001")],
            1,
            0,
            "Test",
        );
        c.project.dependencies.push(project::ProjectDependency {
            name: "incomplete-dep".into(),
            source: "vela.hub".into(),
            version: None,
            pinned_hash: None,
            vfr_id: Some("vfr_0000000000000ccc".into()),
            locator: None,
            pinned_snapshot_hash: None,
        });
        let path = write_project(tmp.path(), &c);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("missing 'locator'")),
            "expected missing-locator error",
        );
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("missing 'pinned_snapshot_hash'")),
            "expected missing-snapshot error",
        );
    }

    // A malformed `@vfr_...` suffix on a link target is rejected.
    #[test]
    fn malformed_cross_frontier_link_target_fails() {
        let tmp = TempDir::new().unwrap();
        // bad: "vfr_too_short" is not the required 20-char form (vfr_ + 16 hex)
        let f = make_finding_with_link("vf_0000000000000001", "vf_0000000000000002@vfr_too_short");
        let path = write_frontier(tmp.path(), vec![f]);
        let report = validate(&path);
        assert!(
            report
                .errors
                .iter()
                .any(|e| e.error.contains("vfr_ part must be 20 chars")),
            "expected malformed-vfr error, got: {:?}",
            report.errors
        );
    }
}