1use std::collections::HashSet;
4use std::path::Path;
5
6use chrono::DateTime;
7use colored::Colorize;
8
9use crate::cli_style as style;
10use serde::{Deserialize, Serialize};
11
12use crate::bundle::{
13 ConfidenceMethod, FindingBundle, VALID_ASSERTION_TYPES, VALID_ENTITY_TYPES,
14 VALID_EVIDENCE_TYPES, VALID_LINK_TYPES, VALID_PROVENANCE_SOURCE_TYPES,
15};
16use crate::lint;
17use crate::normalize;
18use crate::packet;
19use crate::repo;
20
/// Accepted values for `provenance.extraction.method` on a finding
/// (checked in `validate_finding`).
const VALID_EXTRACT_METHODS: &[&str] = &[
    "llm_extraction",
    "manual_curation",
    "database_import",
    "hybrid",
    "notes_compiler_via_claude_cli",
    "scout_via_claude_cli",
    "artifact_to_state_import",
];
34
/// Accepted values for a link's `inferred_by` field (checked in `validate_finding`).
const VALID_LINK_INFERRED_BY: &[&str] = &["compiler", "reviewer", "author"];
36
/// A single schema-validation failure, tagged with the file or finding
/// label it was detected in.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValidationError {
    /// File path or finding ID (`vf_…`) the error belongs to.
    pub file: String,
    /// Human-readable description of the problem.
    pub error: String,
}
43
/// Aggregate result of `validate`: counters plus the full error list.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValidationReport {
    /// Number of findings examined (0 when the source failed to load).
    pub total_files: usize,
    /// Findings that produced no errors.
    pub valid: usize,
    /// Distinct error labels that produced at least one error.
    pub invalid: usize,
    /// All collected validation errors.
    pub errors: Vec<ValidationError>,
}
52
/// How safely a diagnostic can be repaired automatically.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Fixability {
    /// Can be applied automatically without human review.
    Safe,
    /// A fix exists but a human must approve it.
    ManualReview,
    /// No automated fix is available.
    NotFixable,
}
60
/// Which checks `quality_report` should run; `Default` enables all of them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QualityCheckOptions {
    /// Run schema validation (`validate`).
    pub schema: bool,
    /// Run per-finding lint rules.
    pub lint: bool,
    /// Run frontier-wide graph lint rules.
    pub graph: bool,
    /// Build the deterministic repair plan.
    pub repair_plan: bool,
}
68
69impl Default for QualityCheckOptions {
70 fn default() -> Self {
71 Self {
72 schema: true,
73 lint: true,
74 graph: true,
75 repair_plan: true,
76 }
77 }
78}
79
/// One issue discovered by a quality check, normalized across the schema,
/// lint, and graph sections.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityDiagnostic {
    /// Id of the section that produced this diagnostic ("schema", "lint", …).
    pub check_id: String,
    /// Severity string; the report summary tallies "error", "warning", "info".
    pub severity: String,
    /// Stable rule identifier (e.g. "schema.entity_type").
    pub rule_id: String,
    /// Finding the diagnostic applies to, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finding_id: Option<String>,
    /// Source file or label, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file: Option<String>,
    /// Location detail; never populated by the checks in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
    /// Suggested remediation, when one is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suggestion: Option<String>,
    /// How safely this diagnostic can be auto-repaired.
    pub fixability: Fixability,
}
96
/// Result of one quality check ("schema", "lint", "graph", or "load").
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityCheckSection {
    /// Section identifier.
    pub id: String,
    /// "pass", "warn", or "fail".
    pub status: String,
    /// Number of findings this check examined.
    pub checked: usize,
    /// Number of failures recorded by this check.
    pub failed: usize,
    /// Diagnostics produced by this check.
    pub diagnostics: Vec<QualityDiagnostic>,
}
105
/// Roll-up counters across all executed check sections.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct QualitySummary {
    /// Overall status: "pass", "warn", or "fail".
    pub status: String,
    /// Findings examined (schema totals when schema ran, loaded count otherwise).
    pub checked_findings: usize,
    /// Findings that passed schema validation.
    pub valid_findings: usize,
    /// Findings that failed (or the error count, when schema was skipped).
    pub invalid_findings: usize,
    /// Total error-severity diagnostics.
    pub errors: usize,
    /// Total warning-severity diagnostics.
    pub warnings: usize,
    /// Total info-severity diagnostics.
    pub info: usize,
    /// Repair-plan items flagged safe to auto-apply.
    pub safe_repairs: usize,
}
117
/// A single proposed change from the repair planner.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepairPlanItem {
    /// Sequential identifier, e.g. "repair_0001".
    pub id: String,
    /// Finding the change applies to.
    pub finding_id: String,
    /// Path of the field being changed.
    pub path: String,
    /// Human-readable description of the change.
    pub action: String,
    /// Value before the change.
    pub before: serde_json::Value,
    /// Value after the change.
    pub after: serde_json::Value,
    /// Whether the change can be applied without manual review.
    pub safe: bool,
}
128
/// Ordered set of proposed repairs derived from the normalization planner.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RepairPlan {
    /// Always `true` for plans produced by `quality_report`.
    pub deterministic: bool,
    /// Number of items flagged safe to apply automatically.
    pub safe_items: usize,
    /// The individual repair steps, in planner order.
    pub items: Vec<RepairPlanItem>,
}
135
/// Top-level JSON-serializable result of `quality_report`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QualityCheckReport {
    /// True when no error-severity diagnostics were produced.
    pub ok: bool,
    /// Producing command name; always "check" here.
    pub command: String,
    /// Schema version the report was produced against.
    pub schema_version: String,
    /// Display form of the source path.
    pub source: String,
    /// Detected kind: "project_file", "vela_repo", "packet_dir", or "unknown".
    pub source_kind: String,
    /// Aggregated counters.
    pub summary: QualitySummary,
    /// Per-check sections in execution order.
    pub checks: Vec<QualityCheckSection>,
    /// Deterministic repair plan (possibly empty).
    pub repair_plan: RepairPlan,
}
147
148pub fn quality_report(source_path: &Path, options: QualityCheckOptions) -> QualityCheckReport {
153 let source = source_path.display().to_string();
154 let source_kind = repo::detect(source_path)
155 .map(|s| source_kind(&s).to_string())
156 .unwrap_or_else(|_| "unknown".to_string());
157
158 let validation = if options.schema {
159 validate(source_path)
160 } else {
161 ValidationReport {
162 total_files: 0,
163 valid: 0,
164 invalid: 0,
165 errors: Vec::new(),
166 }
167 };
168
169 let mut checks = Vec::new();
170 if options.schema {
171 checks.push(schema_section(&validation));
172 }
173
174 let mut repair_items = Vec::new();
175 let mut loaded_findings = None;
176 if let Ok(frontier) = repo::load_from_path(source_path) {
177 loaded_findings = Some(frontier.findings.len());
178 if options.lint {
179 checks.push(lint_section("lint", lint::lint(&frontier, None, None)));
180 }
181 if options.graph {
182 checks.push(lint_section("graph", lint::lint_frontier(&frontier)));
183 }
184 if options.repair_plan {
185 repair_items = normalize::plan_project_changes(&frontier)
186 .into_iter()
187 .enumerate()
188 .map(|(idx, change)| RepairPlanItem {
189 id: format!("repair_{:04}", idx + 1),
190 finding_id: change.finding_id,
191 path: change.path,
192 action: change.description,
193 before: change.before,
194 after: change.after,
195 safe: change.safe,
196 })
197 .collect();
198 }
199 } else if !options.schema {
200 checks.push(QualityCheckSection {
201 id: "load".to_string(),
202 status: "fail".to_string(),
203 checked: 0,
204 failed: 1,
205 diagnostics: vec![QualityDiagnostic {
206 check_id: "load".to_string(),
207 severity: "error".to_string(),
208 rule_id: "load".to_string(),
209 finding_id: None,
210 file: Some(source.clone()),
211 path: None,
212 message: "Failed to load frontier source".to_string(),
213 suggestion: Some(
214 "Provide a frontier JSON file, VelaRepo, or packet directory".to_string(),
215 ),
216 fixability: Fixability::ManualReview,
217 }],
218 });
219 }
220
221 let errors = checks
222 .iter()
223 .flat_map(|c| c.diagnostics.iter())
224 .filter(|d| d.severity == "error")
225 .count();
226 let warnings = checks
227 .iter()
228 .flat_map(|c| c.diagnostics.iter())
229 .filter(|d| d.severity == "warning")
230 .count();
231 let info = checks
232 .iter()
233 .flat_map(|c| c.diagnostics.iter())
234 .filter(|d| d.severity == "info")
235 .count();
236 let status = if errors > 0 {
237 "fail"
238 } else if warnings > 0 || info > 0 {
239 "warn"
240 } else {
241 "pass"
242 };
243 let safe_repairs = repair_items.iter().filter(|item| item.safe).count();
244
245 QualityCheckReport {
246 ok: errors == 0,
247 command: "check".to_string(),
248 schema_version: crate::project::VELA_SCHEMA_VERSION.to_string(),
249 source,
250 source_kind,
251 summary: QualitySummary {
252 status: status.to_string(),
253 checked_findings: if options.schema {
254 validation.total_files
255 } else {
256 loaded_findings.unwrap_or(0)
257 },
258 valid_findings: if options.schema {
259 validation.valid
260 } else {
261 loaded_findings.unwrap_or(0)
262 },
263 invalid_findings: if options.schema {
264 validation.invalid
265 } else {
266 errors
267 },
268 errors,
269 warnings,
270 info,
271 safe_repairs,
272 },
273 checks,
274 repair_plan: RepairPlan {
275 deterministic: true,
276 safe_items: safe_repairs,
277 items: repair_items,
278 },
279 }
280}
281
282pub fn quality_report_json(
283 source_path: &Path,
284 options: QualityCheckOptions,
285) -> Result<String, serde_json::Error> {
286 serde_json::to_string_pretty(&quality_report(source_path, options))
287}
288
289fn schema_section(report: &ValidationReport) -> QualityCheckSection {
290 let diagnostics = report
291 .errors
292 .iter()
293 .map(|error| QualityDiagnostic {
294 check_id: "schema".to_string(),
295 severity: "error".to_string(),
296 rule_id: schema_rule_id(&error.error).to_string(),
297 finding_id: if error.file.starts_with("vf_") {
298 Some(error.file.clone())
299 } else {
300 None
301 },
302 file: Some(error.file.clone()),
303 path: None,
304 message: error.error.clone(),
305 suggestion: schema_suggestion(&error.error).map(str::to_string),
306 fixability: schema_fixability(&error.error),
307 })
308 .collect::<Vec<_>>();
309
310 QualityCheckSection {
311 id: "schema".to_string(),
312 status: if diagnostics.is_empty() {
313 "pass".to_string()
314 } else {
315 "fail".to_string()
316 },
317 checked: report.total_files,
318 failed: report.invalid,
319 diagnostics,
320 }
321}
322
323fn lint_section(id: &str, report: lint::LintReport) -> QualityCheckSection {
324 let failed = report
325 .diagnostics
326 .iter()
327 .filter(|d| d.severity == lint::Severity::Error)
328 .count();
329 let diagnostics = report
330 .diagnostics
331 .into_iter()
332 .map(|diagnostic| QualityDiagnostic {
333 check_id: id.to_string(),
334 severity: diagnostic.severity.to_string(),
335 rule_id: diagnostic.rule_id.clone(),
336 finding_id: Some(diagnostic.finding_id),
337 file: None,
338 path: None,
339 message: diagnostic.message,
340 suggestion: Some(diagnostic.suggestion),
341 fixability: lint_fixability(&diagnostic.rule_id),
342 })
343 .collect::<Vec<_>>();
344
345 QualityCheckSection {
346 id: id.to_string(),
347 status: if failed > 0 {
348 "fail".to_string()
349 } else if diagnostics.is_empty() {
350 "pass".to_string()
351 } else {
352 "warn".to_string()
353 },
354 checked: report.findings_checked,
355 failed,
356 diagnostics,
357 }
358}
359
/// Map a schema error message to a stable machine-readable rule id.
///
/// Patterns are tested in order and the first match wins; unrecognized
/// messages fall back to the generic "schema" rule.
fn schema_rule_id(message: &str) -> &'static str {
    const PATTERNS: &[(&str, &'static str)] = &[
        ("Invalid entity type", "schema.entity_type"),
        ("Invalid assertion type", "schema.assertion_type"),
        ("Invalid evidence type", "schema.evidence_type"),
        ("does not match content-address", "schema.content_address"),
        ("Duplicate finding ID", "schema.duplicate_id"),
        ("does not exist in frontier", "schema.link_target"),
        ("not RFC3339", "schema.timestamp"),
        ("Project stats.", "schema.project_stats"),
        ("Packet validation failed", "schema.packet"),
        ("Failed to load", "schema.load"),
    ];
    PATTERNS
        .iter()
        .find(|(needle, _)| message.contains(needle))
        .map(|&(_, rule)| rule)
        .unwrap_or("schema")
}
385
/// Suggest a remediation for schema errors that have a known fix path.
/// Returns `None` for messages with no canned suggestion.
fn schema_suggestion(message: &str) -> Option<&'static str> {
    const SUGGESTIONS: &[(&str, &'static str)] = &[
        (
            "Invalid entity type",
            "Run the normalization plan/apply API to map entity types to schema vocabulary",
        ),
        (
            "Project stats.",
            "Reassemble or resave the frontier after applying content changes",
        ),
        (
            "does not match content-address",
            "Recompute finding IDs and update dependent links only after reviewing the identity change",
        ),
        (
            "does not exist in frontier",
            "Remove the broken link or add the missing target finding",
        ),
    ];
    SUGGESTIONS
        .iter()
        .find(|(needle, _)| message.contains(needle))
        .map(|&(_, suggestion)| suggestion)
}
401
402fn schema_fixability(message: &str) -> Fixability {
403 if message.contains("Invalid entity type") {
404 Fixability::Safe
405 } else if message.contains("Packet validation failed") || message.contains("Failed to load") {
406 Fixability::NotFixable
407 } else {
408 Fixability::ManualReview
409 }
410}
411
412fn lint_fixability(rule_id: &str) -> Fixability {
413 match rule_id {
414 "orphan"
415 | "missing_crossref"
416 | "unresolved_contradiction"
417 | "critical_gap"
418 | "fragile_anchor"
419 | "stale_superseded"
420 | "L001"
421 | "L002"
422 | "L003"
423 | "L004"
424 | "L005"
425 | "L006"
426 | "L007"
427 | "L008"
428 | "L009"
429 | "L010" => Fixability::ManualReview,
430 _ => Fixability::NotFixable,
431 }
432}
433
434fn source_kind(source: &repo::VelaSource) -> &'static str {
435 match source {
436 repo::VelaSource::ProjectFile(_) => "project_file",
437 repo::VelaSource::VelaRepo(_) => "vela_repo",
438 repo::VelaSource::PacketDir(_) => "packet_dir",
439 }
440}
441
/// Validate a frontier loaded from `source_path` against the schema rules.
///
/// Checks packet integrity (for packet directories), project metadata,
/// cross-frontier dependency declarations, and every finding in turn.
/// Error message text matters here: `schema_rule_id` and friends match on
/// substrings of these messages.
pub fn validate(source_path: &Path) -> ValidationReport {
    let source_label = source_path.display().to_string();
    let frontier = match repo::load_from_path(source_path) {
        Ok(c) => c,
        Err(e) => {
            // A source that cannot load at all yields a single load error.
            return ValidationReport {
                total_files: 0,
                valid: 0,
                invalid: 0,
                errors: vec![ValidationError {
                    file: source_path.display().to_string(),
                    error: format!("Failed to load: {e}"),
                }],
            };
        }
    };

    let mut errors: Vec<ValidationError> = Vec::new();
    let mut seen_ids: HashSet<String> = HashSet::new();
    // Every finding id in the frontier, used to resolve local link targets.
    let all_ids: HashSet<String> = frontier.findings.iter().map(|f| f.id.clone()).collect();
    // Declared cross-frontier dependencies by vfr id; cross links must
    // reference one of these.
    let declared_deps: HashSet<String> = frontier
        .cross_frontier_deps()
        .filter_map(|d| d.vfr_id.clone())
        .collect();

    // Packet directories get an extra structural check on top of the
    // per-finding validation below.
    if matches!(
        repo::detect(source_path),
        Ok(repo::VelaSource::PacketDir(_))
    ) && let Err(packet_err) = packet::validate(source_path)
    {
        errors.push(ValidationError {
            file: source_label.clone(),
            error: format!("Packet validation failed: {packet_err}"),
        });
    }

    validate_project_metadata(&frontier, source_path, &mut errors);

    // Declared dependencies must carry enough information to be located
    // and pinned to a snapshot.
    for dep in frontier.cross_frontier_deps() {
        let Some(vfr) = &dep.vfr_id else { continue };
        if dep.locator.as_deref().unwrap_or("").is_empty() {
            errors.push(ValidationError {
                file: source_label.clone(),
                error: format!("Cross-frontier dependency '{vfr}' is missing 'locator'"),
            });
        }
        if dep.pinned_snapshot_hash.as_deref().unwrap_or("").is_empty() {
            errors.push(ValidationError {
                file: source_label.clone(),
                error: format!(
                    "Cross-frontier dependency '{vfr}' is missing 'pinned_snapshot_hash'"
                ),
            });
        }
    }

    for finding in &frontier.findings {
        let file_label = &finding.id;
        validate_finding(
            finding,
            file_label,
            &all_ids,
            &declared_deps,
            &mut seen_ids,
            &mut errors,
        );
    }

    // NOTE(review): `invalid` counts distinct error labels. Project-level
    // errors use the source path as their label, so the count can include
    // entries that are not findings; `saturating_sub` keeps `valid` from
    // underflowing in that case — confirm this is the intended semantics.
    let invalid_count = errors.iter().map(|e| &e.file).collect::<HashSet<_>>().len();
    let valid_count = frontier.findings.len().saturating_sub(invalid_count);

    ValidationReport {
        total_files: frontier.findings.len(),
        valid: valid_count,
        invalid: invalid_count,
        errors,
    }
}
526
/// Validate a single finding: id shape and content-address, timestamps,
/// controlled-vocabulary fields, confidence, and every outgoing link.
///
/// `seen_ids` accumulates ids across calls so duplicates are detected;
/// all problems are appended to `errors` under `file_label`.
fn validate_finding(
    finding: &FindingBundle,
    file_label: &str,
    all_ids: &HashSet<String>,
    declared_deps: &HashSet<String>,
    seen_ids: &mut HashSet<String>,
    errors: &mut Vec<ValidationError>,
) {
    // Canonical id shape: "vf_" + 16 hex chars (19 bytes total). The
    // length check short-circuits before the `[3..]` slice, so a short id
    // cannot panic here.
    let id_valid = finding.id.starts_with("vf_")
        && finding.id.len() == 19
        && finding.id[3..].chars().all(|c| c.is_ascii_hexdigit());
    if !id_valid {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid ID format '{}': expected vf_ + 16 hex chars",
                finding.id
            ),
        });
    }

    // `insert` returns false when the id was already seen in this run.
    if !seen_ids.insert(finding.id.clone()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Duplicate finding ID '{}'", finding.id),
        });
    }

    if finding.assertion.text.is_empty() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Assertion text is empty".to_string(),
        });
    }

    // `created` is required and must parse as RFC3339; `updated` is
    // optional but must parse when present and non-empty.
    if finding.created.is_empty() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Created timestamp is empty".to_string(),
        });
    }
    if !finding.created.is_empty() && DateTime::parse_from_rfc3339(&finding.created).is_err() {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Created timestamp '{}' is not RFC3339", finding.created),
        });
    }
    if let Some(updated) = &finding.updated
        && !updated.is_empty()
        && DateTime::parse_from_rfc3339(updated).is_err()
    {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!("Updated timestamp '{}' is not RFC3339", updated),
        });
    }

    // The id must equal the content address of (assertion, provenance);
    // editing either without recomputing the id is an identity change.
    let expected_id = FindingBundle::content_address(&finding.assertion, &finding.provenance);
    if finding.id != expected_id {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Finding id '{}' does not match content-address '{}'",
                finding.id, expected_id
            ),
        });
    }

    if !(0.0..=1.0).contains(&finding.confidence.score) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Confidence score {} is outside 0.0-1.0 range",
                finding.confidence.score
            ),
        });
    }

    // Controlled-vocabulary checks against the bundle-level constant lists.
    if !VALID_ASSERTION_TYPES.contains(&finding.assertion.assertion_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid assertion type '{}'. Valid: {}",
                finding.assertion.assertion_type,
                VALID_ASSERTION_TYPES.join(", "),
            ),
        });
    }

    if !VALID_EVIDENCE_TYPES.contains(&finding.evidence.evidence_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid evidence type '{}'. Valid: {}",
                finding.evidence.evidence_type,
                VALID_EVIDENCE_TYPES.join(", "),
            ),
        });
    }

    for entity in &finding.assertion.entities {
        if !VALID_ENTITY_TYPES.contains(&entity.entity_type.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!(
                    "Invalid entity type '{}' for entity '{}'. Valid: {}",
                    entity.entity_type,
                    entity.name,
                    VALID_ENTITY_TYPES.join(", "),
                ),
            });
        }
    }

    if !VALID_PROVENANCE_SOURCE_TYPES.contains(&finding.provenance.source_type.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid source_type '{}'. Valid: {}",
                finding.provenance.source_type,
                VALID_PROVENANCE_SOURCE_TYPES.join(", "),
            ),
        });
    }

    if !VALID_EXTRACT_METHODS.contains(&finding.provenance.extraction.method.as_str()) {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: format!(
                "Invalid extraction method '{}'. Valid: {}",
                finding.provenance.extraction.method,
                VALID_EXTRACT_METHODS.join(", "),
            ),
        });
    }

    // Computed confidence must expose the components it was derived from.
    if finding.confidence.method == ConfidenceMethod::Computed
        && finding.confidence.components.is_none()
    {
        errors.push(ValidationError {
            file: file_label.to_string(),
            error: "Computed confidence must include components".to_string(),
        });
    }

    // Link validation: parse the target, check its shape, then check that
    // local targets exist and cross targets reference a declared dependency.
    for link in &finding.links {
        match crate::bundle::LinkRef::parse(&link.target) {
            Err(e) => {
                errors.push(ValidationError {
                    file: file_label.to_string(),
                    error: format!("Invalid link target '{}': {e}", link.target),
                });
            }
            Ok(crate::bundle::LinkRef::Local { vf_id }) => {
                // Same "vf_" + 16 hex shape as the finding id itself.
                let id_well_formed =
                    vf_id.len() == 19 && vf_id[3..].chars().all(|c| c.is_ascii_hexdigit());
                if !id_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!("Invalid link target format '{}'", link.target),
                    });
                } else if !all_ids.contains(&vf_id) {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!("Link target '{}' does not exist in frontier", link.target),
                    });
                }
            }
            Ok(crate::bundle::LinkRef::Cross { vf_id, vfr_id }) => {
                // vf_ part: 19 chars; vfr_ part: "vfr_" + 16 hex = 20 chars.
                let vf_well_formed =
                    vf_id.len() == 19 && vf_id[3..].chars().all(|c| c.is_ascii_hexdigit());
                let vfr_well_formed =
                    vfr_id.len() == 20 && vfr_id[4..].chars().all(|c| c.is_ascii_hexdigit());
                if !vf_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Invalid cross-frontier link target '{}': vf_ part must be 19 chars (vf_ + 16 hex)",
                            link.target
                        ),
                    });
                }
                if !vfr_well_formed {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Invalid cross-frontier link target '{}': vfr_ part must be 20 chars (vfr_ + 16 hex)",
                            link.target
                        ),
                    });
                }
                // Only check declaration once the vfr id itself is well formed.
                if vfr_well_formed && !declared_deps.contains(&vfr_id) {
                    errors.push(ValidationError {
                        file: file_label.to_string(),
                        error: format!(
                            "Cross-frontier link target '{}' references undeclared dependency '{}'; add it via `vela frontier add-dep`",
                            link.target, vfr_id
                        ),
                    });
                }
            }
        }
        // Link metadata checks run regardless of how target parsing went.
        if link.created_at.is_empty() {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Link created_at is empty for target '{}'", link.target),
            });
        } else if DateTime::parse_from_rfc3339(&link.created_at).is_err() {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Link created_at '{}' is not RFC3339", link.created_at),
            });
        }
        if !VALID_LINK_TYPES.contains(&link.link_type.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Invalid link type '{}'", link.link_type),
            });
        }
        if !VALID_LINK_INFERRED_BY.contains(&link.inferred_by.as_str()) {
            errors.push(ValidationError {
                file: file_label.to_string(),
                error: format!("Invalid link inferred_by '{}'", link.inferred_by),
            });
        }
    }
}
771
/// Validate frontier-level metadata: version and schema vocabulary, the
/// compiler string, the compile timestamp, and the cached stats counters.
fn validate_project_metadata(
    frontier: &crate::project::Project,
    source_path: &Path,
    errors: &mut Vec<ValidationError>,
) {
    // Accepted versions and schema URLs; extend both lists together when a
    // new schema revision ships.
    const KNOWN_VELA_VERSIONS: &[&str] = &["0.8.0", "0.10.0"];
    const KNOWN_SCHEMA_URLS: &[&str] = &[
        "https://vela.science/schema/finding-bundle/v0.8.0",
        "https://vela.science/schema/finding-bundle/v0.10.0",
    ];
    if !KNOWN_VELA_VERSIONS.contains(&frontier.vela_version.as_str()) {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Unknown vela_version '{}': expected one of {}",
                frontier.vela_version,
                KNOWN_VELA_VERSIONS.join(", "),
            ),
        });
    }
    if !KNOWN_SCHEMA_URLS.contains(&frontier.schema.as_str()) {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Unknown schema '{}': expected one of {}",
                frontier.schema,
                KNOWN_SCHEMA_URLS.join(", "),
            ),
        });
    }
    // Compiler must look like "vela/X.Y.Z": the prefix plus a non-empty tail.
    if !frontier.project.compiler.starts_with("vela/")
        || frontier.project.compiler.len() <= "vela/".len()
    {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Invalid compiler '{}': expected 'vela/X.Y.Z'",
                frontier.project.compiler,
            ),
        });
    }
    if frontier.project.compiled_at.is_empty() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: "Project compiled_at is empty".to_string(),
        });
    } else if DateTime::parse_from_rfc3339(&frontier.project.compiled_at).is_err() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project compiled_at '{}' is not RFC3339",
                frontier.project.compiled_at
            ),
        });
    }

    // The cached stats block must agree with the actual findings/links.
    let expected_links: usize = frontier.findings.iter().map(|f| f.links.len()).sum();
    if frontier.stats.findings != frontier.findings.len() {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project stats.findings {} does not match findings length {}",
                frontier.stats.findings,
                frontier.findings.len()
            ),
        });
    }
    if frontier.stats.links != expected_links {
        errors.push(ValidationError {
            file: source_path.display().to_string(),
            error: format!(
                "Project stats.links {} does not match aggregated links {}",
                frontier.stats.links, expected_links
            ),
        });
    }
}
861
/// CLI entry point: validate `source`, print a human-readable summary to
/// stdout, and exit the process with status 1 when any finding is invalid.
pub fn run(source: &Path) {
    let report = validate(source);

    println!();
    println!("  {}", "VELA · VALIDATE".dimmed());
    println!("  {}", style::tick_row(60));
    println!("  total findings: {}", report.total_files);
    println!(
        "  valid:          {}",
        style::moss(report.valid.to_string())
    );
    // Highlight the invalid count only when it is non-zero.
    println!(
        "  invalid:        {}",
        if report.invalid > 0 {
            style::madder(report.invalid.to_string()).to_string()
        } else {
            report.invalid.to_string()
        }
    );

    if !report.errors.is_empty() {
        println!();
        println!("  {}", "ERRORS".dimmed());
        for err in &report.errors {
            println!(
                "    {} {} · {}",
                style::madder("-"),
                err.file.dimmed(),
                err.error
            );
        }
    } else {
        println!("\n  {} all findings valid.", style::ok("ok"));
    }

    // Non-zero exit so scripts can gate on validation failures.
    if report.invalid > 0 {
        std::process::exit(1);
    }
}
902
903#[cfg(test)]
904mod tests {
905 use super::*;
906 use crate::bundle::*;
907 use crate::project;
908 use chrono::Utc;
909 use tempfile::TempDir;
910
911 fn make_valid_finding(seed: &str) -> FindingBundle {
912 let assertion = Assertion {
913 text: format!("Test assertion {}", seed),
914 assertion_type: "mechanism".into(),
915 entities: vec![],
916 relation: None,
917 direction: None,
918 causal_claim: None,
919 causal_evidence_grade: None,
920 };
921 let provenance = Provenance {
922 source_type: "published_paper".into(),
923 doi: Some(format!("10.0000/{}", seed)),
924 pmid: None,
925 pmc: None,
926 openalex_id: None,
927 url: None,
928 title: format!("Test {seed}"),
929 authors: vec![],
930 year: Some(2024),
931 journal: None,
932 license: None,
933 publisher: None,
934 funders: vec![],
935 extraction: Extraction {
936 method: "llm_extraction".into(),
937 model: None,
938 model_version: None,
939 extracted_at: "1970-01-01T00:00:00Z".to_string(),
940 extractor_version: "vela/0.2.0".to_string(),
941 },
942 review: None,
943 citation_count: None,
944 };
945 let mut finding = FindingBundle::new(
946 assertion,
947 Evidence {
948 evidence_type: "experimental".into(),
949 model_system: String::new(),
950 species: None,
951 method: String::new(),
952 sample_size: None,
953 effect_size: None,
954 p_value: None,
955 replicated: false,
956 replication_count: None,
957 evidence_spans: vec![],
958 },
959 Conditions {
960 text: String::new(),
961 species_verified: vec![],
962 species_unverified: vec![],
963 in_vitro: false,
964 in_vivo: false,
965 human_data: false,
966 clinical_trial: false,
967 concentration_range: None,
968 duration: None,
969 age_group: None,
970 cell_type: None,
971 },
972 Confidence::raw(0.85, "test", 0.9),
973 provenance,
974 Flags {
975 gap: false,
976 negative_space: false,
977 contested: false,
978 retracted: false,
979 declining: false,
980 gravity_well: false,
981 review_state: None,
982 superseded: false,
983 signature_threshold: None,
984 jointly_accepted: false,
985 },
986 );
987 finding.id = FindingBundle::content_address(&finding.assertion, &finding.provenance);
988 finding
989 }
990
991 fn write_frontier(dir: &Path, findings: Vec<FindingBundle>) -> std::path::PathBuf {
992 let c = project::assemble("test", findings, 1, 0, "Test");
993 let path = dir.join("test.json");
994 let json = serde_json::to_string_pretty(&c).unwrap();
995 std::fs::write(&path, json).unwrap();
996 path
997 }
998
999 fn write_project(dir: &Path, frontier: &project::Project) -> std::path::PathBuf {
1000 let path = dir.join("test.json");
1001 let json = serde_json::to_string_pretty(frontier).unwrap();
1002 std::fs::write(&path, json).unwrap();
1003 path
1004 }
1005
1006 #[test]
1007 fn valid_frontier_passes() {
1008 let tmp = TempDir::new().unwrap();
1009 let path = write_frontier(
1010 tmp.path(),
1011 vec![
1012 make_valid_finding("vf_0000000000000001"),
1013 make_valid_finding("vf_0000000000000002"),
1014 ],
1015 );
1016 let report = validate(&path);
1017 assert_eq!(report.total_files, 2);
1018 assert_eq!(report.valid, 2);
1019 assert_eq!(report.invalid, 0);
1020 assert!(report.errors.is_empty());
1021 }
1022
1023 #[test]
1024 fn project_metadata_validation() {
1025 let tmp = TempDir::new().unwrap();
1026 let mut c = project::assemble(
1027 "test",
1028 vec![make_valid_finding("vf_0000000000000001")],
1029 1,
1030 0,
1031 "Test",
1032 );
1033 c.vela_version = "0.1.0".into();
1034 let path = write_project(tmp.path(), &c);
1035 let report = validate(&path);
1036 assert!(
1037 report
1038 .errors
1039 .iter()
1040 .any(|e| e.error.contains("Unknown vela_version"))
1041 );
1042 }
1043
1044 #[test]
1045 fn invalid_provenance_source_type_detected() {
1046 let tmp = TempDir::new().unwrap();
1047 let mut f = make_valid_finding("vf_0000000000000001");
1048 f.provenance.source_type = "invalid_source".into();
1049 let path = write_frontier(tmp.path(), vec![f]);
1050 let report = validate(&path);
1051 assert!(
1052 report
1053 .errors
1054 .iter()
1055 .any(|e| e.error.contains("Invalid source_type"))
1056 );
1057 }
1058
1059 #[test]
1060 fn invalid_extraction_method_detected() {
1061 let tmp = TempDir::new().unwrap();
1062 let mut f = make_valid_finding("vf_0000000000000001");
1063 f.provenance.extraction.method = "invalid_method".into();
1064 let path = write_frontier(tmp.path(), vec![f]);
1065 let report = validate(&path);
1066 assert!(
1067 report
1068 .errors
1069 .iter()
1070 .any(|e| e.error.contains("Invalid extraction method"))
1071 );
1072 }
1073
1074 #[test]
1075 fn invalid_computed_confidence_components_detected() {
1076 let tmp = TempDir::new().unwrap();
1077 let mut f = make_valid_finding("vf_0000000000000001");
1078 f.confidence.method = ConfidenceMethod::Computed;
1079 f.confidence.components = None;
1080 let path = write_frontier(tmp.path(), vec![f]);
1081 let report = validate(&path);
1082 assert!(report.errors.iter().any(|e| {
1083 e.error
1084 .contains("Computed confidence must include components")
1085 }));
1086 }
1087
1088 #[test]
1089 fn invalid_content_address_detected() {
1090 let tmp = TempDir::new().unwrap();
1091 let mut f = make_valid_finding("vf_0000000000000001");
1092 f.id = "vf_0000000000000002".into();
1093 let path = write_frontier(tmp.path(), vec![f]);
1094 let report = validate(&path);
1095 assert!(
1096 report
1097 .errors
1098 .iter()
1099 .any(|e| e.error.contains("does not match content-address"))
1100 );
1101 }
1102
1103 #[test]
1104 fn invalid_link_type_detected() {
1105 let tmp = TempDir::new().unwrap();
1106 let mut f = make_valid_finding("vf_link_type");
1107 let target = f.id.clone();
1108 f.links.push(Link {
1109 target,
1110 link_type: "bad_type".into(),
1111 note: String::new(),
1112 inferred_by: "compiler".into(),
1113 created_at: Utc::now().to_rfc3339(),
1114 mechanism: None,
1115 });
1116 let path = write_frontier(tmp.path(), vec![f]);
1117 let report = validate(&path);
1118 assert!(
1119 report
1120 .errors
1121 .iter()
1122 .any(|e| e.error.contains("Invalid link type"))
1123 );
1124 }
1125
1126 #[test]
1127 fn invalid_id_format_detected() {
1128 let tmp = TempDir::new().unwrap();
1129 let mut f = make_valid_finding("bad_id");
1130 f.id = "bad_id".into();
1131 let path = write_frontier(tmp.path(), vec![f]);
1132 let report = validate(&path);
1133 assert!(report.invalid > 0);
1134 assert!(
1135 report
1136 .errors
1137 .iter()
1138 .any(|e| e.error.contains("Invalid ID format"))
1139 );
1140 }
1141
1142 #[test]
1143 fn invalid_confidence_detected() {
1144 let tmp = TempDir::new().unwrap();
1145 let mut f = make_valid_finding("vf_0000000000000001");
1146 f.confidence.score = 1.5;
1147 let path = write_frontier(tmp.path(), vec![f]);
1148 let report = validate(&path);
1149 assert!(
1150 report
1151 .errors
1152 .iter()
1153 .any(|e| e.error.contains("Confidence score"))
1154 );
1155 }
1156
1157 #[test]
1158 fn invalid_assertion_type_detected() {
1159 let tmp = TempDir::new().unwrap();
1160 let mut f = make_valid_finding("vf_0000000000000001");
1161 f.assertion.assertion_type = "bogus_type".into();
1162 let path = write_frontier(tmp.path(), vec![f]);
1163 let report = validate(&path);
1164 assert!(
1165 report
1166 .errors
1167 .iter()
1168 .any(|e| e.error.contains("Invalid assertion type"))
1169 );
1170 }
1171
1172 #[test]
1173 fn invalid_evidence_type_detected() {
1174 let tmp = TempDir::new().unwrap();
1175 let mut f = make_valid_finding("vf_0000000000000001");
1176 f.evidence.evidence_type = "anecdotal".into();
1177 let path = write_frontier(tmp.path(), vec![f]);
1178 let report = validate(&path);
1179 assert!(
1180 report
1181 .errors
1182 .iter()
1183 .any(|e| e.error.contains("Invalid evidence type"))
1184 );
1185 }
1186
1187 #[test]
1188 fn broken_link_target_detected() {
1189 let tmp = TempDir::new().unwrap();
1190 let mut f = make_valid_finding("vf_0000000000000001");
1191 f.links.push(Link {
1192 target: "vf_deadbeefdeadbeef".into(),
1193 link_type: "extends".into(),
1194 note: String::new(),
1195 inferred_by: "compiler".into(),
1196 created_at: Utc::now().to_rfc3339(),
1197 mechanism: None,
1198 });
1199 let path = write_frontier(tmp.path(), vec![f]);
1200 let report = validate(&path);
1201 assert!(
1202 report
1203 .errors
1204 .iter()
1205 .any(|e| e.error.contains("does not exist"))
1206 );
1207 }
1208
1209 #[test]
1210 fn duplicate_id_detected() {
1211 let tmp = TempDir::new().unwrap();
1212 let f1 = make_valid_finding("vf_0000000000000001");
1213 let f2 = make_valid_finding("vf_0000000000000001");
1214 let path = write_frontier(tmp.path(), vec![f1, f2]);
1215 let report = validate(&path);
1216 assert!(report.errors.iter().any(|e| e.error.contains("Duplicate")));
1217 }
1218
1219 #[test]
1220 fn invalid_entity_type_detected_and_marked_fixable() {
1221 let tmp = TempDir::new().unwrap();
1222 let mut f = make_valid_finding("vf_0000000000000001");
1223 f.assertion.entities.push(Entity {
1224 name: "BBB".into(),
1225 entity_type: "biological_barrier".into(),
1226 identifiers: serde_json::Map::new(),
1227 canonical_id: None,
1228 candidates: vec![],
1229 aliases: vec![],
1230 resolution_provenance: None,
1231 resolution_confidence: 1.0,
1232 resolution_method: None,
1233 species_context: None,
1234 needs_review: false,
1235 });
1236 f.id = FindingBundle::content_address(&f.assertion, &f.provenance);
1237 let path = write_frontier(tmp.path(), vec![f]);
1238
1239 let report = quality_report(&path, QualityCheckOptions::default());
1240
1241 assert!(
1242 report
1243 .checks
1244 .iter()
1245 .flat_map(|check| check.diagnostics.iter())
1246 .any(|diagnostic| diagnostic.rule_id == "schema.entity_type"
1247 && diagnostic.fixability == Fixability::Safe)
1248 );
1249 assert!(report.repair_plan.safe_items >= 2);
1250 }
1251
1252 #[test]
1253 fn quality_report_includes_schema_lint_and_graph_sections() {
1254 let tmp = TempDir::new().unwrap();
1255 let mut f = make_valid_finding("vf_0000000000000001");
1256 f.evidence.sample_size = Some("n=4".into());
1257 f.evidence.replicated = false;
1258 f.confidence.score = 0.9;
1259 f.id = FindingBundle::content_address(&f.assertion, &f.provenance);
1260 let path = write_frontier(tmp.path(), vec![f]);
1261
1262 let report = quality_report(&path, QualityCheckOptions::default());
1263
1264 assert!(report.checks.iter().any(|check| check.id == "schema"));
1265 assert!(report.checks.iter().any(|check| check.id == "lint"));
1266 assert!(report.checks.iter().any(|check| check.id == "graph"));
1267 assert!(
1268 report
1269 .checks
1270 .iter()
1271 .flat_map(|check| check.diagnostics.iter())
1272 .any(|diagnostic| diagnostic.rule_id == "L001")
1273 );
1274 assert!(
1275 report
1276 .checks
1277 .iter()
1278 .flat_map(|check| check.diagnostics.iter())
1279 .any(|diagnostic| diagnostic.rule_id == "orphan")
1280 );
1281 }
1282
1283 fn make_finding_with_link(seed: &str, target: &str) -> FindingBundle {
1286 let mut f = make_valid_finding(seed);
1287 f.links = vec![Link {
1288 target: target.to_string(),
1289 link_type: "extends".to_string(),
1290 note: String::new(),
1291 inferred_by: "compiler".to_string(),
1292 created_at: "2024-01-01T00:00:00Z".to_string(),
1293 mechanism: None,
1294 }];
1295 f
1296 }
1297
1298 #[test]
1299 fn cross_frontier_link_with_declared_dep_passes() {
1300 let tmp = TempDir::new().unwrap();
1301 let target_vfr = "vfr_0000000000000aaa";
1302 let f1 = make_valid_finding("vf_0000000000000001");
1303 let f2 = make_finding_with_link(
1304 "vf_0000000000000002",
1305 &format!("vf_0000000000000003@{target_vfr}"),
1306 );
1307 let mut c = project::assemble("test", vec![f1, f2], 1, 0, "Test");
1308 c.project.dependencies.push(project::ProjectDependency {
1309 name: "ext-frontier".into(),
1310 source: "vela.hub".into(),
1311 version: None,
1312 pinned_hash: None,
1313 vfr_id: Some(target_vfr.into()),
1314 locator: Some("https://example.test/ext.json".into()),
1315 pinned_snapshot_hash: Some("a".repeat(64)),
1316 });
1317 let path = write_project(tmp.path(), &c);
1318 let report = validate(&path);
1319 let cross_errors: Vec<_> = report
1320 .errors
1321 .iter()
1322 .filter(|e| e.error.contains("cross-frontier") || e.error.contains("undeclared"))
1323 .collect();
1324 assert!(
1325 cross_errors.is_empty(),
1326 "expected no cross-frontier errors, got: {cross_errors:?}",
1327 );
1328 }
1329
1330 #[test]
1331 fn cross_frontier_link_without_declared_dep_fails() {
1332 let tmp = TempDir::new().unwrap();
1333 let f = make_finding_with_link(
1334 "vf_0000000000000001",
1335 "vf_0000000000000002@vfr_0000000000000bbb",
1336 );
1337 let path = write_frontier(tmp.path(), vec![f]);
1338 let report = validate(&path);
1339 assert!(
1340 report
1341 .errors
1342 .iter()
1343 .any(|e| e.error.contains("undeclared dependency")),
1344 "expected undeclared-dep error, got: {:?}",
1345 report.errors
1346 );
1347 }
1348
1349 #[test]
1350 fn cross_frontier_dep_without_locator_or_snapshot_fails() {
1351 let tmp = TempDir::new().unwrap();
1352 let mut c = project::assemble(
1353 "test",
1354 vec![make_valid_finding("vf_0000000000000001")],
1355 1,
1356 0,
1357 "Test",
1358 );
1359 c.project.dependencies.push(project::ProjectDependency {
1360 name: "incomplete-dep".into(),
1361 source: "vela.hub".into(),
1362 version: None,
1363 pinned_hash: None,
1364 vfr_id: Some("vfr_0000000000000ccc".into()),
1365 locator: None,
1366 pinned_snapshot_hash: None,
1367 });
1368 let path = write_project(tmp.path(), &c);
1369 let report = validate(&path);
1370 assert!(
1371 report
1372 .errors
1373 .iter()
1374 .any(|e| e.error.contains("missing 'locator'")),
1375 "expected missing-locator error",
1376 );
1377 assert!(
1378 report
1379 .errors
1380 .iter()
1381 .any(|e| e.error.contains("missing 'pinned_snapshot_hash'")),
1382 "expected missing-snapshot error",
1383 );
1384 }
1385
1386 #[test]
1387 fn malformed_cross_frontier_link_target_fails() {
1388 let tmp = TempDir::new().unwrap();
1389 let f = make_finding_with_link("vf_0000000000000001", "vf_0000000000000002@vfr_too_short");
1391 let path = write_frontier(tmp.path(), vec![f]);
1392 let report = validate(&path);
1393 assert!(
1394 report
1395 .errors
1396 .iter()
1397 .any(|e| e.error.contains("vfr_ part must be 20 chars")),
1398 "expected malformed-vfr error, got: {:?}",
1399 report.errors
1400 );
1401 }
1402}