Skip to main content

vela_protocol/
conformance.rs

1//! Conformance test runner — validates an implementation against test vectors.
2//!
3//! Reads JSON test vector files from a directory and runs each case against
4//! Vela's actual implementation, reporting pass/fail for each.
5
6use std::collections::HashSet;
7use std::path::Path;
8
9use colored::Colorize;
10
11use crate::cli_style as style;
12
13use crate::bundle::*;
14use crate::confidence;
15use crate::link;
16use crate::observer;
17use crate::project;
18use crate::propagate::{self, PropagationAction};
19
20/// Run all conformance test vectors in the given directory.
21/// Returns (passed, failed) counts.
22pub fn run(dir: &Path) -> (usize, usize) {
23    let mut passed = 0usize;
24    let mut failed = 0usize;
25
26    let mut entries: Vec<_> = std::fs::read_dir(dir)
27        .unwrap_or_else(|e| {
28            eprintln!(
29                "{} failed to read directory {}: {e}",
30                style::err_prefix(),
31                dir.display()
32            );
33            std::process::exit(1);
34        })
35        .filter_map(|e| e.ok())
36        .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
37        .collect();
38    entries.sort_by_key(|e| e.path());
39
40    if entries.is_empty() {
41        eprintln!("no .json test vector files found in {}", dir.display());
42        std::process::exit(1);
43    }
44
45    for entry in &entries {
46        let path = entry.path();
47        let content = std::fs::read_to_string(&path).unwrap_or_else(|e| {
48            eprintln!(
49                "{} failed to read {}: {e}",
50                style::err_prefix(),
51                path.display()
52            );
53            std::process::exit(1);
54        });
55        let suite: serde_json::Value = serde_json::from_str(&content).unwrap_or_else(|e| {
56            eprintln!(
57                "{} failed to parse {}: {e}",
58                style::err_prefix(),
59                path.display()
60            );
61            std::process::exit(1);
62        });
63
64        let suite_name = suite["suite"].as_str().unwrap_or("unknown");
65        let cases = suite["cases"].as_array();
66
67        if cases.is_none() {
68            eprintln!("  no cases found in {}", path.display());
69            continue;
70        }
71
72        println!();
73        println!(
74            "  {}",
75            format!("SUITE · {suite_name}").to_uppercase().dimmed()
76        );
77        println!("  {}", style::tick_row(60));
78
79        for case in cases.unwrap() {
80            let name = case["name"].as_str().unwrap_or("unnamed");
81            let input = &case["input"];
82            let expected = &case["expected_output"];
83
84            let result = match suite_name {
85                "id-generation" => run_id_generation(input, expected),
86                "link-inference" => run_link_inference(input, expected),
87                "confidence-scoring" => run_confidence_scoring(input, expected),
88                "retraction-propagation" => run_retraction_propagation(input, expected),
89                "replication-cascade" => run_retraction_propagation(input, expected),
90                "observer-policies" => run_observer_policies(input, expected),
91                "directory-layout" => run_directory_layout(input, expected),
92                "proposal-idempotency" => run_proposal_idempotency(input, expected),
93                "note-provenance" => run_proposal_idempotency(input, expected),
94                "registry-publish-pull" => run_registry_publish_pull(input, expected),
95                "auto-apply-tier" => run_auto_apply_tier(input, expected),
96                _ => {
97                    eprintln!("  {} unknown suite: {suite_name}", style::err_prefix());
98                    Err("unknown suite".into())
99                }
100            };
101
102            match result {
103                Ok(()) => {
104                    println!("  {} {name}", style::ok("ok"));
105                    passed += 1;
106                }
107                Err(msg) => {
108                    println!("  {} {name}: {msg}", style::lost("lost"));
109                    failed += 1;
110                }
111            }
112        }
113    }
114
115    println!();
116    if failed == 0 {
117        println!(
118            "  {} all {passed} conformance tests passed.",
119            style::ok("ok")
120        );
121    } else {
122        println!(
123            "  {} {passed} passed, {failed} failed.",
124            style::lost("lost")
125        );
126    }
127
128    (passed, failed)
129}
130
131// ── ID generation ───────────────────────────────────────────────────────
132
133fn run_id_generation(
134    input: &serde_json::Value,
135    expected: &serde_json::Value,
136) -> Result<(), String> {
137    let assertion: Assertion = serde_json::from_value(input["assertion"].clone())
138        .map_err(|e| format!("parse assertion: {e}"))?;
139    let evidence: Evidence = serde_json::from_value(input["evidence"].clone())
140        .map_err(|e| format!("parse evidence: {e}"))?;
141    let conditions: Conditions = serde_json::from_value(input["conditions"].clone())
142        .map_err(|e| format!("parse conditions: {e}"))?;
143    let confidence: Confidence = serde_json::from_value(input["confidence"].clone())
144        .map_err(|e| format!("parse confidence: {e}"))?;
145    let provenance: Provenance = serde_json::from_value(input["provenance"].clone())
146        .map_err(|e| format!("parse provenance: {e}"))?;
147
148    let flags = if input.get("flags").is_some() {
149        serde_json::from_value(input["flags"].clone()).unwrap_or_else(|_| default_flags())
150    } else {
151        default_flags()
152    };
153
154    let bundle = FindingBundle::new(
155        assertion, evidence, conditions, confidence, provenance, flags,
156    );
157
158    let expected_id = expected["id"].as_str().ok_or("missing expected id")?;
159    if bundle.id != expected_id {
160        return Err(format!("expected {expected_id}, got {}", bundle.id));
161    }
162
163    if let Some(len) = expected["id_length"].as_u64()
164        && bundle.id.len() != len as usize
165    {
166        return Err(format!("expected id length {len}, got {}", bundle.id.len()));
167    }
168
169    if let Some(prefix) = expected["prefix"].as_str()
170        && !bundle.id.starts_with(prefix)
171    {
172        return Err(format!("expected prefix {prefix}, got {}", &bundle.id[..3]));
173    }
174
175    Ok(())
176}
177
178// ── Link inference ──────────────────────────────────────────────────────
179
180fn make_test_finding(v: &serde_json::Value) -> FindingBundle {
181    let id = v["id"].as_str().unwrap_or("unknown").to_string();
182    let direction = v["direction"].as_str().map(|s| s.to_string());
183    let doi = v["doi"].as_str().map(|s| s.to_string());
184    let year = v["year"].as_i64().unwrap_or(2020) as i32;
185    let conf = v["confidence"].as_f64().unwrap_or(0.7);
186
187    let entities: Vec<Entity> = v["entities"]
188        .as_array()
189        .map(|arr| {
190            arr.iter()
191                .map(|e| {
192                    let aliases: Vec<String> = e["aliases"]
193                        .as_array()
194                        .map(|a| {
195                            a.iter()
196                                .filter_map(|s| s.as_str().map(|s| s.to_string()))
197                                .collect()
198                        })
199                        .unwrap_or_default();
200                    Entity {
201                        name: e["name"].as_str().unwrap_or("").to_string(),
202                        entity_type: e["type"].as_str().unwrap_or("other").to_string(),
203                        identifiers: serde_json::Map::new(),
204                        canonical_id: None,
205                        candidates: vec![],
206                        aliases,
207                        resolution_provenance: None,
208                        resolution_confidence: 1.0,
209                        resolution_method: None,
210                        species_context: None,
211                        needs_review: false,
212                    }
213                })
214                .collect()
215        })
216        .unwrap_or_default();
217
218    FindingBundle {
219        id,
220        version: 1,
221        previous_version: None,
222        assertion: Assertion {
223            text: "Test finding".to_string(),
224            assertion_type: "mechanism".into(),
225            entities,
226            relation: None,
227            direction,
228            causal_claim: None,
229            causal_evidence_grade: None,
230        },
231        evidence: Evidence {
232            evidence_type: "experimental".into(),
233            model_system: String::new(),
234            species: None,
235            method: String::new(),
236            sample_size: None,
237            effect_size: None,
238            p_value: None,
239            replicated: false,
240            replication_count: None,
241            evidence_spans: vec![],
242        },
243        conditions: Conditions {
244            text: String::new(),
245            species_verified: vec![],
246            species_unverified: vec![],
247            in_vitro: false,
248            in_vivo: false,
249            human_data: false,
250            clinical_trial: false,
251            concentration_range: None,
252            duration: None,
253            age_group: None,
254            cell_type: None,
255        },
256        confidence: Confidence::raw(conf, "test", 0.85),
257        provenance: Provenance {
258            source_type: "published_paper".into(),
259            doi,
260            pmid: None,
261            pmc: None,
262            openalex_id: None,
263            url: None,
264            title: "Test".into(),
265            authors: vec![],
266            year: Some(year),
267            journal: None,
268            license: None,
269            publisher: None,
270            funders: vec![],
271            extraction: Extraction::default(),
272            review: None,
273            citation_count: None,
274        },
275        flags: default_flags(),
276        links: vec![],
277        annotations: vec![],
278        attachments: vec![],
279        created: String::new(),
280        updated: None,
281        access_tier: crate::access_tier::AccessTier::Public,
282    }
283}
284
285fn run_link_inference(
286    input: &serde_json::Value,
287    expected: &serde_json::Value,
288) -> Result<(), String> {
289    let findings_val = input["findings"]
290        .as_array()
291        .ok_or("missing findings array")?;
292
293    let mut bundles: Vec<FindingBundle> = findings_val.iter().map(make_test_finding).collect();
294    let count = link::deterministic_links(&mut bundles);
295
296    let expected_count = expected["link_count"].as_u64().unwrap_or(0) as usize;
297    if count != expected_count {
298        return Err(format!("expected {expected_count} links, got {count}"));
299    }
300
301    if let Some(expected_links) = expected["links"].as_array() {
302        for el in expected_links {
303            let from_id = el["from"].as_str().unwrap_or("");
304            let to_id = el["to"].as_str().unwrap_or("");
305            let link_type = el["type"].as_str().unwrap_or("");
306            let inferred_by = el["inferred_by"].as_str().unwrap_or("");
307
308            let found = bundles.iter().any(|b| {
309                b.id == from_id
310                    && b.links.iter().any(|l| {
311                        l.target == to_id
312                            && l.link_type == link_type
313                            && (inferred_by.is_empty() || l.inferred_by == inferred_by)
314                    })
315            });
316
317            if !found {
318                return Err(format!(
319                    "expected link {from_id} -> {to_id} ({link_type}) not found"
320                ));
321            }
322
323            // Check note_contains if present.
324            if let Some(note_contains) = el["note_contains"].as_str() {
325                let has_note = bundles.iter().any(|b| {
326                    b.id == from_id
327                        && b.links
328                            .iter()
329                            .any(|l| l.target == to_id && l.note.contains(note_contains))
330                });
331                if !has_note {
332                    return Err(format!(
333                        "link {from_id} -> {to_id} note does not contain '{note_contains}'"
334                    ));
335                }
336            }
337        }
338    }
339
340    Ok(())
341}
342
343// ── Confidence scoring ──────────────────────────────────────────────────
344
345fn make_confidence_bundle(v: &serde_json::Value) -> FindingBundle {
346    let score = v
347        .get("seed_score")
348        .and_then(|value| value.as_f64())
349        .or_else(|| v.get("llm_score").and_then(|value| value.as_f64()))
350        .unwrap_or(0.7);
351    let citations = v["citation_count"].as_u64().unwrap_or(0);
352    let year = v["year"].as_i64().unwrap_or(2020) as i32;
353    let etype = v["evidence_type"].as_str().unwrap_or("experimental");
354    let human = v["human_data"].as_bool().unwrap_or(false);
355    let has_spans = v["has_evidence_spans"].as_bool().unwrap_or(false);
356
357    let bundle = FindingBundle {
358        id: "test".into(),
359        version: 1,
360        previous_version: None,
361        assertion: Assertion {
362            text: "Test".into(),
363            assertion_type: "mechanism".into(),
364            entities: vec![],
365            relation: None,
366            direction: None,
367            causal_claim: None,
368            causal_evidence_grade: None,
369        },
370        evidence: Evidence {
371            evidence_type: etype.into(),
372            model_system: String::new(),
373            species: None,
374            method: String::new(),
375            sample_size: None,
376            effect_size: None,
377            p_value: None,
378            replicated: false,
379            replication_count: None,
380            evidence_spans: if has_spans {
381                vec![serde_json::json!({"text": "span"})]
382            } else {
383                vec![]
384            },
385        },
386        conditions: Conditions {
387            text: String::new(),
388            species_verified: vec![],
389            species_unverified: vec![],
390            in_vitro: false,
391            in_vivo: false,
392            human_data: human,
393            clinical_trial: false,
394            concentration_range: None,
395            duration: None,
396            age_group: None,
397            cell_type: None,
398        },
399        confidence: Confidence::raw(score, "seeded prior", 0.85),
400        provenance: Provenance {
401            source_type: "published_paper".into(),
402            doi: None,
403            pmid: None,
404            pmc: None,
405            openalex_id: None,
406            url: None,
407            title: "Test".into(),
408            authors: vec![],
409            year: Some(year),
410            journal: None,
411            license: None,
412            publisher: None,
413            funders: vec![],
414            extraction: Extraction::default(),
415            review: None,
416            citation_count: Some(citations),
417        },
418        flags: default_flags(),
419        links: vec![],
420        annotations: vec![],
421        attachments: vec![],
422        created: String::new(),
423        updated: None,
424        access_tier: crate::access_tier::AccessTier::Public,
425    };
426
427    #[allow(clippy::let_and_return)]
428    bundle
429}
430
431fn run_confidence_scoring(
432    input: &serde_json::Value,
433    expected: &serde_json::Value,
434) -> Result<(), String> {
435    // Check if this is a comparison test.
436    if input["comparison"].as_bool().unwrap_or(false) {
437        let mut bundle_a = make_confidence_bundle(&input["finding_a"]);
438        let mut bundle_b = make_confidence_bundle(&input["finding_b"]);
439        bundle_a.id = "a".into();
440        bundle_b.id = "b".into();
441        let mut bundles = vec![bundle_a, bundle_b];
442        confidence::ground_confidence(&mut bundles);
443
444        if expected["a_higher_than_b"].as_bool().unwrap_or(false)
445            && bundles[0].confidence.score <= bundles[1].confidence.score
446        {
447            return Err(format!(
448                "expected a ({:.3}) > b ({:.3})",
449                bundles[0].confidence.score, bundles[1].confidence.score
450            ));
451        }
452        return Ok(());
453    }
454
455    let bundle = make_confidence_bundle(input);
456    let mut bundles = vec![bundle];
457    confidence::ground_confidence(&mut bundles);
458    let score = bundles[0].confidence.score;
459
460    if let Some(range) = expected["score_range"].as_array() {
461        let lo = range[0].as_f64().unwrap_or(0.0);
462        let hi = range[1].as_f64().unwrap_or(1.0);
463        if score < lo || score > hi {
464            return Err(format!("score {score:.3} not in range [{lo}, {hi}]"));
465        }
466    }
467
468    if let Some(floor) = expected["score_at_least"].as_f64()
469        && score < floor
470    {
471        return Err(format!("score {score:.3} below floor {floor}"));
472    }
473
474    if let Some(ceil) = expected["score_at_most"].as_f64()
475        && score > ceil
476    {
477        return Err(format!("score {score:.3} above ceiling {ceil}"));
478    }
479
480    if let Some(lower) = expected["score_lower_than"].as_f64()
481        && score >= lower
482    {
483        return Err(format!("expected score < {lower}, got {score:.3}"));
484    }
485
486    if let Some(higher) = expected["score_higher_than"].as_f64()
487        && score <= higher
488    {
489        return Err(format!("expected score > {higher}, got {score:.3}"));
490    }
491
492    Ok(())
493}
494
495// ── Simulated dependency impact ─────────────────────────────────────────
496
497fn run_retraction_propagation(
498    input: &serde_json::Value,
499    expected: &serde_json::Value,
500) -> Result<(), String> {
501    let findings_val = input["findings"]
502        .as_array()
503        .ok_or("missing findings array")?;
504
505    let bundles: Vec<FindingBundle> = findings_val
506        .iter()
507        .map(|v| {
508            let id = v["id"].as_str().unwrap_or("").to_string();
509            let conf = v["confidence"].as_f64().unwrap_or(0.7);
510            let links: Vec<Link> = v["links"]
511                .as_array()
512                .map(|arr| {
513                    arr.iter()
514                        .map(|l| Link {
515                            target: l["target"].as_str().unwrap_or("").to_string(),
516                            link_type: l["type"].as_str().unwrap_or("depends").to_string(),
517                            note: String::new(),
518                            inferred_by: "test".into(),
519                            created_at: String::new(),
520                            mechanism: None,
521                        })
522                        .collect()
523                })
524                .unwrap_or_default();
525
526            FindingBundle {
527                id,
528                version: 1,
529                previous_version: None,
530                assertion: Assertion {
531                    text: "Test".into(),
532                    assertion_type: "mechanism".into(),
533                    entities: vec![],
534                    relation: None,
535                    direction: None,
536                    causal_claim: None,
537                    causal_evidence_grade: None,
538                },
539                evidence: Evidence {
540                    evidence_type: "experimental".into(),
541                    model_system: String::new(),
542                    species: None,
543                    method: String::new(),
544                    sample_size: None,
545                    effect_size: None,
546                    p_value: None,
547                    replicated: false,
548                    replication_count: None,
549                    evidence_spans: vec![],
550                },
551                conditions: Conditions {
552                    text: String::new(),
553                    species_verified: vec![],
554                    species_unverified: vec![],
555                    in_vitro: false,
556                    in_vivo: false,
557                    human_data: false,
558                    clinical_trial: false,
559                    concentration_range: None,
560                    duration: None,
561                    age_group: None,
562                    cell_type: None,
563                },
564                confidence: Confidence::raw(conf, "test", 0.85),
565                provenance: Provenance {
566                    source_type: "published_paper".into(),
567                    doi: None,
568                    pmid: None,
569                    pmc: None,
570                    openalex_id: None,
571                    url: None,
572                    title: "Test".into(),
573                    authors: vec![],
574                    year: Some(2025),
575                    journal: None,
576                    license: None,
577                    publisher: None,
578                    funders: vec![],
579                    extraction: Extraction::default(),
580                    review: None,
581                    citation_count: None,
582                },
583                flags: default_flags(),
584                links,
585                annotations: vec![],
586                attachments: vec![],
587                created: String::new(),
588                updated: None,
589                access_tier: crate::access_tier::AccessTier::Public,
590            }
591        })
592        .collect();
593
594    let action_val = &input["action"];
595    let finding_id = action_val["finding_id"].as_str().unwrap_or("");
596    let action_type = action_val["type"].as_str().unwrap_or("");
597
598    let mut corr = project::assemble("test", bundles, 1, 0, "test");
599
600    // v0.36.2: ingest optional `replications` array so the harness can
601    // exercise the `ReplicationOutcome` cascade. Each entry produces a
602    // canonical `Replication` whose target field aligns with a finding
603    // id from the case input.
604    if let Some(reps) = input["replications"].as_array() {
605        for r in reps {
606            let target = r["target_finding"].as_str().unwrap_or("").to_string();
607            let outcome = r["outcome"].as_str().unwrap_or("").to_string();
608            let attempted_by = r["attempted_by"].as_str().unwrap_or("test:lab").to_string();
609            corr.replications.push(crate::bundle::Replication {
610                id: format!("vrep_test_{}_{}", target.replace("vf_", ""), outcome),
611                target_finding: target,
612                attempted_by,
613                outcome,
614                evidence: Evidence {
615                    evidence_type: "experimental".into(),
616                    model_system: String::new(),
617                    species: None,
618                    method: "replication_attempt".into(),
619                    sample_size: None,
620                    effect_size: None,
621                    p_value: None,
622                    replicated: false,
623                    replication_count: None,
624                    evidence_spans: vec![],
625                },
626                conditions: Conditions {
627                    text: String::new(),
628                    species_verified: vec![],
629                    species_unverified: vec![],
630                    in_vitro: false,
631                    in_vivo: false,
632                    human_data: false,
633                    clinical_trial: false,
634                    concentration_range: None,
635                    duration: None,
636                    age_group: None,
637                    cell_type: None,
638                },
639                provenance: Provenance {
640                    source_type: "published_paper".into(),
641                    doi: None,
642                    pmid: None,
643                    pmc: None,
644                    openalex_id: None,
645                    url: None,
646                    title: "test".into(),
647                    authors: vec![],
648                    year: None,
649                    journal: None,
650                    license: None,
651                    publisher: None,
652                    funders: vec![],
653                    extraction: Extraction::default(),
654                    review: None,
655                    citation_count: None,
656                },
657                notes: String::new(),
658                created: String::new(),
659                previous_attempt: None,
660            });
661        }
662    }
663
664    let action = match action_type {
665        "retracted" => PropagationAction::Retracted,
666        "confidence_reduced" => {
667            let new_score = action_val["new_score"].as_f64().unwrap_or(0.5);
668            PropagationAction::ConfidenceReduced { new_score }
669        }
670        "replication_outcome" => {
671            let outcome = action_val["outcome"]
672                .as_str()
673                .unwrap_or("replicated")
674                .to_string();
675            let vrep_id = action_val["vrep_id"]
676                .as_str()
677                .unwrap_or("vrep_testxxxx")
678                .to_string();
679            PropagationAction::ReplicationOutcome { outcome, vrep_id }
680        }
681        _ => return Err(format!("unknown action type: {action_type}")),
682    };
683
684    let result = propagate::propagate_correction(&mut corr, finding_id, action);
685
686    if let Some(retracted) = expected["source_retracted"].as_bool()
687        && retracted
688    {
689        let source = corr.findings.iter().find(|f| f.id == finding_id);
690        if let Some(s) = source
691            && !s.flags.retracted
692        {
693            return Err("source finding not marked as retracted".into());
694        }
695    }
696
697    if let Some(count) = expected["affected_count"].as_u64()
698        && result.affected != count as usize
699    {
700        return Err(format!(
701            "expected {count} affected, got {}",
702            result.affected
703        ));
704    }
705
706    if let Some(max) = expected["affected_at_most"].as_u64()
707        && result.affected > max as usize
708    {
709        return Err(format!(
710            "expected at most {max} affected, got {}",
711            result.affected
712        ));
713    }
714
715    if let Some(conf) = expected["source_confidence"].as_f64() {
716        let source = corr.findings.iter().find(|f| f.id == finding_id);
717        if let Some(s) = source
718            && (s.confidence.score - conf).abs() > 0.001
719        {
720            return Err(format!(
721                "expected source confidence {conf}, got {}",
722                s.confidence.score
723            ));
724        }
725    }
726
727    if let Some(expected_flags) = expected["flag_types"].as_array() {
728        let actual_flags: Vec<String> = result
729            .events
730            .iter()
731            .filter_map(|e| match &e.action {
732                crate::bundle::ReviewAction::Flagged { flag_type } => Some(flag_type.clone()),
733                _ => None,
734            })
735            .collect();
736        for ef in expected_flags {
737            let want = ef.as_str().unwrap_or("");
738            if !actual_flags.iter().any(|a| a == want) {
739                return Err(format!(
740                    "expected flag '{want}' not found in events: {actual_flags:?}"
741                ));
742            }
743        }
744    }
745
746    if let Some(contested) = expected["contested_findings"].as_array() {
747        for cid in contested {
748            let cid_str = cid.as_str().unwrap_or("");
749            let f = corr.findings.iter().find(|f| f.id == cid_str);
750            if let Some(f) = f {
751                if !f.flags.contested {
752                    return Err(format!("finding {cid_str} not marked as contested"));
753                }
754            } else {
755                return Err(format!("finding {cid_str} not found"));
756            }
757        }
758    }
759
760    Ok(())
761}
762
763// ── Observer policies ───────────────────────────────────────────────────
764
765fn make_observer_finding(v: &serde_json::Value) -> FindingBundle {
766    let id = v["id"].as_str().unwrap_or("").to_string();
767    let conf = v["confidence"].as_f64().unwrap_or(0.7);
768    let clinical_trial = v["clinical_trial"].as_bool().unwrap_or(false);
769    let human_data = v["human_data"].as_bool().unwrap_or(false);
770    let replicated = v["replicated"].as_bool().unwrap_or(false);
771    let year = v["year"].as_i64().unwrap_or(2020) as i32;
772    let citations = v["citation_count"].as_u64().unwrap_or(0);
773    let has_spans = v["has_spans"].as_bool().unwrap_or(false);
774    let assertion_type = v["assertion_type"]
775        .as_str()
776        .unwrap_or("mechanism")
777        .to_string();
778    let gap = v["gap"].as_bool().unwrap_or(false);
779    let negative_space = v["negative_space"].as_bool().unwrap_or(false);
780
781    FindingBundle {
782        id,
783        version: 1,
784        previous_version: None,
785        assertion: Assertion {
786            text: "Test assertion".to_string(),
787            assertion_type,
788            entities: vec![],
789            relation: None,
790            direction: None,
791            causal_claim: None,
792            causal_evidence_grade: None,
793        },
794        evidence: Evidence {
795            evidence_type: "experimental".into(),
796            model_system: String::new(),
797            species: None,
798            method: String::new(),
799            sample_size: None,
800            effect_size: None,
801            p_value: None,
802            replicated,
803            replication_count: None,
804            evidence_spans: if has_spans {
805                vec![serde_json::json!({"text": "span"})]
806            } else {
807                vec![]
808            },
809        },
810        conditions: Conditions {
811            text: String::new(),
812            species_verified: vec![],
813            species_unverified: vec![],
814            in_vitro: false,
815            in_vivo: false,
816            human_data,
817            clinical_trial,
818            concentration_range: None,
819            duration: None,
820            age_group: None,
821            cell_type: None,
822        },
823        confidence: Confidence::raw(conf, "test", 0.85),
824        provenance: Provenance {
825            source_type: "published_paper".into(),
826            doi: None,
827            pmid: None,
828            pmc: None,
829            openalex_id: None,
830            url: None,
831            title: "Test".into(),
832            authors: vec![],
833            year: Some(year),
834            journal: None,
835            license: None,
836            publisher: None,
837            funders: vec![],
838            extraction: Extraction::default(),
839            review: None,
840            citation_count: Some(citations),
841        },
842        flags: Flags {
843            gap,
844            negative_space,
845            ..Flags::default()
846        },
847        links: vec![],
848        annotations: vec![],
849        attachments: vec![],
850        created: String::new(),
851        updated: None,
852        access_tier: crate::access_tier::AccessTier::Public,
853    }
854}
855
856fn run_observer_policies(
857    input: &serde_json::Value,
858    expected: &serde_json::Value,
859) -> Result<(), String> {
860    let policy_name = input["policy"].as_str().ok_or("missing policy name")?;
861    let findings_val = input["findings"]
862        .as_array()
863        .ok_or("missing findings array")?;
864
865    let bundles: Vec<FindingBundle> = findings_val.iter().map(make_observer_finding).collect();
866
867    let policy = observer::policy_by_name(policy_name)
868        .ok_or_else(|| format!("unknown policy: {policy_name}"))?;
869
870    let view = observer::observe(&bundles, &[], &policy);
871
872    if let Some(count) = expected["hidden_count"].as_u64()
873        && view.hidden != count as usize
874    {
875        return Err(format!("expected {count} hidden, got {}", view.hidden));
876    }
877
878    if let Some(all_visible) = expected["all_visible"].as_bool()
879        && all_visible
880        && view.hidden != 0
881    {
882        return Err(format!("expected all visible, got {} hidden", view.hidden));
883    }
884
885    if let Some(ranking) = expected["ranking"].as_array() {
886        let view_ids: Vec<&str> = view
887            .findings
888            .iter()
889            .map(|f| f.finding_id.as_str())
890            .collect();
891        for (i, expected_id) in ranking.iter().enumerate() {
892            let eid = expected_id.as_str().unwrap_or("");
893            if i >= view_ids.len() {
894                return Err(format!(
895                    "expected rank {} to be {eid}, but only {} visible",
896                    i + 1,
897                    view_ids.len()
898                ));
899            }
900            if view_ids[i] != eid {
901                return Err(format!(
902                    "expected rank {} to be {eid}, got {}",
903                    i + 1,
904                    view_ids[i]
905                ));
906            }
907        }
908    }
909
910    if let Some(hidden_ids) = expected["hidden_ids"].as_array() {
911        let visible: HashSet<&str> = view
912            .findings
913            .iter()
914            .map(|f| f.finding_id.as_str())
915            .collect();
916        for hid in hidden_ids {
917            let hid_str = hid.as_str().unwrap_or("");
918            if visible.contains(hid_str) {
919                return Err(format!("{hid_str} should be hidden but is visible"));
920            }
921        }
922    }
923
924    if let Some(true) = expected["f1_rank_better_than_f2"].as_bool() {
925        let f1_rank = view
926            .findings
927            .iter()
928            .find(|f| f.finding_id == "f1")
929            .map(|f| f.rank);
930        let f2_rank = view
931            .findings
932            .iter()
933            .find(|f| f.finding_id == "f2")
934            .map(|f| f.rank);
935        match (f1_rank, f2_rank) {
936            (Some(r1), Some(r2)) if r1 < r2 => {}
937            (Some(r1), Some(r2)) => {
938                return Err(format!("expected f1 (rank {r1}) < f2 (rank {r2})"));
939            }
940            _ => return Err("f1 or f2 not found in visible findings".into()),
941        }
942    }
943
944    Ok(())
945}
946
947// ── Directory layout ────────────────────────────────────────────────────
948
949fn run_directory_layout(
950    input: &serde_json::Value,
951    expected: &serde_json::Value,
952) -> Result<(), String> {
953    // This is a structural test — we verify the expected paths list is consistent
954    // with the inputs, not against the filesystem.
955    let finding_count = input["finding_count"].as_u64().unwrap_or(0) as usize;
956
957    if let Some(paths) = expected["required_paths"].as_array() {
958        // Must have .vela/config.toml
959        let has_config = paths
960            .iter()
961            .any(|p| p.as_str() == Some(".vela/config.toml"));
962        if !has_config {
963            return Err("required_paths missing .vela/config.toml".into());
964        }
965
966        for required in [".vela/findings/", ".vela/events/", ".vela/proposals/"] {
967            let present = paths.iter().any(|p| p.as_str() == Some(required));
968            if finding_count == 0 && !present {
969                return Err(format!("required_paths missing {required}"));
970            }
971        }
972
973        // Count finding files.
974        let finding_files: Vec<_> = paths
975            .iter()
976            .filter_map(|p| p.as_str())
977            .filter(|p| p.starts_with(".vela/findings/vf_"))
978            .collect();
979
980        if finding_files.len() != finding_count {
981            return Err(format!(
982                "expected {} finding files, got {}",
983                finding_count,
984                finding_files.len()
985            ));
986        }
987    }
988
989    if let Some(count) = expected["finding_file_count"].as_u64()
990        && count as usize != finding_count
991    {
992        return Err(format!(
993            "finding_file_count {count} != input finding_count {finding_count}"
994        ));
995    }
996
997    Ok(())
998}
999
1000// ── Helpers ─────────────────────────────────────────────────────────────
1001
1002fn default_flags() -> Flags {
1003    Flags::default()
1004}
1005
1006// ── Phase U (v0.5): proposal-idempotency suite ─────────────────────────
1007
1008fn run_proposal_idempotency(
1009    input: &serde_json::Value,
1010    expected: &serde_json::Value,
1011) -> Result<(), String> {
1012    use crate::proposals::{StateProposal, proposal_id};
1013    // Construct a proposal with a fixed `created_at`; the substrate
1014    // claim is that the resulting `vpr_…` does NOT depend on
1015    // `created_at`. To prove it, compute the id with two distinct
1016    // timestamps and assert equality.
1017    let mut proposal_a = StateProposal {
1018        schema: input["schema"].as_str().unwrap_or("").to_string(),
1019        id: String::new(),
1020        kind: input["kind"].as_str().unwrap_or("").to_string(),
1021        target: serde_json::from_value(input["target"].clone())
1022            .map_err(|e| format!("parse target: {e}"))?,
1023        actor: serde_json::from_value(input["actor"].clone())
1024            .map_err(|e| format!("parse actor: {e}"))?,
1025        created_at: "2026-01-01T00:00:00Z".to_string(),
1026        drafted_at: None,
1027        reason: input["reason"].as_str().unwrap_or("").to_string(),
1028        payload: input["payload"].clone(),
1029        source_refs: input["source_refs"]
1030            .as_array()
1031            .map(|arr| {
1032                arr.iter()
1033                    .filter_map(|v| v.as_str().map(String::from))
1034                    .collect()
1035            })
1036            .unwrap_or_default(),
1037        status: "pending_review".to_string(),
1038        reviewed_by: None,
1039        reviewed_at: None,
1040        decision_reason: None,
1041        applied_event_id: None,
1042        caveats: input["caveats"]
1043            .as_array()
1044            .map(|arr| {
1045                arr.iter()
1046                    .filter_map(|v| v.as_str().map(String::from))
1047                    .collect()
1048            })
1049            .unwrap_or_default(),
1050        agent_run: None,
1051    };
1052    let id_a = proposal_id(&proposal_a);
1053
1054    let mut proposal_b = proposal_a.clone();
1055    proposal_b.created_at = "2099-12-31T23:59:59Z".to_string();
1056    let id_b = proposal_id(&proposal_b);
1057
1058    if id_a != id_b {
1059        return Err(format!(
1060            "proposal_id depends on created_at: {id_a} vs {id_b}"
1061        ));
1062    }
1063
1064    proposal_a.id = id_a.clone();
1065    let prefix = expected["prefix"].as_str().unwrap_or("vpr_");
1066    if !proposal_a.id.starts_with(prefix) {
1067        return Err(format!(
1068            "id '{}' does not start with '{prefix}'",
1069            proposal_a.id
1070        ));
1071    }
1072    if let Some(expected_len) = expected["id_length"].as_u64()
1073        && proposal_a.id.len() as u64 != expected_len
1074    {
1075        return Err(format!(
1076            "id length {} != expected {expected_len}",
1077            proposal_a.id.len()
1078        ));
1079    }
1080    if let Some(expected_id) = expected["id"].as_str()
1081        && proposal_a.id != expected_id
1082    {
1083        return Err(format!(
1084            "id '{}' != expected '{expected_id}'",
1085            proposal_a.id
1086        ));
1087    }
1088    Ok(())
1089}
1090
1091// ── Phase δ (v0.6): auto-apply-tier suite ──────────────────────────────
1092
1093fn run_auto_apply_tier(
1094    input: &serde_json::Value,
1095    expected: &serde_json::Value,
1096) -> Result<(), String> {
1097    use crate::sign::{ActorRecord, actor_can_auto_apply};
1098    let tier = input["tier"].as_str().map(String::from);
1099    let kind = input["kind"]
1100        .as_str()
1101        .ok_or("auto-apply-tier input missing `kind`")?;
1102    let actor = ActorRecord {
1103        id: "test".to_string(),
1104        public_key: "0".repeat(64),
1105        algorithm: "ed25519".to_string(),
1106        created_at: "2026-04-25T00:00:00Z".to_string(),
1107        tier,
1108        orcid: None,
1109        access_clearance: None,
1110    };
1111    let actual = actor_can_auto_apply(&actor, kind);
1112    let want = expected["permits"]
1113        .as_bool()
1114        .ok_or("auto-apply-tier expected.permits must be a boolean")?;
1115    if actual != want {
1116        return Err(format!(
1117            "actor_can_auto_apply(tier={:?}, kind={}) returned {}; expected {}",
1118            input["tier"], kind, actual, want
1119        ));
1120    }
1121    Ok(())
1122}
1123
1124// ── Phase U (v0.5): registry-publish-pull suite ────────────────────────
1125
1126fn run_registry_publish_pull(
1127    input: &serde_json::Value,
1128    expected: &serde_json::Value,
1129) -> Result<(), String> {
1130    use crate::registry::{RegistryEntry, entry_signing_bytes};
1131    use sha2::{Digest, Sha256};
1132    let entry: RegistryEntry = serde_json::from_value({
1133        let mut v = input.clone();
1134        v["signature"] = serde_json::Value::String(String::new());
1135        v
1136    })
1137    .map_err(|e| format!("parse entry: {e}"))?;
1138    let bytes = entry_signing_bytes(&entry)?;
1139    let actual_hash = hex::encode(Sha256::digest(&bytes));
1140    if let Some(expected_hash) = expected["preimage_sha256"].as_str()
1141        && actual_hash != expected_hash
1142    {
1143        return Err(format!(
1144            "canonical preimage sha256 mismatch: actual={actual_hash}, expected={expected_hash}"
1145        ));
1146    }
1147    Ok(())
1148}
1149
#[cfg(test)]
mod tests {
    use super::*;

    /// `default_flags()` must leave `gap`, `retracted` and `contested` unset.
    #[test]
    fn default_flags_are_all_false() {
        let Flags {
            gap,
            retracted,
            contested,
            ..
        } = default_flags();
        assert!(!gap);
        assert!(!retracted);
        assert!(!contested);
    }
}