vela-protocol 0.77.0

Core library for the Vela scientific knowledge protocol: replayable frontier state, signed canonical events, and proof packets.
Documentation
//! v0.75: Carina-primitive JSON Schema validation.
//!
//! Vela ships hand-authored JSON Schema (draft-07) files for the
//! 14 Carina primitives. This module loads them at compile time
//! via `include_str!` and runs a small subset of draft-07
//! validation against an input value.
//!
//! Subset implemented (enough to validate every primitive in
//! `examples/carina-kernel/primitives.v0.3.json`):
//!
//! - `type` (object, string, number, array, null, or array-of-types).
//! - `required` (list of must-have property keys on objects).
//! - `properties` (recurse into named properties).
//! - `items` (recurse into array elements).
//! - `const` (string-equality pin; used on the `schema` discriminator).
//! - `enum` (string-equality membership).
//! - `pattern` (regex match on strings).
//! - `minLength` (string length floor).
//! - `minimum` / `maximum` (number bounds).
//!
//! `additionalProperties` and `$ref` are not implemented; the bundled
//! schemas do not use them. A future cycle can reach for the
//! `jsonschema` crate if the substrate grows enough complexity to
//! need draft-07 in full.
//!
//! The Rust event-payload validator at `events.rs::validate_event_payload`
//! stays authoritative for replay. The schemas here are the public
//! Carina spec deliverable; the conformance test in
//! `tests/carina_examples.rs` cross-checks the two.
//!
//! Doctrine: see Gowers (2026-05-08), `docs/CARINA.md` §v0.3, and
//! `docs/AI_ATTRIBUTION.md`.

use serde_json::Value;

/// Each primitive ships a `*.schema.json` file under
/// `crates/vela-protocol/embedded/carina-schemas/`. The strings are
/// embedded at compile time so `cargo publish` ships them.
const FINDING_SCHEMA: &str = include_str!("../embedded/carina-schemas/finding.schema.json");
const EVIDENCE_SCHEMA: &str = include_str!("../embedded/carina-schemas/evidence.schema.json");
const ARTIFACT_SCHEMA: &str = include_str!("../embedded/carina-schemas/artifact.schema.json");
const PROPOSAL_SCHEMA: &str = include_str!("../embedded/carina-schemas/proposal.schema.json");
const DIFF_SCHEMA: &str = include_str!("../embedded/carina-schemas/diff.schema.json");
const EVENT_SCHEMA: &str = include_str!("../embedded/carina-schemas/event.schema.json");
const ATTESTATION_SCHEMA: &str = include_str!("../embedded/carina-schemas/attestation.schema.json");
const QUESTION_SCHEMA: &str = include_str!("../embedded/carina-schemas/question.schema.json");
const PROTOCOL_SCHEMA: &str = include_str!("../embedded/carina-schemas/protocol.schema.json");
const EXPERIMENT_SCHEMA: &str = include_str!("../embedded/carina-schemas/experiment.schema.json");
const MECHANISM_SCHEMA: &str = include_str!("../embedded/carina-schemas/mechanism.schema.json");
const LINEAGE_SCHEMA: &str = include_str!("../embedded/carina-schemas/lineage.schema.json");
const CONFIDENCE_SCHEMA: &str = include_str!("../embedded/carina-schemas/confidence.schema.json");
const PROOF_SCHEMA: &str = include_str!("../embedded/carina-schemas/proof.schema.json");

/// The 14 Carina v0.3 primitives. The discriminator on each input
/// is a `schema` field of the form `"carina.<name>.v0.<minor>"`.
pub const PRIMITIVE_NAMES: &[&str] = &[
    "finding",
    "evidence",
    "artifact",
    "proposal",
    "diff",
    "event",
    "attestation",
    "question",
    "protocol",
    "experiment",
    "mechanism",
    "lineage",
    "confidence",
    "proof",
];

/// Look up the JSON Schema text for one primitive name.
pub fn schema_text(primitive: &str) -> Option<&'static str> {
    Some(match primitive {
        "finding" => FINDING_SCHEMA,
        "evidence" => EVIDENCE_SCHEMA,
        "artifact" => ARTIFACT_SCHEMA,
        "proposal" => PROPOSAL_SCHEMA,
        "diff" => DIFF_SCHEMA,
        "event" => EVENT_SCHEMA,
        "attestation" => ATTESTATION_SCHEMA,
        "question" => QUESTION_SCHEMA,
        "protocol" => PROTOCOL_SCHEMA,
        "experiment" => EXPERIMENT_SCHEMA,
        "mechanism" => MECHANISM_SCHEMA,
        "lineage" => LINEAGE_SCHEMA,
        "confidence" => CONFIDENCE_SCHEMA,
        "proof" => PROOF_SCHEMA,
        _ => return None,
    })
}

/// Detect which primitive a value claims to be by reading its
/// `schema` field. Accepts `carina.<name>.v0.X` for any minor
/// version. Returns `None` if the field is missing or unrecognized.
pub fn detect_primitive(value: &Value) -> Option<&'static str> {
    let tag = value.as_object()?.get("schema")?.as_str()?;
    let rest = tag.strip_prefix("carina.")?;
    let (name, _version) = rest.split_once('.')?;
    PRIMITIVE_NAMES
        .iter()
        .copied()
        .find(|candidate| *candidate == name)
}

/// Validate `value` against the primitive's JSON Schema, returning
/// either Ok(()) or a list of human-readable violation strings.
pub fn validate(primitive: &str, value: &Value) -> Result<(), Vec<String>> {
    let schema_str = schema_text(primitive)
        .ok_or_else(|| vec![format!("unknown Carina primitive '{primitive}'")])?;
    let schema: Value = serde_json::from_str(schema_str)
        .map_err(|e| vec![format!("schema {primitive}.schema.json is malformed: {e}")])?;
    let mut errors = Vec::new();
    walk(value, &schema, "", &mut errors);
    if errors.is_empty() {
        Ok(())
    } else {
        Err(errors)
    }
}

/// Auto-detect the primitive from `value.schema` and validate.
pub fn validate_auto(value: &Value) -> Result<&'static str, Vec<String>> {
    let primitive = detect_primitive(value).ok_or_else(|| {
        vec!["could not detect Carina primitive (input must carry a `schema: \"carina.<name>.v0.X\"` field)".to_string()]
    })?;
    validate(primitive, value)?;
    Ok(primitive)
}

fn walk(value: &Value, schema: &Value, path: &str, errors: &mut Vec<String>) {
    let Some(obj) = schema.as_object() else {
        return;
    };

    if let Some(t) = obj.get("type") {
        check_type(t, value, path, errors);
    }

    if let Some(c) = obj.get("const")
        && value != c
    {
        errors.push(format!(
            "{path}: expected const {c}, got {value}",
            path = path_or_root(path)
        ));
    }

    if let Some(Value::Array(variants)) = obj.get("enum")
        && !variants.iter().any(|v| v == value)
    {
        errors.push(format!(
            "{path}: value {value} not in enum {variants:?}",
            path = path_or_root(path)
        ));
    }

    if let Some(Value::String(s)) = value.is_string().then_some(value) {
        if let Some(Value::String(pat)) = obj.get("pattern")
            && let Ok(re) = regex::Regex::new(pat)
            && !re.is_match(s)
        {
            errors.push(format!(
                "{path}: string {s:?} does not match pattern /{pat}/",
                path = path_or_root(path)
            ));
        }
        if let Some(Value::Number(n)) = obj.get("minLength")
            && let Some(min) = n.as_u64()
            && (s.len() as u64) < min
        {
            errors.push(format!(
                "{path}: string length {} less than minLength {min}",
                s.len(),
            ));
        }
    }

    if let Some(n) = value.as_f64() {
        if let Some(min) = obj.get("minimum").and_then(Value::as_f64)
            && n < min
        {
            errors.push(format!("{path}: value {n} less than minimum {min}"));
        }
        if let Some(max) = obj.get("maximum").and_then(Value::as_f64)
            && n > max
        {
            errors.push(format!("{path}: value {n} greater than maximum {max}"));
        }
    }

    if let Some(props) = value.as_object() {
        if let Some(Value::Array(required)) = obj.get("required") {
            for key_value in required {
                if let Some(key) = key_value.as_str()
                    && !props.contains_key(key)
                {
                    errors.push(format!(
                        "{path}: missing required property '{key}'",
                        path = path_or_root(path)
                    ));
                }
            }
        }
        if let Some(Value::Object(prop_schemas)) = obj.get("properties") {
            for (key, sub_schema) in prop_schemas {
                if let Some(child) = props.get(key) {
                    let child_path = if path.is_empty() {
                        key.clone()
                    } else {
                        format!("{path}.{key}")
                    };
                    walk(child, sub_schema, &child_path, errors);
                }
            }
        }
    }

    if let Some(items) = value.as_array()
        && let Some(items_schema) = obj.get("items")
    {
        for (i, child) in items.iter().enumerate() {
            let child_path = format!("{path}[{i}]");
            walk(child, items_schema, &child_path, errors);
        }
    }
}

fn check_type(t: &Value, value: &Value, path: &str, errors: &mut Vec<String>) {
    let allowed: Vec<&str> = match t {
        Value::String(s) => vec![s.as_str()],
        Value::Array(arr) => arr.iter().filter_map(Value::as_str).collect(),
        _ => return,
    };
    let actual = match value {
        Value::Null => "null",
        Value::Bool(_) => "boolean",
        Value::Number(_) => "number",
        Value::String(_) => "string",
        Value::Array(_) => "array",
        Value::Object(_) => "object",
    };
    let ok = allowed.iter().any(|a| {
        *a == actual
            || (*a == "integer" && value.as_i64().is_some())
            || (*a == "number" && actual == "number")
    });
    if !ok {
        errors.push(format!(
            "{path}: expected type {allowed:?}, got {actual}",
            path = path_or_root(path)
        ));
    }
}

fn path_or_root(path: &str) -> &str {
    if path.is_empty() { "<root>" } else { path }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn detect_primitive_reads_schema_tag() {
        let v = json!({"schema": "carina.finding.v0.3", "id": "vf_x"});
        assert_eq!(detect_primitive(&v), Some("finding"));

        let v = json!({"schema": "carina.proof.v0.3"});
        assert_eq!(detect_primitive(&v), Some("proof"));

        let v = json!({"schema": "carina.unknown.v0.3"});
        assert_eq!(detect_primitive(&v), None);

        let v = json!({"no_schema": true});
        assert_eq!(detect_primitive(&v), None);
    }

    #[test]
    fn validate_finding_passes_minimal_shape() {
        let f = json!({
            "schema": "carina.finding.v0.3",
            "id": "vf_abc",
            "assertion": {"text": "x", "type": "mechanistic"},
            "evidence_ids": ["ve_a"],
            "confidence": {"score": 0.5},
            "status": "proposed"
        });
        assert!(validate("finding", &f).is_ok());
    }

    #[test]
    fn validate_finding_rejects_missing_required() {
        let f = json!({
            "schema": "carina.finding.v0.3",
            "id": "vf_abc"
        });
        let err = validate("finding", &f).expect_err("missing required");
        assert!(err.iter().any(|m| m.contains("assertion")));
        assert!(err.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn validate_proof_requires_full_v075_6_shape() {
        // v0.75.6 (Gowers): proof primitive must have all the
        // certification fields.
        let p = json!({
            "schema": "carina.proof.v0.3",
            "id": "vpf_a",
            "tool": "lean4",
            "tool_version": "4.7.0",
            "script_locator": "sha256:bbbb",
            "verifier_output_hash": "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "verified_at": "2026-05-09T00:00:00Z",
            "target_finding_id": "vf_x"
        });
        assert!(validate("proof", &p).is_ok());

        let bad_tool = json!({
            "schema": "carina.proof.v0.3",
            "id": "vpf_a",
            "tool": "fancy_new_thing",
            "tool_version": "4.7.0",
            "script_locator": "sha256:bbbb",
            "verifier_output_hash": "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "verified_at": "2026-05-09T00:00:00Z",
            "target_finding_id": "vf_x"
        });
        assert!(validate("proof", &bad_tool).is_err());
    }

    #[test]
    fn validate_event_pattern_pins_id_prefix() {
        let bad = json!({
            "schema": "carina.event.v0.3",
            "id": "evt_x",
            "kind": "finding.reviewed",
            "target": {"type": "finding", "id": "vf_x"},
            "actor": {"id": "a", "type": "human"},
            "timestamp": "2026-05-09T00:00:00Z"
        });
        assert!(validate("event", &bad).is_err());
    }

    #[test]
    fn validate_aggregate_primitives_v0_3() {
        // The bundled examples/carina-kernel/primitives.v0.3.json
        // must validate end-to-end against the bundled schemas.
        let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("../../examples/carina-kernel/primitives.v0.3.json");
        let text = std::fs::read_to_string(&path).expect("read v0.3 primitives example");
        let value: Value = serde_json::from_str(&text).expect("parse v0.3 primitives example");
        let primitives = value
            .get("primitives")
            .and_then(Value::as_object)
            .expect("aggregate has `primitives`");
        for (name, v) in primitives {
            validate(name, v).unwrap_or_else(|errs| panic!("primitive {name} failed: {:#?}", errs));
        }
        // All 14 primitives must be present.
        for name in PRIMITIVE_NAMES {
            assert!(
                primitives.contains_key(*name),
                "v0.3 example missing primitive {name}"
            );
        }
    }

    #[test]
    fn schema_text_covers_all_primitives() {
        for name in PRIMITIVE_NAMES {
            let s = schema_text(name).expect(name);
            // Each schema must be valid JSON.
            let _: Value = serde_json::from_str(s).expect(name);
        }
    }
}