vela-protocol 0.108.0

Core library for the Vela scientific knowledge protocol: replayable frontier state, signed canonical events, and proof packets.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
//! Content-addressed schema/reducer artifacts.
//!
//! Implements the schema-artifact part of `docs/THEORY.md`
//! Section 5.1, where event tuples reference a content-addressed
//! `schema` field that pins the replay semantics:
//!
//! > schema = content-addressed schema and reducer reference
//!
//! And §5.5:
//!
//! > Schema and reducer artifacts are fixed by content hash.
//!
//! ## What this module ships
//!
//! - [`SchemaArtifact`]: a typed, content-addressed artifact whose
//!   id is `vsa_` followed by the hex encoding of the first 16 bytes
//!   of the SHA-256 of its canonical content.
//! - [`SchemaRegistry`]: a registry mapping artifact id to artifact,
//!   used to verify that an event references a known schema before
//!   replay.
//! - Verification primitives that future event-replay code can call
//!   to check schema availability and detect schema drift.
//!
//! ## What this module does NOT do
//!
//! It does not yet replace the existing `StateEvent::schema: String`
//! version-tag field. That replacement is a wider substrate change
//! (target v0.85+) that ripples into canonicalization,
//! event-id derivation, and existing event-set hashes. This module
//! ships the artifact + registry primitive on which that
//! replacement will sit.

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

/// A content-addressed schema or reducer artifact.
///
/// The `id` is derived from the canonical serialization of
/// `(name, version, body)`. Equal artifacts have equal ids;
/// different artifacts have ids that differ except with negligible
/// probability under SHA-256. Note that the id keeps only the first
/// 16 bytes (128 bits) of the SHA-256 digest, hex-encoded.
///
/// All fields are public and the derived `Deserialize` fills them in
/// as given, so a value obtained by deserialization or by direct
/// field mutation may carry an `id` that does not match its content.
/// Use [`SchemaArtifact::verify_id`] to check.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaArtifact {
    /// Content-addressed id, prefixed with `vsa_` (Vela Schema
    /// Artifact). This is `H(canonical(name, version, body))`,
    /// truncated to the first 16 digest bytes and rendered as 32
    /// hex characters.
    pub id: String,
    /// Human-readable name (e.g. `vela.event.finding_asserted`).
    /// Part of the id preimage.
    pub name: String,
    /// Semver-style version string (e.g. `v0.1`). Part of the id
    /// preimage, so a version bump alone yields a new id.
    pub version: String,
    /// Body of the artifact: the actual schema or reducer
    /// specification, kept as a JSON value to avoid committing to
    /// any one schema language at the substrate layer. Object key
    /// order inside the body does not influence the id.
    pub body: serde_json::Value,
}

impl SchemaArtifact {
    /// Construct an artifact from its content, deriving `id` from
    /// the canonical form of `(name, version, body)`.
    ///
    /// # Errors
    ///
    /// Propagates the error string from [`Self::derive_id`] when the
    /// content cannot be canonicalized.
    pub fn new(
        name: impl Into<String>,
        version: impl Into<String>,
        body: serde_json::Value,
    ) -> Result<Self, String> {
        let (name, version) = (name.into(), version.into());
        Self::derive_id(&name, &version, &body).map(|id| Self {
            id,
            name,
            version,
            body,
        })
    }

    /// Compute the content-addressed id for `(name, version, body)`
    /// without building an artifact. Handy for checking a stored
    /// artifact against its content.
    ///
    /// The result is `vsa_` followed by the lowercase hex of the
    /// first 16 bytes of the SHA-256 digest of the canonical JSON.
    ///
    /// # Errors
    ///
    /// Returns the `canonicalize: ...` error string if the canonical
    /// JSON cannot be serialized.
    pub fn derive_id(
        name: &str,
        version: &str,
        body: &serde_json::Value,
    ) -> Result<String, String> {
        // The preimage is a single JSON object holding the three
        // content fields. `canonical_json` re-sorts every object's
        // keys (including those nested inside `body`), so the hash
        // does not depend on how the caller ordered them.
        let envelope = serde_json::json!({
            "body": body,
            "name": name,
            "version": version,
        });
        let preimage = canonical_json(&envelope)?;
        let digest = Sha256::digest(preimage.as_bytes());
        // Only the leading 16 bytes of the digest go into the id.
        Ok(format!("vsa_{}", hex::encode(&digest[..16])))
    }

    /// Check that the stored `id` equals the id re-derived from
    /// `name`, `version`, and `body`.
    ///
    /// # Errors
    ///
    /// Returns a message naming both the stored and derived ids when
    /// they differ, or the underlying error if derivation fails.
    pub fn verify_id(&self) -> Result<(), String> {
        let derived = Self::derive_id(&self.name, &self.version, &self.body)?;
        if derived != self.id {
            return Err(format!(
                "schema artifact id mismatch: stored={}, derived={}",
                self.id, derived
            ));
        }
        Ok(())
    }
}

/// Canonicalize a JSON value: sort all object keys recursively and
/// serialize without whitespace. This is the canonicalization
/// scheme used for content-addressing schema artifacts.
fn canonical_json(value: &serde_json::Value) -> Result<String, String> {
    fn canon(v: &serde_json::Value) -> serde_json::Value {
        match v {
            serde_json::Value::Object(map) => {
                let mut sorted: BTreeMap<String, serde_json::Value> = BTreeMap::new();
                for (k, vv) in map {
                    sorted.insert(k.clone(), canon(vv));
                }
                let mut out = serde_json::Map::new();
                for (k, vv) in sorted {
                    out.insert(k, vv);
                }
                serde_json::Value::Object(out)
            }
            serde_json::Value::Array(items) => {
                serde_json::Value::Array(items.iter().map(canon).collect())
            }
            other => other.clone(),
        }
    }
    serde_json::to_string(&canon(value)).map_err(|e| format!("canonicalize: {e}"))
}

/// A registry of known schema artifacts.
///
/// Replay code uses the registry to verify that every event's
/// schema reference points to an available artifact. Missing
/// artifacts mean the event cannot be replayed deterministically;
/// federation policy must fetch missing artifacts before replay
/// proceeds (analogous to the missing-ancestor case in §5.2).
///
/// NOTE(review): `Deserialize` is derived, so a registry loaded from
/// serialized data is populated without going through `insert` and
/// its id check — confirm whether callers need to re-verify entries
/// after loading.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaRegistry {
    // Keyed by artifact id; the BTreeMap keeps ids in sorted order.
    artifacts: BTreeMap<String, SchemaArtifact>,
}

impl SchemaRegistry {
    /// Empty registry.
    #[must_use]
    pub fn empty() -> Self {
        Self::default()
    }

    /// Insert an artifact, verifying its id matches its content.
    ///
    /// Returns an error if the artifact's stored id does not match
    /// its derived id. This catches tampering at registration time
    /// rather than at replay time.
    pub fn insert(&mut self, artifact: SchemaArtifact) -> Result<(), String> {
        artifact.verify_id()?;
        self.artifacts.insert(artifact.id.clone(), artifact);
        Ok(())
    }

    /// Look up an artifact by id.
    pub fn get(&self, id: &str) -> Option<&SchemaArtifact> {
        self.artifacts.get(id)
    }

    /// Whether the registry contains an artifact with the given id.
    #[must_use]
    pub fn contains(&self, id: &str) -> bool {
        self.artifacts.contains_key(id)
    }

    /// Number of artifacts in the registry.
    #[must_use]
    pub fn len(&self) -> usize {
        self.artifacts.len()
    }

    /// Whether the registry is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.artifacts.is_empty()
    }

    /// All artifact ids in canonical (sorted) order.
    pub fn ids(&self) -> impl Iterator<Item = &str> {
        self.artifacts.keys().map(String::as_str)
    }

    /// Detect schema artifacts referenced by `referenced` but
    /// missing from the registry. Used at replay-time to determine
    /// whether replay can proceed.
    pub fn missing<I: AsRef<str>>(&self, referenced: &[I]) -> Vec<String> {
        referenced
            .iter()
            .filter(|id| !self.artifacts.contains_key(id.as_ref()))
            .map(|id| id.as_ref().to_string())
            .collect()
    }

    /// Inspect a slice of events and return any
    /// `schema_artifact_id` values that are not present in the
    /// registry. Events with `schema_artifact_id == None` are
    /// skipped (they predate the artifact-registry mechanism per
    /// docs/THEORY.md ยง5.1 and use the legacy string `schema`
    /// field).
    ///
    /// Returned ids are deduplicated and sorted lexically.
    pub fn unknown_event_artifacts(&self, events: &[crate::events::StateEvent]) -> Vec<String> {
        let mut seen = std::collections::BTreeSet::new();
        let mut missing = std::collections::BTreeSet::new();
        for ev in events {
            let Some(id) = ev.schema_artifact_id.as_deref() else {
                continue;
            };
            if seen.contains(id) {
                continue;
            }
            seen.insert(id.to_string());
            if !self.artifacts.contains_key(id) {
                missing.insert(id.to_string());
            }
        }
        missing.into_iter().collect()
    }
}

// Unit tests for artifact id derivation, the registry, and the
// interaction between `schema_artifact_id` and `StateEvent`.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A small JSON-Schema-like object used as an artifact body in
    /// several tests.
    fn sample_body() -> serde_json::Value {
        json!({
            "type": "object",
            "required": ["finding_id", "actor"],
            "properties": {
                "finding_id": {"type": "string"},
                "actor":      {"type": "string"},
            }
        })
    }

    /// Building the same content twice yields the same `vsa_` id.
    #[test]
    fn artifact_id_is_content_addressed() {
        let a = SchemaArtifact::new("event.finding_asserted", "v0.1", sample_body()).unwrap();
        let b = SchemaArtifact::new("event.finding_asserted", "v0.1", sample_body()).unwrap();
        assert_eq!(a.id, b.id);
        assert!(a.id.starts_with("vsa_"));
    }

    /// Changing either the version or the body changes the id.
    #[test]
    fn different_content_yields_different_ids() {
        let a = SchemaArtifact::new("event.finding_asserted", "v0.1", sample_body()).unwrap();
        let b = SchemaArtifact::new("event.finding_asserted", "v0.2", sample_body()).unwrap();
        assert_ne!(a.id, b.id);

        let mut other_body = sample_body();
        other_body["properties"]["new_field"] = json!({"type": "string"});
        let c = SchemaArtifact::new("event.finding_asserted", "v0.1", other_body).unwrap();
        assert_ne!(a.id, c.id);
    }

    /// `verify_id` passes on a fresh artifact and fails once the body
    /// is mutated behind the stored id.
    #[test]
    fn verify_id_rejects_tampered_artifact() {
        let mut a = SchemaArtifact::new("event.x", "v0.1", json!({"k": "v"})).unwrap();
        assert!(a.verify_id().is_ok());
        // Tamper with the body but keep the id.
        a.body = json!({"k": "v2"});
        assert!(a.verify_id().is_err());
    }

    /// Two objects that differ only in key order (at any depth)
    /// canonicalize to the same string.
    #[test]
    fn canonical_json_sorts_keys_recursively() {
        let unsorted = json!({"b": 1, "a": {"d": 4, "c": 3}});
        let sorted = json!({"a": {"c": 3, "d": 4}, "b": 1});
        assert_eq!(
            canonical_json(&unsorted).unwrap(),
            canonical_json(&sorted).unwrap()
        );
    }

    /// Body key order does not leak into the derived id.
    #[test]
    fn key_order_does_not_affect_id() {
        let body1 = json!({"a": 1, "b": 2});
        let body2 = json!({"b": 2, "a": 1});
        let a = SchemaArtifact::new("x", "v0.1", body1).unwrap();
        let b = SchemaArtifact::new("x", "v0.1", body2).unwrap();
        assert_eq!(a.id, b.id);
    }

    /// An inserted artifact is visible through `contains`, `get`,
    /// and `len`.
    #[test]
    fn registry_insert_and_lookup() {
        let mut reg = SchemaRegistry::empty();
        let a = SchemaArtifact::new("event.x", "v0.1", sample_body()).unwrap();
        let id = a.id.clone();
        reg.insert(a).unwrap();
        assert!(reg.contains(&id));
        assert!(reg.get(&id).is_some());
        assert_eq!(reg.len(), 1);
    }

    /// `insert` refuses an artifact whose id no longer matches its
    /// content and leaves the registry empty.
    #[test]
    fn registry_rejects_tampered_artifact_at_insert() {
        let mut reg = SchemaRegistry::empty();
        let mut a = SchemaArtifact::new("event.x", "v0.1", sample_body()).unwrap();
        // Tamper with body without updating id.
        a.body = json!({"different": "content"});
        let result = reg.insert(a);
        assert!(result.is_err());
        assert!(reg.is_empty());
    }

    /// `missing` returns the unregistered ids, in the order they
    /// were passed in.
    #[test]
    fn missing_returns_unregistered_ids() {
        let mut reg = SchemaRegistry::empty();
        let a = SchemaArtifact::new("event.x", "v0.1", sample_body()).unwrap();
        let known = a.id.clone();
        reg.insert(a).unwrap();

        let referenced = vec![
            known.clone(),
            "vsa_unknown1".to_string(),
            "vsa_unknown2".to_string(),
        ];
        let missing = reg.missing(&referenced);
        assert_eq!(missing, vec!["vsa_unknown1", "vsa_unknown2"]);
    }

    /// `missing` is empty when every referenced id is registered.
    #[test]
    fn missing_returns_empty_when_all_present() {
        let mut reg = SchemaRegistry::empty();
        let a = SchemaArtifact::new("event.x", "v0.1", sample_body()).unwrap();
        let id = a.id.clone();
        reg.insert(a).unwrap();
        assert!(reg.missing(&[id]).is_empty());
    }

    /// A registry survives a JSON serialize/deserialize round trip
    /// unchanged.
    #[test]
    fn registry_serde_round_trip() {
        let mut reg = SchemaRegistry::empty();
        let a = SchemaArtifact::new("event.x", "v0.1", sample_body()).unwrap();
        let b = SchemaArtifact::new("event.y", "v0.2", json!({"different": true})).unwrap();
        reg.insert(a).unwrap();
        reg.insert(b).unwrap();

        let json = serde_json::to_string(&reg).unwrap();
        let restored: SchemaRegistry = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, reg);
    }

    /// The id is `vsa_` followed by exactly 32 hex characters.
    #[test]
    fn id_uses_vsa_prefix_and_hex() {
        let a = SchemaArtifact::new("x", "v0.1", json!({})).unwrap();
        assert!(a.id.starts_with("vsa_"));
        // Skip the 4-character `vsa_` prefix.
        let hex_part = &a.id[4..];
        assert_eq!(hex_part.len(), 32); // 16 bytes * 2 hex chars
        assert!(hex_part.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// `ids()` yields ids in sorted order whatever the insertion
    /// order was.
    #[test]
    fn ids_are_returned_in_canonical_order() {
        let mut reg = SchemaRegistry::empty();
        // Insert in non-canonical order
        for n in ["zeta", "alpha", "beta"] {
            let a = SchemaArtifact::new(n, "v0.1", json!({"n": n})).unwrap();
            reg.insert(a).unwrap();
        }
        // Ids are returned sorted, regardless of insertion order
        let ids: Vec<&str> = reg.ids().collect();
        let mut sorted = ids.clone();
        sorted.sort();
        assert_eq!(ids, sorted);
    }

    /// Build a minimal `StateEvent` whose id is `vev_<id_seed>` and
    /// whose `schema_artifact_id` is `artifact`; every other field
    /// is a fixed placeholder.
    fn sample_event(id_seed: &str, artifact: Option<&str>) -> crate::events::StateEvent {
        use crate::events::{StateActor, StateEvent, StateTarget};
        StateEvent {
            schema: "vela.event.v0.1".into(),
            id: format!("vev_{}", id_seed),
            kind: "test.event".into(),
            target: StateTarget {
                r#type: "finding".into(),
                id: "vf_x".into(),
            },
            actor: StateActor {
                id: "test".into(),
                r#type: "system".into(),
            },
            timestamp: "2026-05-09T00:00:00Z".into(),
            reason: "test".into(),
            before_hash: String::new(),
            after_hash: String::new(),
            payload: json!(null),
            caveats: vec![],
            signature: None,
            schema_artifact_id: artifact.map(String::from),
        }
    }

    /// Only ids that are referenced by an event and absent from the
    /// registry are reported; known ids and `None` are not.
    #[test]
    fn unknown_event_artifacts_returns_only_missing_referenced_ids() {
        let mut reg = SchemaRegistry::empty();
        let known_artifact = SchemaArtifact::new("event.x", "v0.1", json!({})).unwrap();
        let known_id = known_artifact.id.clone();
        reg.insert(known_artifact).unwrap();

        let events = vec![
            // Event referencing a known artifact: not missing.
            sample_event("001", Some(&known_id)),
            // Event referencing an unknown artifact: missing.
            sample_event("002", Some("vsa_unknown")),
            // Event without an artifact reference: skipped.
            sample_event("003", None),
        ];
        let missing = reg.unknown_event_artifacts(&events);
        assert_eq!(missing, vec!["vsa_unknown"]);
    }

    /// The same missing id referenced by several events is reported
    /// once.
    #[test]
    fn unknown_event_artifacts_deduplicates() {
        let reg = SchemaRegistry::empty();
        let events = vec![
            sample_event("001", Some("vsa_missing")),
            sample_event("002", Some("vsa_missing")),
            sample_event("003", Some("vsa_missing")),
        ];
        let missing = reg.unknown_event_artifacts(&events);
        assert_eq!(missing, vec!["vsa_missing"]);
    }

    #[test]
    fn schema_artifact_id_does_not_affect_event_id() {
        // Critical invariant: setting schema_artifact_id must NOT
        // change event.id. Otherwise existing events would be
        // forced to migrate, breaking replay determinism on every
        // historical hub.
        use crate::events::compute_event_id;
        let without = sample_event("001", None);
        let with = sample_event("001", Some("vsa_someartifact"));
        // Compute fresh event ids from both (sample_event sets
        // a placeholder; compute_event_id ignores it and rederives).
        let id_without = compute_event_id(&without);
        let id_with = compute_event_id(&with);
        assert_eq!(
            id_without, id_with,
            "schema_artifact_id must not be part of canonical event-id preimage"
        );
    }

    #[test]
    fn pre_v0_89_events_serialize_byte_identically() {
        // An event with schema_artifact_id=None must serialize
        // without the new field, so pre-v0.89 frontiers round-trip.
        // Note: this asserts only that the field name is absent from
        // the serialized JSON; it does not compare against a stored
        // byte-for-byte fixture.
        let event = sample_event("001", None);
        let json = serde_json::to_string(&event).unwrap();
        assert!(
            !json.contains("schema_artifact_id"),
            "schema_artifact_id should be skipped when None; full json: {json}"
        );
    }

    /// When `schema_artifact_id` is `Some`, both the field name and
    /// its value show up in the serialized JSON.
    #[test]
    fn v0_89_event_with_artifact_includes_field() {
        let event = sample_event("001", Some("vsa_test"));
        let json = serde_json::to_string(&event).unwrap();
        assert!(
            json.contains("schema_artifact_id"),
            "schema_artifact_id should appear when Some"
        );
        assert!(
            json.contains("vsa_test"),
            "the artifact id value should appear"
        );
    }
}