Skip to main content

neleus_db/
commit.rs

1use anyhow::{Result, anyhow};
2use serde::{Deserialize, Serialize};
3
4use crate::canonical::to_cbor;
5use crate::clock::now_unix;
6use crate::hash::{Hash, hash_typed};
7use crate::object_store::ObjectStore;
8use crate::state::StateRoot;
9
/// Object-store tag under which serialized commits are written and read back.
const COMMIT_TAG: &[u8] = b"commit:";
/// Tag passed to `hash_typed` when deriving the payload hash that signers sign.
/// Distinct from `COMMIT_TAG`, which is used for storing the commit object itself.
const COMMIT_PAYLOAD_TAG: &[u8] = b"commit_payload:";
/// Value written to `Commit::schema_version` by the commit constructors in this file.
const COMMIT_SCHEMA_VERSION: u32 = 1;

/// Hash identifying a stored commit; a plain alias for the crate-wide `Hash`.
pub type CommitHash = Hash;
15
/// Signature attached to a signed commit, as returned by a `CommitSigner`.
///
/// Nothing in this file interprets the fields; they are stored on the commit
/// and handed back to a `CommitVerifier` as part of the `Commit`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CommitSignature {
    /// Name of the signing scheme (the tests in this file use `"hash-echo"`).
    pub scheme: String,
    /// Optional identifier of the key that produced `signature`.
    pub key_id: Option<String>,
    /// Raw signature bytes.
    pub signature: Vec<u8>,
}
22
/// A commit record as serialized into the object store.
///
/// The serialized form is what gets hashed, so field order and the serde
/// attributes below are part of the on-disk format.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Commit {
    /// Format version; the constructors in this file write `COMMIT_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Hashes of the parent commits; empty for a commit with no parent.
    pub parents: Vec<CommitHash>,
    /// Creation time as returned by `now_unix()`.
    /// NOTE(review): presumably seconds since the Unix epoch — confirm in `crate::clock`.
    pub timestamp: u64,
    /// Caller-supplied author string.
    pub author: String,
    /// Caller-supplied commit message.
    pub message: String,
    /// State root this commit points at; checked for existence at creation time.
    pub state_root: StateRoot,
    /// Hashes of manifest objects; each is checked for existence at creation time.
    pub manifests: Vec<Hash>,
    /// Signature for signed commits, `None` otherwise. A missing key
    /// deserializes to `None`; there is no `skip_serializing_if` here, so the
    /// field is still written when it is `None`.
    #[serde(default)]
    pub signature: Option<CommitSignature>,
    /// `hash_typed(COMMIT_PAYLOAD_TAG, &to_cbor(&unsigned))` where `unsigned`
    /// is this commit with `signature` and `payload_hash` both set to `None`.
    /// Present only on signed commits; verifiers MUST re-derive and compare.
    ///
    /// `skip_serializing_if` keeps unsigned commits byte-identical to commits
    /// produced by code that predates this field: `None` is encoded by
    /// omitting the key rather than by an explicit `null`. Without this,
    /// adding the field would change every existing unsigned commit's hash.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub payload_hash: Option<Hash>,
}
45
46impl Commit {
47    /// Compute the payload hash that a signer signs / a verifier checks.
48    /// Strips `signature` and `payload_hash` from `self` to reconstruct the
49    /// canonical unsigned form, then hashes its DAG-CBOR encoding.
50    pub fn unsigned_payload_hash(&self) -> Result<Hash> {
51        let unsigned = Commit {
52            signature: None,
53            payload_hash: None,
54            ..self.clone()
55        };
56        Ok(hash_typed(COMMIT_PAYLOAD_TAG, &to_cbor(&unsigned)?))
57    }
58}
59
/// Scheme-specific signature check invoked by `CommitStore::verify_commit_with`.
pub trait CommitVerifier {
    /// `payload_hash` has already been re-derived from `commit` and confirmed
    /// to match the value stored on the commit. Implementations should perform
    /// only the cryptographic signature check against this hash.
    ///
    /// `commit_hash` is the hash the commit was loaded under. Return `Ok(())`
    /// to accept the commit; any `Err` is returned to the caller as a rejection.
    fn verify(&self, commit_hash: CommitHash, commit: &Commit, payload_hash: Hash) -> Result<()>;
}
66
/// Produces the signature stored on commits created by
/// `CommitStore::create_signed_commit`.
pub trait CommitSigner {
    /// Sign `payload_hash`, the result of `commit.unsigned_payload_hash()`.
    ///
    /// `commit` is the unsigned commit the hash was derived from: its
    /// `signature` and `payload_hash` are both `None` at the time of the call.
    /// An `Err` aborts commit creation.
    fn sign(&self, payload_hash: Hash, commit: &Commit) -> Result<CommitSignature>;
}
70
/// Creates, loads and verifies commits stored in an `ObjectStore`.
#[derive(Clone, Debug)]
pub struct CommitStore {
    /// Backing store; commits are written and read under `COMMIT_TAG`.
    objects: ObjectStore,
}
75
76impl CommitStore {
77    pub fn new(objects: ObjectStore) -> Self {
78        Self { objects }
79    }
80
81    pub fn create_commit(
82        &self,
83        parents: Vec<CommitHash>,
84        state_root: StateRoot,
85        manifests: Vec<Hash>,
86        author: String,
87        message: String,
88    ) -> Result<CommitHash> {
89        self.validate_references(state_root, &manifests)?;
90        let commit = Commit {
91            schema_version: COMMIT_SCHEMA_VERSION,
92            parents,
93            timestamp: now_unix()?,
94            author,
95            message,
96            state_root,
97            manifests,
98            signature: None,
99            payload_hash: None,
100        };
101        self.objects.put_serialized(COMMIT_TAG, &commit)
102    }
103
104    pub fn create_signed_commit<S: CommitSigner>(
105        &self,
106        signer: &S,
107        parents: Vec<CommitHash>,
108        state_root: StateRoot,
109        manifests: Vec<Hash>,
110        author: String,
111        message: String,
112    ) -> Result<CommitHash> {
113        self.validate_references(state_root, &manifests)?;
114        let unsigned = Commit {
115            schema_version: COMMIT_SCHEMA_VERSION,
116            parents,
117            timestamp: now_unix()?,
118            author,
119            message,
120            state_root,
121            manifests,
122            signature: None,
123            payload_hash: None,
124        };
125        let payload_hash = unsigned.unsigned_payload_hash()?;
126        let signature = signer.sign(payload_hash, &unsigned)?;
127
128        let signed = Commit {
129            signature: Some(signature),
130            payload_hash: Some(payload_hash),
131            ..unsigned
132        };
133        self.objects.put_serialized(COMMIT_TAG, &signed)
134    }
135
136    pub fn get_commit(&self, hash: CommitHash) -> Result<Commit> {
137        let commit: Commit = self.objects.get_deserialized_typed(COMMIT_TAG, hash)?;
138        Ok(commit)
139    }
140
141    pub fn verify_commit_with<V: CommitVerifier>(
142        &self,
143        hash: CommitHash,
144        verifier: &V,
145    ) -> Result<()> {
146        let commit = self.get_commit(hash)?;
147        let stored = commit
148            .payload_hash
149            .ok_or_else(|| anyhow!("commit {} is not signed (missing payload_hash)", hash))?;
150        let expected = commit.unsigned_payload_hash()?;
151        if stored != expected {
152            return Err(anyhow!(
153                "commit {} payload_hash inconsistent with commit body",
154                hash
155            ));
156        }
157        verifier.verify(hash, &commit, expected)
158    }
159
160    fn validate_references(&self, state_root: StateRoot, manifests: &[Hash]) -> Result<()> {
161        if !self.objects.exists(state_root) {
162            return Err(anyhow!("state_root {} does not exist", state_root));
163        }
164        for manifest in manifests {
165            if !self.objects.exists(*manifest) {
166                return Err(anyhow!("manifest {} does not exist", manifest));
167            }
168        }
169        Ok(())
170    }
171}
172
173pub fn create_commit(
174    store: &CommitStore,
175    parents: Vec<CommitHash>,
176    state_root: StateRoot,
177    manifests: Vec<Hash>,
178    author: String,
179    message: String,
180) -> Result<CommitHash> {
181    store.create_commit(parents, state_root, manifests, author, message)
182}
183
184pub fn get_commit(store: &CommitStore, hash: CommitHash) -> Result<Commit> {
185    store.get_commit(hash)
186}
187
188#[cfg(test)]
189mod tests {
190    use tempfile::TempDir;
191
192    use super::*;
193    use crate::blob_store::BlobStore;
194    use crate::object_store::ObjectStore;
195    use crate::state::StateStore;
196    use crate::wal::Wal;
197
198    fn stores(tmp: &TempDir) -> (CommitStore, StateStore, BlobStore) {
199        let objects = ObjectStore::new(tmp.path().join("objects"));
200        objects.ensure_dir().unwrap();
201        let commit_store = CommitStore::new(objects.clone());
202
203        let blobs = BlobStore::new(tmp.path().join("blobs"));
204        blobs.ensure_dir().unwrap();
205
206        let state = StateStore::new(objects, blobs.clone(), Wal::new(tmp.path().join("wal")));
207        (commit_store, state, blobs)
208    }
209
210    /// Regression: adding `payload_hash` to `Commit` must NOT change the
211    /// CBOR encoding of an unsigned commit, because that would change every
212    /// existing commit hash on disk. `skip_serializing_if = "Option::is_none"`
213    /// on `payload_hash` keeps the unsigned shape byte-identical to commits
214    /// produced by code that predated the field.
215    #[test]
216    fn unsigned_commit_cbor_omits_payload_hash() {
217        #[derive(serde::Serialize)]
218        struct LegacyCommit {
219            schema_version: u32,
220            parents: Vec<CommitHash>,
221            timestamp: u64,
222            author: String,
223            message: String,
224            state_root: StateRoot,
225            manifests: Vec<Hash>,
226            signature: Option<CommitSignature>,
227        }
228
229        let state_root = hash_typed(b"any:", b"r");
230        let manifest = hash_typed(b"any:", b"m");
231
232        let new_form = Commit {
233            schema_version: COMMIT_SCHEMA_VERSION,
234            parents: vec![],
235            timestamp: 1700000000,
236            author: "agent".into(),
237            message: "msg".into(),
238            state_root,
239            manifests: vec![manifest],
240            signature: None,
241            payload_hash: None,
242        };
243
244        let legacy_form = LegacyCommit {
245            schema_version: COMMIT_SCHEMA_VERSION,
246            parents: vec![],
247            timestamp: 1700000000,
248            author: "agent".into(),
249            message: "msg".into(),
250            state_root,
251            manifests: vec![manifest],
252            signature: None,
253        };
254
255        let new_bytes = to_cbor(&new_form).unwrap();
256        let legacy_bytes = to_cbor(&legacy_form).unwrap();
257        assert_eq!(
258            new_bytes, legacy_bytes,
259            "unsigned commit encoding must match the pre-payload_hash shape"
260        );
261    }
262
263    #[test]
264    fn commit_create_get_roundtrip() {
265        let tmp = TempDir::new().unwrap();
266        let (cs, state, _) = stores(&tmp);
267        let root = state.empty_root().unwrap();
268        let h = cs
269            .create_commit(vec![], root, vec![], "agent".into(), "msg".into())
270            .unwrap();
271        let c = cs.get_commit(h).unwrap();
272        assert_eq!(c.author, "agent");
273        assert_eq!(c.message, "msg");
274        assert_eq!(c.schema_version, COMMIT_SCHEMA_VERSION);
275    }
276
277    #[test]
278    fn commit_hash_changes_with_message() {
279        let tmp = TempDir::new().unwrap();
280        let (cs, state, _) = stores(&tmp);
281        let root = state.empty_root().unwrap();
282        let a = cs
283            .create_commit(vec![], root, vec![], "a".into(), "m1".into())
284            .unwrap();
285        let b = cs
286            .create_commit(vec![], root, vec![], "a".into(), "m2".into())
287            .unwrap();
288        assert_ne!(a, b);
289    }
290
291    #[test]
292    fn commit_parent_reference_preserved() {
293        let tmp = TempDir::new().unwrap();
294        let (cs, state, _) = stores(&tmp);
295        let root = state.empty_root().unwrap();
296        let p = cs
297            .create_commit(vec![], root, vec![], "a".into(), "p".into())
298            .unwrap();
299        let c = cs
300            .create_commit(vec![p], root, vec![], "a".into(), "c".into())
301            .unwrap();
302        let out = cs.get_commit(c).unwrap();
303        assert_eq!(out.parents, vec![p]);
304    }
305
306    #[test]
307    fn commit_can_reference_manifests() {
308        let tmp = TempDir::new().unwrap();
309        let (cs, state, _blobs) = stores(&tmp);
310        let root = state.empty_root().unwrap();
311        let manifest_hash = state.set(root, b"manifest", b"ref").unwrap();
312        let c = cs
313            .create_commit(
314                vec![],
315                root,
316                vec![manifest_hash],
317                "agent".into(),
318                "with manifest".into(),
319            )
320            .unwrap();
321        let out = cs.get_commit(c).unwrap();
322        assert_eq!(out.manifests, vec![manifest_hash]);
323    }
324
325    #[test]
326    fn commit_timestamp_nonzero() {
327        let tmp = TempDir::new().unwrap();
328        let (cs, state, _) = stores(&tmp);
329        let root = state.empty_root().unwrap();
330        let h = cs
331            .create_commit(vec![], root, vec![], "agent".into(), "msg".into())
332            .unwrap();
333        let c = cs.get_commit(h).unwrap();
334        assert!(c.timestamp > 0);
335    }
336
337    #[test]
338    fn commit_free_functions_work() {
339        let tmp = TempDir::new().unwrap();
340        let (cs, state, _) = stores(&tmp);
341        let root = state.empty_root().unwrap();
342
343        let h = super::create_commit(&cs, vec![], root, vec![], "a".into(), "m".into()).unwrap();
344        let c = super::get_commit(&cs, h).unwrap();
345        assert_eq!(c.message, "m");
346    }
347
348    /// Real signing scheme used in tests: the signature IS the payload_hash bytes,
349    /// keyed by `k1`. A verifier must reject any tampered commit because the
350    /// re-derived `payload_hash` changes when the commit body does.
351    struct HashEchoSigner;
352
353    impl CommitSigner for HashEchoSigner {
354        fn sign(&self, payload_hash: Hash, _commit: &Commit) -> Result<CommitSignature> {
355            Ok(CommitSignature {
356                scheme: "hash-echo".into(),
357                key_id: Some("k1".into()),
358                signature: payload_hash.as_bytes().to_vec(),
359            })
360        }
361    }
362
363    struct HashEchoVerifier;
364
365    impl CommitVerifier for HashEchoVerifier {
366        fn verify(&self, _hash: CommitHash, commit: &Commit, payload_hash: Hash) -> Result<()> {
367            let sig = commit
368                .signature
369                .as_ref()
370                .ok_or_else(|| anyhow::anyhow!("missing signature"))?;
371            if sig.scheme != "hash-echo" {
372                return Err(anyhow::anyhow!("unexpected scheme {}", sig.scheme));
373            }
374            if sig.signature.as_slice() != payload_hash.as_bytes() {
375                return Err(anyhow::anyhow!("signature does not match payload_hash"));
376            }
377            Ok(())
378        }
379    }
380
381    #[test]
382    fn signed_commit_round_trip_verifies() {
383        let tmp = TempDir::new().unwrap();
384        let (cs, state, _) = stores(&tmp);
385        let root = state.empty_root().unwrap();
386        let h = cs
387            .create_signed_commit(
388                &HashEchoSigner,
389                vec![],
390                root,
391                vec![],
392                "agent".into(),
393                "msg".into(),
394            )
395            .unwrap();
396        let c = cs.get_commit(h).unwrap();
397        assert!(c.signature.is_some());
398        assert!(c.payload_hash.is_some());
399        cs.verify_commit_with(h, &HashEchoVerifier).unwrap();
400    }
401
402    #[test]
403    fn verifier_rejects_unsigned_commit() {
404        let tmp = TempDir::new().unwrap();
405        let (cs, state, _) = stores(&tmp);
406        let root = state.empty_root().unwrap();
407        let h = cs
408            .create_commit(vec![], root, vec![], "agent".into(), "msg".into())
409            .unwrap();
410        let err = cs.verify_commit_with(h, &HashEchoVerifier).unwrap_err();
411        assert!(err.to_string().contains("not signed"));
412    }
413
414    /// Tampering with any field of a signed commit must invalidate it.
415    /// Demonstrates that storing `payload_hash` alongside the signature does
416    /// not allow an attacker to forge by mutating other fields.
417    #[test]
418    fn verifier_rejects_tampered_commit_body() {
419        let tmp = TempDir::new().unwrap();
420        let (cs, state, _) = stores(&tmp);
421        let root = state.empty_root().unwrap();
422        let h = cs
423            .create_signed_commit(
424                &HashEchoSigner,
425                vec![],
426                root,
427                vec![],
428                "agent".into(),
429                "original message".into(),
430            )
431            .unwrap();
432
433        let mut c = cs.get_commit(h).unwrap();
434        let original_payload_hash = c.payload_hash.unwrap();
435        c.message = "tampered message".into();
436
437        // payload_hash is now stale relative to the body. The verifier path's
438        // consistency check (inside verify_commit_with) is what catches this;
439        // here we simulate by calling the helper directly.
440        let recomputed = c.unsigned_payload_hash().unwrap();
441        assert_ne!(recomputed, original_payload_hash);
442    }
443
444    /// Stored payload_hash MUST equal the re-derived hash, even if a signature
445    /// would still verify against the stored payload_hash. Otherwise an attacker
446    /// could craft a Commit whose body says X but whose signed payload_hash
447    /// covers a different X'.
448    #[test]
449    fn verifier_rejects_inconsistent_payload_hash() {
450        use crate::object_store::ObjectStore;
451
452        let tmp = TempDir::new().unwrap();
453        let objects = ObjectStore::new(tmp.path().join("objects"));
454        objects.ensure_dir().unwrap();
455        let cs = CommitStore::new(objects.clone());
456        let state_objects = ObjectStore::new(tmp.path().join("objects"));
457        let blobs = BlobStore::new(tmp.path().join("blobs"));
458        blobs.ensure_dir().unwrap();
459        let state = StateStore::new(state_objects, blobs, Wal::new(tmp.path().join("wal")));
460        let root = state.empty_root().unwrap();
461
462        // Hand-craft a forged commit: body says "real" but payload_hash
463        // covers a different (signed) payload.
464        let forged = Commit {
465            schema_version: COMMIT_SCHEMA_VERSION,
466            parents: vec![],
467            timestamp: 1,
468            author: "attacker".into(),
469            message: "real".into(),
470            state_root: root,
471            manifests: vec![],
472            signature: Some(CommitSignature {
473                scheme: "hash-echo".into(),
474                key_id: Some("k1".into()),
475                signature: hash_typed(b"other:", b"x").as_bytes().to_vec(),
476            }),
477            payload_hash: Some(hash_typed(b"other:", b"x")),
478        };
479        let h = objects.put_serialized(COMMIT_TAG, &forged).unwrap();
480        let err = cs.verify_commit_with(h, &HashEchoVerifier).unwrap_err();
481        assert!(err.to_string().contains("inconsistent"));
482    }
483
484    #[test]
485    fn create_commit_rejects_missing_state_root() {
486        let tmp = TempDir::new().unwrap();
487        let (cs, _state, _) = stores(&tmp);
488        let missing_root = hash_typed(b"missing:", b"state");
489        let err = cs
490            .create_commit(vec![], missing_root, vec![], "agent".into(), "msg".into())
491            .unwrap_err();
492        assert!(err.to_string().contains("state_root"));
493    }
494
495    #[test]
496    fn create_commit_rejects_missing_manifest() {
497        let tmp = TempDir::new().unwrap();
498        let (cs, state, _) = stores(&tmp);
499        let root = state.empty_root().unwrap();
500        let missing_manifest = hash_typed(b"missing:", b"manifest");
501        let err = cs
502            .create_commit(
503                vec![],
504                root,
505                vec![missing_manifest],
506                "agent".into(),
507                "msg".into(),
508            )
509            .unwrap_err();
510        assert!(err.to_string().contains("manifest"));
511    }
512}