Skip to main content

bones_core/dag/
hash.rs

1//! Content-addressed event hashing for the Merkle-DAG.
2//!
3//! This module provides hash verification and parent chain validation on top of
4//! the core hash computation in [`crate::event::writer::compute_event_hash`].
5//!
6//! # Merkle-DAG Properties
7//!
8//! - Each event's hash covers its content AND its parent hashes.
9//! - Modifying any event invalidates the hashes of all its descendants.
10//! - Parent hashes are sorted lexicographically before hashing.
11//! - Hash format is text-prefixed (`blake3:<payload>`) with canonical base64url
12//!   writes and legacy hex accepted for validation.
13
14use std::collections::HashSet;
15
16use crate::event::Event;
17use crate::event::hash_text::decode_blake3_hash;
18use crate::event::writer::{WriteError, compute_event_hash};
19
20// ---------------------------------------------------------------------------
21// Machine-readable error codes
22// ---------------------------------------------------------------------------
23
24/// Machine-readable codes for [`HashError`].
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26pub enum HashErrorCode {
27    /// The stored hash on an event does not match its computed hash.
28    HashMismatch,
29    /// An event references a parent hash not found in the event set.
30    UnknownParent,
31    /// The hash could not be computed (e.g., serialization failure).
32    ComputeFailure,
33}
34
35// ---------------------------------------------------------------------------
36// Errors
37// ---------------------------------------------------------------------------
38
39/// Errors from DAG hash verification.
40#[derive(Debug, thiserror::Error)]
41pub enum HashError {
42    /// The stored event hash does not match the recomputed hash.
43    #[error("event hash mismatch: stored={stored} expected={expected}")]
44    HashMismatch {
45        /// The hash stored on the event (which is wrong).
46        stored: String,
47        /// The hash we computed from the event's fields (the correct value).
48        expected: String,
49    },
50
51    /// A parent hash referenced by an event is not found in the event set.
52    #[error("event {event_hash} references unknown parent {parent_hash}")]
53    UnknownParent {
54        /// The hash of the event that has the bad parent reference.
55        event_hash: String,
56        /// The parent hash that was not found in the event set.
57        parent_hash: String,
58    },
59
60    /// Failed to compute the hash of an event.
61    #[error("failed to compute event hash: {0}")]
62    Compute(#[from] WriteError),
63}
64
65impl HashError {
66    /// Return the machine-readable error code for this error.
67    #[must_use]
68    pub const fn code(&self) -> HashErrorCode {
69        match self {
70            Self::HashMismatch { .. } => HashErrorCode::HashMismatch,
71            Self::UnknownParent { .. } => HashErrorCode::UnknownParent,
72            Self::Compute(_) => HashErrorCode::ComputeFailure,
73        }
74    }
75}
76
77// ---------------------------------------------------------------------------
78// Public API
79// ---------------------------------------------------------------------------
80
81/// Verify that an event's stored hash matches the hash of its content.
82///
83/// The hash is recomputed from the event's fields 1-7 (`wall_ts_us`, `agent`,
84/// `itc`, `parents`, `event_type`, `item_id`, `data`) using BLAKE3, and compared
85/// against the `event_hash` field stored on the event.
86///
87/// Returns `true` if the stored hash is valid, `false` otherwise.
88///
89/// # Errors
90///
91/// Returns [`HashError::Compute`] if the hash cannot be computed (e.g.,
92/// data serialization failure).
93pub fn verify_event_hash(event: &Event) -> Result<bool, HashError> {
94    let expected = compute_event_hash(event)?;
95    let Some(stored) = decode_blake3_hash(&event.event_hash) else {
96        return Ok(false);
97    };
98    let Some(computed) = decode_blake3_hash(&expected) else {
99        return Ok(false);
100    };
101    Ok(stored == computed)
102}
103
104/// Verify the Merkle-DAG integrity of a collection of events.
105///
106/// This function performs two checks for each event in the collection:
107///
108/// 1. **Hash integrity**: the event's stored `event_hash` matches the BLAKE3
109///    hash computed from its fields. If any ancestor was modified, its hash
110///    will no longer match, causing this check to fail.
111///
112/// 2. **Parent resolution**: every parent hash referenced by an event must
113///    appear as another event's `event_hash` in the provided collection.
114///
115/// Together, these checks enforce the Merkle property: modifying any event
116/// invalidates the hashes of all its descendants (because a descendant's hash
117/// covers its `parents` field, which encodes ancestor hashes).
118///
119/// Events may be provided in any order; all hashes are collected into a lookup
120/// table before validation begins.
121///
122/// # Errors
123///
124/// Returns the first [`HashError`] encountered. The order in which events
125/// are checked is not guaranteed.
126pub fn verify_chain(events: &[&Event]) -> Result<(), HashError> {
127    // Build a lookup table of all known event hashes in this collection.
128    let known: HashSet<&str> = events.iter().map(|e| e.event_hash.as_str()).collect();
129
130    for event in events {
131        // 1. Verify each event's stored hash matches its computed hash.
132        let expected = compute_event_hash(event)?;
133        let stored =
134            decode_blake3_hash(&event.event_hash).ok_or_else(|| HashError::HashMismatch {
135                stored: event.event_hash.clone(),
136                expected: expected.clone(),
137            })?;
138        let computed = decode_blake3_hash(&expected).ok_or_else(|| HashError::HashMismatch {
139            stored: event.event_hash.clone(),
140            expected: expected.clone(),
141        })?;
142        if stored != computed {
143            return Err(HashError::HashMismatch {
144                stored: event.event_hash.clone(),
145                expected,
146            });
147        }
148
149        // 2. Verify every parent reference resolves to a known event.
150        for parent in &event.parents {
151            if !known.contains(parent.as_str()) {
152                return Err(HashError::UnknownParent {
153                    event_hash: event.event_hash.clone(),
154                    parent_hash: parent.clone(),
155                });
156            }
157        }
158    }
159
160    Ok(())
161}
162
163// ---------------------------------------------------------------------------
164// Tests
165// ---------------------------------------------------------------------------
166
167#[cfg(test)]
168mod tests {
169    use std::collections::BTreeMap;
170
171    use super::*;
172    use crate::event::Event;
173    use crate::event::data::{CreateData, EventData, MoveData};
174    use crate::event::types::EventType;
175    use crate::event::writer::write_event;
176    use crate::model::item::{Kind, State, Urgency};
177    use crate::model::item_id::ItemId;
178
179    // -------------------------------------------------------------------
180    // Helpers
181    // -------------------------------------------------------------------
182
183    /// Build a root event with the given timestamp and compute its hash.
184    fn make_root(wall_ts_us: i64) -> Event {
185        let mut event = Event {
186            wall_ts_us,
187            agent: "agent-a".into(),
188            itc: "itc:AQ".into(),
189            parents: vec![],
190            event_type: EventType::Create,
191            item_id: ItemId::new_unchecked("bn-a1b2"),
192            data: EventData::Create(CreateData {
193                title: "Root event".into(),
194                kind: Kind::Task,
195                size: None,
196                urgency: Urgency::Default,
197                labels: vec![],
198                parent: None,
199                causation: None,
200                description: None,
201                extra: BTreeMap::new(),
202            }),
203            event_hash: "blake3:placeholder".into(),
204        };
205        // Compute and stamp the correct hash.
206        write_event(&mut event).expect("write_event should not fail");
207        event
208    }
209
210    /// Build a child event that references `parent_hash`, then compute its hash.
211    fn make_child(wall_ts_us: i64, parent_hash: &str) -> Event {
212        let mut event = Event {
213            wall_ts_us,
214            agent: "agent-b".into(),
215            itc: "itc:AQ.1".into(),
216            parents: vec![parent_hash.to_owned()],
217            event_type: EventType::Move,
218            item_id: ItemId::new_unchecked("bn-a1b2"),
219            data: EventData::Move(MoveData {
220                state: State::Doing,
221                reason: None,
222                extra: BTreeMap::new(),
223            }),
224            event_hash: "blake3:placeholder".into(),
225        };
226        write_event(&mut event).expect("write_event should not fail");
227        event
228    }
229
230    // -------------------------------------------------------------------
231    // verify_event_hash
232    // -------------------------------------------------------------------
233
234    #[test]
235    fn test_verify_event_hash_valid() {
236        let event = make_root(1_000_000);
237        let result = verify_event_hash(&event).expect("should compute");
238        assert!(result, "freshly written event should have valid hash");
239    }
240
241    #[test]
242    fn test_verify_event_hash_tampered_content() {
243        let mut event = make_root(1_000_000);
244        // Tamper: change the timestamp but leave the stored hash alone.
245        event.wall_ts_us += 1;
246        let result = verify_event_hash(&event).expect("should compute");
247        assert!(!result, "tampered event should fail hash check");
248    }
249
250    #[test]
251    fn test_verify_event_hash_tampered_agent() {
252        let mut event = make_root(1_000_000);
253        event.agent = "evil-agent".into();
254        let result = verify_event_hash(&event).expect("should compute");
255        assert!(!result, "event with modified agent should fail hash check");
256    }
257
258    #[test]
259    fn test_verify_event_hash_tampered_parents() {
260        let root = make_root(1_000_000);
261        let mut child = make_child(2_000_000, &root.event_hash);
262        // Tamper: change the parent reference while leaving the stored hash alone.
263        child.parents[0] = "blake3:forged_parent_hash".into();
264        let result = verify_event_hash(&child).expect("should compute");
265        assert!(!result, "event with forged parent should fail hash check");
266    }
267
268    #[test]
269    fn test_verify_event_hash_deterministic() {
270        let event = make_root(42_000_000);
271        // Calling verify multiple times on the same event should always agree.
272        let r1 = verify_event_hash(&event).expect("first call");
273        let r2 = verify_event_hash(&event).expect("second call");
274        assert_eq!(r1, r2, "verify_event_hash must be deterministic");
275        assert!(r1, "should be valid");
276    }
277
278    // -------------------------------------------------------------------
279    // verify_chain
280    // -------------------------------------------------------------------
281
282    #[test]
283    fn test_verify_chain_single_root_event() {
284        let root = make_root(1_000_000);
285        verify_chain(&[&root]).expect("single valid root event should pass");
286    }
287
288    #[test]
289    fn test_verify_chain_linear_chain() {
290        let root = make_root(1_000_000);
291        let child = make_child(2_000_000, &root.event_hash);
292        let grandchild = make_child(3_000_000, &child.event_hash);
293
294        verify_chain(&[&root, &child, &grandchild]).expect("valid 3-event chain should pass");
295    }
296
297    #[test]
298    fn test_verify_chain_order_independent() {
299        // Provide events in reverse order — should still pass.
300        let root = make_root(1_000_000);
301        let child = make_child(2_000_000, &root.event_hash);
302
303        verify_chain(&[&child, &root]).expect("order should not matter for verify_chain");
304    }
305
306    #[test]
307    fn test_verify_chain_tampered_root_detected() {
308        let mut root = make_root(1_000_000);
309        let child = make_child(2_000_000, &root.event_hash);
310
311        // Tamper root content (leave its stored hash unchanged).
312        root.wall_ts_us += 999;
313
314        let err =
315            verify_chain(&[&root, &child]).expect_err("tampered root should cause chain failure");
316        assert_eq!(err.code(), HashErrorCode::HashMismatch);
317    }
318
319    #[test]
320    fn test_verify_chain_tampered_child_detected() {
321        let root = make_root(1_000_000);
322        let mut child = make_child(2_000_000, &root.event_hash);
323
324        // Tamper child content (leave its stored hash unchanged).
325        child.agent = "impersonator".into();
326
327        let err =
328            verify_chain(&[&root, &child]).expect_err("tampered child should cause chain failure");
329        assert_eq!(err.code(), HashErrorCode::HashMismatch);
330    }
331
332    #[test]
333    fn test_verify_chain_unknown_parent_detected() {
334        let child = make_child(2_000_000, "blake3:nonexistent_parent_hash");
335        // Note: child.event_hash is valid for child's own fields; only its
336        // parent reference is unresolvable.
337
338        let err =
339            verify_chain(&[&child]).expect_err("unresolvable parent should cause chain failure");
340        assert_eq!(err.code(), HashErrorCode::UnknownParent);
341    }
342
343    #[test]
344    fn test_verify_chain_merkle_cascade_property() {
345        // Create a 3-event chain: root → child → grandchild.
346        let root = make_root(1_000_000);
347        let child = make_child(2_000_000, &root.event_hash);
348        let grandchild = make_child(3_000_000, &child.event_hash);
349
350        // Chain is valid before tampering.
351        verify_chain(&[&root, &child, &grandchild]).expect("chain is initially valid");
352
353        // Now simulate an ancestor modification:
354        // Modify root and correctly recompute its hash (as an attacker would).
355        let mut modified_root = root.clone();
356        modified_root.wall_ts_us += 1;
357        let new_root_hash = compute_event_hash(&modified_root).expect("hash compute");
358        modified_root.event_hash = new_root_hash.clone();
359
360        // modified_root now has a valid-looking hash, but child still references
361        // the OLD root hash. The Merkle property means this breaks the chain:
362        // child.parents[0] == old root hash, which no longer exists in the set.
363        let err = verify_chain(&[&modified_root, &child, &grandchild])
364            .expect_err("Merkle cascade: ancestor modification breaks descendants");
365        // Either the child's parent is now unresolvable (old root hash gone)
366        // or the root's hash mismatches — both demonstrate the Merkle property.
367        assert!(
368            matches!(
369                err.code(),
370                HashErrorCode::UnknownParent | HashErrorCode::HashMismatch
371            ),
372            "expected Merkle violation error, got: {:?}",
373            err
374        );
375    }
376
377    #[test]
378    fn test_verify_chain_empty_slice() {
379        // Empty slice should pass trivially.
380        verify_chain(&[]).expect("empty slice should be valid");
381    }
382
383    #[test]
384    fn test_verify_chain_merge_event_two_parents() {
385        // Test a DAG with a merge point (two parents).
386        let root_a = make_root(1_000_000);
387        let root_b = make_root(1_100_000); // different timestamp → different hash
388
389        // Build a merge event with two parents (sorted lexicographically).
390        let mut parents = vec![root_a.event_hash.clone(), root_b.event_hash.clone()];
391        parents.sort();
392
393        let mut merge_event = Event {
394            wall_ts_us: 2_000_000,
395            agent: "agent-c".into(),
396            itc: "itc:AQ.2".into(),
397            parents,
398            event_type: EventType::Move,
399            item_id: ItemId::new_unchecked("bn-a1b2"),
400            data: EventData::Move(MoveData {
401                state: State::Done,
402                reason: Some("merged".into()),
403                extra: BTreeMap::new(),
404            }),
405            event_hash: "blake3:placeholder".into(),
406        };
407        write_event(&mut merge_event).expect("write merge event");
408
409        verify_chain(&[&root_a, &root_b, &merge_event])
410            .expect("DAG with merge point should be valid");
411    }
412
413    // -------------------------------------------------------------------
414    // HashError machine-readable codes
415    // -------------------------------------------------------------------
416
417    #[test]
418    fn test_hash_error_code_mismatch() {
419        let err = HashError::HashMismatch {
420            stored: "blake3:wrong".into(),
421            expected: "blake3:right".into(),
422        };
423        assert_eq!(err.code(), HashErrorCode::HashMismatch);
424    }
425
426    #[test]
427    fn test_hash_error_code_unknown_parent() {
428        let err = HashError::UnknownParent {
429            event_hash: "blake3:child".into(),
430            parent_hash: "blake3:missing".into(),
431        };
432        assert_eq!(err.code(), HashErrorCode::UnknownParent);
433    }
434}