solid_pod_rs/provenance.rs
1//! Provenance primitives — composable, cost-tiered traceability for pod writes.
2//!
3//! Implements the data model and traits from
4//! [`docs/design/provenance-upgrade-master-plan.md`](../../docs/design/provenance-upgrade-master-plan.md)
5//! §2 and [ADR-059](../../docs/adr/ADR-059-provenance-primitives-block-trails-git-marks.md)
6//! (D1, D2, D4, D6). Two tiers compose into one chain:
7//!
8//! - **git-mark** (cheap, always-on): every pod write becomes a git commit;
9//! the commit SHA is captured as a [`GitMark`]. Content-addressed,
10//! append-only, tamper-evident ordering for free. The native implementation
11//! of [`GitMarker`] lives in `solid-pod-rs-git::mark` (it shells to `git`);
12//! wasm consumers compile against a no-op marker.
13//! - **block-trail anchor** (expensive, opt-in): a Bitcoin-anchored MRC20 state
14//! whose taproot UTXO externally timestamps a record ([`BlockTrailAnchor`]).
15//! Reserved for high-value records. The [`BlockAnchorer`] trait is defined
16//! here; a real implementation lands in Phase 4 (`bitcoin_tx.rs` + mempool).
17//!
18//! A [`ProvenanceMark`] always carries a [`GitMark`] and *optionally* a
19//! [`BlockTrailAnchor`]. The anchor's `state_hash` commits to the git SHA (or an
20//! epoch Merkle root over many commits), binding both tiers into one chain.
21//!
22//! ## wasm32 safety
23//!
24//! Everything in this module — the types and [`prov_ttl`] — is pure logic and
25//! compiles for `wasm32-unknown-unknown`. The traits are `?Send` (matching the
26//! crate's existing [`crate::payments::PaymentStore`] pattern) so a wasm
27//! single-threaded executor can implement them. No `tokio`, no process spawning,
28//! no I/O leaks into this surface.
29
30use std::path::Path;
31use std::sync::Arc;
32
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35
36// ---------------------------------------------------------------------------
37// Data model (§2.1)
38// ---------------------------------------------------------------------------
39
40/// A provenance mark over a pod resource write.
41///
42/// Always carries a git commit ([`GitMark`]); optionally upgraded with a
43/// Bitcoin block-trail anchor ([`BlockTrailAnchor`]) for high-value records.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct ProvenanceMark {
46 /// Pod-relative path of the resource the write targeted.
47 pub resource: String,
48 /// The git commit the write produced — **always present** (cheap tier).
49 pub git: GitMark,
50 /// Optional Bitcoin block-trail anchor — **opt-in** (expensive tier).
51 #[serde(default, skip_serializing_if = "Option::is_none")]
52 pub anchor: Option<BlockTrailAnchor>,
53 /// `did:nostr` of the writer (NIP-98 authenticated principal), or an
54 /// anonymous marker when the write was unauthenticated.
55 pub agent_did: String,
56 /// Unix seconds at which the mark was produced.
57 pub created: u64,
58}
59
60/// The cheap-tier git commit captured for a pod write.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct GitMark {
63 /// Git SHA-1 of the commit the write produced.
64 pub commit_sha: String,
65 /// Pod repo slug (the pod's first path segment / pubkey).
66 pub repo: String,
67 /// Branch the commit landed on. Pinned to `"main"` by `init.rs`.
68 pub branch: String,
69 /// Prior commit SHA (the append-only chain link), or `None` for the
70 /// genesis commit of a freshly-initialised repo.
71 #[serde(default, skip_serializing_if = "Option::is_none")]
72 pub parent: Option<String>,
73}
74
75/// The expensive-tier Bitcoin anchor for a record.
76///
77/// Reuses the existing [`crate::mrc20`] crypto (`Mrc20State`, `bt_address`,
78/// `verify_mrc20_anchor`) — no crypto is re-implemented here. The
79/// `state_strings` carry the portable, independently-verifiable proof.
80#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
81pub struct BlockTrailAnchor {
82 /// Trail ticker / identifier.
83 pub ticker: String,
84 /// `sha256_hex(jcs(state))` — links into the MRC20 trail and commits to
85 /// the git SHA (or an epoch Merkle root).
86 pub state_hash: String,
87 /// Bitcoin transaction id of the anchoring UTXO.
88 pub txid: String,
89 /// Output index of the anchoring UTXO.
90 pub vout: u32,
91 /// Derived P2TR address (`mrc20::bt_address`).
92 pub address: String,
93 /// `"testnet4"` | `"mainnet"` (or any network the operator configures).
94 pub network: String,
95 /// Confirmation height; `None` until the anchoring tx confirms.
96 #[serde(default, skip_serializing_if = "Option::is_none")]
97 pub blockheight: Option<u64>,
98 /// Portable, independently-verifiable proof — the serialised states.
99 #[serde(default)]
100 pub state_strings: Vec<String>,
101 /// Issuer's compressed pubkey (66-char hex). Together with
102 /// `state_strings` it re-derives the taproot `address` via
103 /// `mrc20::bt_address` — the read-side check
104 /// ([`BlockAnchorer::verify`](crate::provenance::BlockAnchorer::verify))
105 /// needs it to confirm `address` was not forged. `None` on legacy /
106 /// partially-populated anchors (verify then has nothing to re-derive
107 /// against and reports `false`).
108 #[serde(default, skip_serializing_if = "Option::is_none")]
109 pub pubkey: Option<String>,
110}
111
112// ---------------------------------------------------------------------------
113// Errors
114// ---------------------------------------------------------------------------
115
/// Errors raised by the provenance primitives.
///
/// `Display` and `Error` are implemented by hand instead of being derived with
/// `thiserror`, so that the type builds on `wasm32` without proc-macro
/// evaluation entering the pure surface. The message format follows the
/// crate's usual `<area>: <detail>` style.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProvenanceError {
    /// A git operation failed (spawn, commit, rev-parse, …).
    Git(String),
    /// A Bitcoin anchor operation failed (mempool, tx-build, verify, …).
    Anchor(String),
    /// The resource path was refused (traversal, sidecar suffix, …).
    InvalidPath(String),
    /// The mark could not be persisted or emitted.
    Store(String),
}

impl std::fmt::Display for ProvenanceError {
    /// Writes `<area>: <detail>`, where the area label depends on the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (area, detail) = match self {
            Self::Git(detail) => ("git-mark", detail),
            Self::Anchor(detail) => ("block-anchor", detail),
            Self::InvalidPath(detail) => ("invalid provenance path", detail),
            Self::Store(detail) => ("provenance store", detail),
        };
        write!(f, "{area}: {detail}")
    }
}

impl std::error::Error for ProvenanceError {}
145
146// ---------------------------------------------------------------------------
147// Traits (§2.2)
148// ---------------------------------------------------------------------------
149
150/// Cheap tier. Implemented by `solid-pod-rs-git` (shells to `git`).
151///
152/// `?Send` for wasm32-safety, matching the crate's [`crate::payments::PaymentStore`]
153/// pattern. The wasm `core` consumer compiles against a no-op marker.
154#[async_trait::async_trait(?Send)]
155pub trait GitMarker: Send + Sync {
156 /// Stage `path` and commit it, returning the resulting [`GitMark`].
157 ///
158 /// `repo` is the absolute filesystem path to the (non-bare) pod repo;
159 /// `path` is the repo-relative path written; `agent_did` is recorded as
160 /// the commit author email; `message` is the commit subject. When there
161 /// is nothing to commit the implementation returns a mark referencing the
162 /// current HEAD without erroring.
163 async fn mark_write(
164 &self,
165 repo: &Path,
166 path: &str,
167 agent_did: &str,
168 message: &str,
169 ) -> Result<GitMark, ProvenanceError>;
170
171 /// Return the current HEAD commit SHA, or `None` for an unborn branch.
172 async fn head(&self, repo: &Path) -> Result<Option<String>, ProvenanceError>;
173}
174
175/// Expensive tier. Server-side (mempool + Bitcoin TX), behind feature `mrc20`.
176///
177/// Defined here; a real implementation lands in Phase 4 (`bitcoin_tx.rs`).
178#[async_trait::async_trait(?Send)]
179pub trait BlockAnchorer: Send + Sync {
180 /// Anchor `state_hash` under `ticker` on `network`, returning the produced
181 /// [`BlockTrailAnchor`]. Implemented by
182 /// `solid-pod-rs-server::mempool::MempoolBlockAnchorer` (builds + broadcasts
183 /// a taproot MRC20 anchoring tx via `bitcoin_tx.rs`).
184 async fn anchor(
185 &self,
186 ticker: &str,
187 state_hash: &str,
188 network: &str,
189 ) -> Result<BlockTrailAnchor, ProvenanceError>;
190
191 /// Verify a previously-produced anchor against the chain / fixtures
192 /// (re-derives the taproot address from the portable proof, then confirms a
193 /// UTXO sits at it).
194 async fn verify(&self, anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError>;
195}
196
197// ---------------------------------------------------------------------------
198// Composition policy (§2.3, ADR-059 D1/D5)
199// ---------------------------------------------------------------------------
200
201/// When a [`ProvenanceLog::record`] write should incur the expensive Bitcoin
202/// block-trail anchor on top of the always-on git-mark.
203///
204/// The cheap tier (git-mark) runs for *every* policy — these variants only
205/// govern the **opt-in** anchor. Pure, `Copy`, wasm-safe: it carries no I/O.
206///
207/// | Variant | Anchor behaviour |
208/// |--------------|--------------------------------------------------------|
209/// | [`Never`](AnchorPolicy::Never) | git-mark only — no on-chain cost. The default for ordinary writes. |
210/// | [`Always`](AnchorPolicy::Always) | anchor **every** write (commits the git SHA on-chain). Expensive; only for trails where every state must be externally timestamped. |
211/// | [`HighValue`](AnchorPolicy::HighValue) | anchor iff the resource is flagged anchor-worthy (its ACL carries a `ProvenanceAnchor` condition / the caller passes the high-value flag). Settlement receipts, elevation/ACSP decisions. |
212/// | [`Epoch`](AnchorPolicy::Epoch) | accumulate the git SHA into an [`EpochAccumulator`]; the batch root is anchored **once** on epoch close (one Bitcoin tx notarises many commits — ADR-059 D5). |
213///
214/// An anchor is attempted only when the policy says so **and** the
215/// [`ProvenanceLog`] was built with an anchorer ([`ProvenanceLog::anchorer`]
216/// is `Some`). With `anchorer: None` (the wasm / no-Bitcoin pod) every policy
217/// degrades to git-mark-only, silently.
218#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
219pub enum AnchorPolicy {
220 /// git-mark only; never anchor. The default for ordinary pod writes.
221 #[default]
222 Never,
223 /// Anchor every write — the git commit SHA is committed on-chain each time.
224 Always,
225 /// Anchor only when the resource is flagged high-value (ACL carries a
226 /// `ProvenanceAnchor` condition). Otherwise git-mark only.
227 HighValue,
228 /// Accumulate the commit into the current epoch; the epoch's Merkle root is
229 /// anchored once on close (amortised on-chain cost — ADR-059 D5).
230 Epoch,
231}
232
233impl AnchorPolicy {
234 /// Whether *this write* should be anchored inline (i.e. produce a
235 /// [`BlockTrailAnchor`] on the returned mark), given whether the resource
236 /// is flagged high-value.
237 ///
238 /// - `Never` / `Epoch` ⇒ never inline (`Epoch` defers to the accumulator).
239 /// - `Always` ⇒ always inline.
240 /// - `HighValue` ⇒ inline iff `high_value`.
241 #[must_use]
242 pub fn anchors_inline(self, high_value: bool) -> bool {
243 match self {
244 AnchorPolicy::Never | AnchorPolicy::Epoch => false,
245 AnchorPolicy::Always => true,
246 AnchorPolicy::HighValue => high_value,
247 }
248 }
249}
250
251// ---------------------------------------------------------------------------
252// Composition log (§2.2 `ProvenanceLog`, §2.3 composition rule)
253// ---------------------------------------------------------------------------
254
255/// The composition point for the two provenance tiers (master-plan §2.2/§2.3,
256/// ADR-059 D1).
257///
258/// A `ProvenanceLog` always holds the cheap-tier [`GitMarker`] and *optionally*
259/// the expensive-tier [`BlockAnchorer`]. [`record`](ProvenanceLog::record)
260/// implements the **cheap-always, expensive-opt-in** rule:
261///
262/// 1. **Always** `marker.mark_write()` → [`GitMark`] (every write becomes a
263/// commit; we capture the SHA).
264/// 2. **Conditionally** `anchorer.anchor()` when the [`AnchorPolicy`] says this
265/// write anchors inline AND an anchorer is present. The anchor's
266/// `state_hash` is set to the git commit SHA — so the Bitcoin UTXO commits
267/// to the git history, **binding the two tiers into one chain** (§2.3).
268///
269/// The returned [`ProvenanceMark`] carries the git-mark always and the anchor
270/// when one was produced. Persisting the PROV-O sidecar and emitting the
271/// `Updates-via` notification (step 3) is the server's job — kept out of this
272/// pure surface so it compiles for wasm.
273///
274/// ## wasm32 safety
275///
276/// `Arc<dyn GitMarker>` / `Arc<dyn BlockAnchorer>` are `?Send` trait objects;
277/// the type holds no runtime. On wasm the pod constructs it with a no-op marker
278/// and `anchorer: None`, so `record` is git-mark-only and never reaches any
279/// Bitcoin I/O.
280#[derive(Clone)]
281pub struct ProvenanceLog {
282 /// Cheap tier — always invoked. The native server injects
283 /// `solid-pod-rs-git`'s `ShellGitMarker`; wasm injects a no-op.
284 pub marker: Arc<dyn GitMarker>,
285 /// Expensive tier — optional. `None` in pods that do not pay for Bitcoin
286 /// anchoring (and always `None` on wasm).
287 pub anchorer: Option<Arc<dyn BlockAnchorer>>,
288}
289
290impl std::fmt::Debug for ProvenanceLog {
291 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292 f.debug_struct("ProvenanceLog")
293 .field("marker", &"Arc<dyn GitMarker>")
294 .field("anchorer", &self.anchorer.as_ref().map(|_| "Arc<dyn BlockAnchorer>"))
295 .finish()
296 }
297}
298
299/// Descriptor of a single pod write passed to [`ProvenanceLog::record`].
300///
301/// Borrowed (no allocation on the hot path), mirroring
302/// [`crate::wac::conditions::RequestContext`]. Bundles the write identity
303/// (repo/path/agent/message), the expensive-tier [`AnchorPolicy`] + its
304/// `high_value` flag, and the trail coordinates (`ticker`/`network`) an anchor
305/// targets. The trail fields are ignored unless the policy actually anchors.
306#[derive(Debug, Clone, Copy)]
307pub struct WriteRecord<'a> {
308 /// Absolute filesystem path to the (non-bare) pod repo.
309 pub repo: &'a Path,
310 /// Repo-relative path of the resource written.
311 pub path: &'a str,
312 /// `did:nostr` of the writer (NIP-98 principal), or an anonymous marker.
313 pub agent_did: &'a str,
314 /// Commit subject (the LDP method + path).
315 pub message: &'a str,
316 /// Expensive-tier policy (see [`AnchorPolicy`]).
317 pub policy: AnchorPolicy,
318 /// Whether the resource is flagged high-value (ACL `ProvenanceAnchor`).
319 pub high_value: bool,
320 /// Trail ticker to anchor against (used only when anchoring).
321 pub ticker: &'a str,
322 /// Bitcoin network of the trail (used only when anchoring).
323 pub network: &'a str,
324 /// Unix seconds stamped onto the produced mark.
325 pub created: u64,
326}
327
328impl ProvenanceLog {
329 /// Construct a git-mark-only log (no Bitcoin tier). The common case for
330 /// ordinary pods and the only shape available on wasm.
331 #[must_use]
332 pub fn new(marker: Arc<dyn GitMarker>) -> Self {
333 Self { marker, anchorer: None }
334 }
335
336 /// Construct a log with both tiers wired.
337 #[must_use]
338 pub fn with_anchorer(marker: Arc<dyn GitMarker>, anchorer: Arc<dyn BlockAnchorer>) -> Self {
339 Self {
340 marker,
341 anchorer: Some(anchorer),
342 }
343 }
344
345 /// Record a pod resource write across both tiers (the composition rule).
346 ///
347 /// The write is described by a [`WriteRecord`]. Always commits (cheap tier).
348 /// Then, iff `policy.anchors_inline(high_value)` — both carried by the
349 /// [`WriteRecord`] — AND an anchorer is present, anchors the **git commit
350 /// SHA** under the record's `ticker`/`network`, attaching the
351 /// [`BlockTrailAnchor`] to the returned mark. The anchor's `state_hash` is
352 /// the commit SHA, binding git ↔ Bitcoin (master-plan §2.3).
353 ///
354 /// For [`AnchorPolicy::Epoch`] this method never anchors inline — the caller
355 /// feeds the returned `git.commit_sha` into an [`EpochAccumulator`] and
356 /// anchors the batch root on epoch close.
357 ///
358 /// Errors from the **cheap** tier propagate (the git-mark is the contract).
359 /// Errors from the **expensive** tier are returned too, so the caller can
360 /// decide its own best-effort policy — the server hook logs+swallows them
361 /// (a failed anchor must never fail the LDP write), exactly as it does for
362 /// the git-mark.
363 pub async fn record(&self, rec: WriteRecord<'_>) -> Result<ProvenanceMark, ProvenanceError> {
364 // 1. Cheap tier — ALWAYS. A failure here is a hard error: the git-mark
365 // is the always-on contract.
366 let git = self
367 .marker
368 .mark_write(rec.repo, rec.path, rec.agent_did, rec.message)
369 .await?;
370
371 // 2. Expensive tier — opt-in. Only when the policy anchors this write
372 // inline AND an anchorer is wired. The anchored state_hash IS the
373 // git commit SHA — the Bitcoin UTXO now commits to the git history
374 // (master-plan §2.3 "binds both primitives into one chain").
375 let anchor = if rec.policy.anchors_inline(rec.high_value) {
376 match &self.anchorer {
377 Some(a) => Some(a.anchor(rec.ticker, &git.commit_sha, rec.network).await?),
378 None => None,
379 }
380 } else {
381 None
382 };
383
384 Ok(ProvenanceMark {
385 resource: path_to_resource(rec.path),
386 git,
387 anchor,
388 agent_did: rec.agent_did.to_string(),
389 created: rec.created,
390 })
391 }
392}
393
/// Convert a repo-relative `path` to the pod-relative `resource` form stored
/// in a [`ProvenanceMark`], i.e. with a leading slash. A path that already
/// begins with `/` is returned unchanged, so the function is idempotent.
fn path_to_resource(path: &str) -> String {
    match path.strip_prefix('/') {
        Some(_) => path.to_owned(),
        None => ["/", path].concat(),
    }
}
404
405// ---------------------------------------------------------------------------
406// Epoch Merkle-root anchoring (§2.3, ADR-059 D5) — pure, wasm-safe
407// ---------------------------------------------------------------------------
408
409/// Compute a binary SHA-256 Merkle root over `leaves` (each a 32-byte digest),
410/// duplicating the last node on an odd level (Bitcoin-style). Returns the
411/// all-zero digest for an empty input.
412///
413/// Pure and wasm-safe — uses only the always-compiled `sha2` dependency. Leaves
414/// are hashed *as given*; callers pass `sha256(commit_sha)` so the tree commits
415/// to the exact commit identifiers.
416fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] {
417 if leaves.is_empty() {
418 return [0u8; 32];
419 }
420 let mut level: Vec<[u8; 32]> = leaves.to_vec();
421 while level.len() > 1 {
422 let mut next = Vec::with_capacity(level.len().div_ceil(2));
423 let mut i = 0;
424 while i < level.len() {
425 let left = level[i];
426 // Duplicate the last node when the level is odd.
427 let right = if i + 1 < level.len() { level[i + 1] } else { left };
428 let mut h = Sha256::new();
429 h.update(left);
430 h.update(right);
431 next.push(h.finalize().into());
432 i += 2;
433 }
434 level = next;
435 }
436 level[0]
437}
438
439/// Hash one leaf value (a git commit SHA, as text) into the Merkle leaf digest.
440fn merkle_leaf(commit_sha: &str) -> [u8; 32] {
441 Sha256::digest(commit_sha.as_bytes()).into()
442}
443
444/// A Merkle inclusion proof: the sibling digests from leaf to root, each tagged
445/// with whether the sibling sits on the **right** of the running hash at that
446/// level. Verified with [`EpochAccumulator::verify_inclusion`].
447#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
448pub struct MerkleProof {
449 /// Hex of the leaf value's digest (`sha256(commit_sha)`).
450 pub leaf: String,
451 /// Sibling steps from the leaf upward: `(sibling_hex, sibling_is_right)`.
452 pub siblings: Vec<(String, bool)>,
453}
454
/// Collects git commit SHAs for one epoch and, when the epoch is closed,
/// produces the single Merkle root to anchor (ADR-059 D5 — *one Bitcoin tx
/// notarises many commits*).
///
/// A write under [`AnchorPolicy::Epoch`] calls
/// [`push`](EpochAccumulator::push) with the commit SHA its git-mark produced.
/// Once the configured commit count is reached
/// ([`is_full`](EpochAccumulator::is_full)), the caller
/// [`close`s](EpochAccumulator::close) the epoch, obtaining the hex root and
/// the batched SHAs, anchors that root **once** through a [`BlockAnchorer`],
/// and begins a new epoch. Any single commit can be proven against the
/// anchored root, without re-anchoring, via
/// [`inclusion_proof`](EpochAccumulator::inclusion_proof).
///
/// Pure and wasm-safe: neither the accumulator nor the Merkle maths does I/O.
/// The one anchor call is made by the caller through the (optional) anchorer.
#[derive(Debug, Clone)]
pub struct EpochAccumulator {
    /// SHAs gathered so far in this epoch; insertion order is leaf order.
    commits: Vec<String>,
    /// Number of commits at which the epoch counts as full. Set by operator
    /// policy (master-plan §5: "ACL writes epoch-only to bound cost").
    threshold: usize,
}
478
/// Outcome of closing an epoch: the Merkle root that gets anchored, together
/// with the commit SHAs that root commits to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClosedEpoch {
    /// Hex-encoded SHA-256 Merkle root over the epoch's commit-SHA leaves.
    /// This one value is what is anchored on-chain for the entire batch.
    pub root: String,
    /// Commit SHAs notarised by `root`, in leaf order.
    pub commits: Vec<String>,
}
489
490impl EpochAccumulator {
491 /// New, empty epoch with a close `threshold` (clamped to ≥ 1).
492 #[must_use]
493 pub fn new(threshold: usize) -> Self {
494 Self {
495 commits: Vec::new(),
496 threshold: threshold.max(1),
497 }
498 }
499
500 /// Add a git commit SHA to the current epoch.
501 pub fn push(&mut self, commit_sha: impl Into<String>) {
502 self.commits.push(commit_sha.into());
503 }
504
505 /// Number of commits accumulated this epoch.
506 #[must_use]
507 pub fn len(&self) -> usize {
508 self.commits.len()
509 }
510
511 /// Whether the epoch holds no commits.
512 #[must_use]
513 pub fn is_empty(&self) -> bool {
514 self.commits.is_empty()
515 }
516
517 /// The configured close threshold.
518 #[must_use]
519 pub fn threshold(&self) -> usize {
520 self.threshold
521 }
522
523 /// Whether the epoch has reached its close threshold (time to anchor).
524 #[must_use]
525 pub fn is_full(&self) -> bool {
526 self.commits.len() >= self.threshold
527 }
528
529 /// The current Merkle root (hex) over the accumulated commits, without
530 /// draining. Returns `None` for an empty epoch (nothing to anchor).
531 #[must_use]
532 pub fn root(&self) -> Option<String> {
533 if self.commits.is_empty() {
534 return None;
535 }
536 let leaves: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
537 Some(hex::encode(merkle_root(&leaves)))
538 }
539
540 /// Seal the epoch: compute the root, return it with the batched commit SHAs,
541 /// and **drain** the accumulator so a fresh epoch begins. Returns `None`
542 /// (and drains nothing) for an empty epoch.
543 pub fn close(&mut self) -> Option<ClosedEpoch> {
544 if self.commits.is_empty() {
545 return None;
546 }
547 let commits = std::mem::take(&mut self.commits);
548 let leaves: Vec<[u8; 32]> = commits.iter().map(|c| merkle_leaf(c)).collect();
549 let root = hex::encode(merkle_root(&leaves));
550 Some(ClosedEpoch { root, commits })
551 }
552
553 /// Produce an inclusion proof for the commit at leaf `index` against the
554 /// *current* set of accumulated commits. `None` if `index` is out of range.
555 ///
556 /// The proof verifies against the root produced by [`root`](Self::root) /
557 /// [`close`](Self::close) over the same commit set — i.e. against the value
558 /// anchored on-chain.
559 #[must_use]
560 pub fn inclusion_proof(&self, index: usize) -> Option<MerkleProof> {
561 let n = self.commits.len();
562 if index >= n {
563 return None;
564 }
565 let mut level: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
566 let leaf_hex = hex::encode(level[index]);
567 let mut idx = index;
568 let mut siblings: Vec<(String, bool)> = Vec::new();
569 while level.len() > 1 {
570 let sibling_idx = if idx % 2 == 0 { idx + 1 } else { idx - 1 };
571 // On an odd level the rightmost node is paired with itself.
572 let sib = if sibling_idx < level.len() {
573 level[sibling_idx]
574 } else {
575 level[idx]
576 };
577 let sibling_is_right = idx % 2 == 0;
578 siblings.push((hex::encode(sib), sibling_is_right));
579
580 // Build the next level.
581 let mut next = Vec::with_capacity(level.len().div_ceil(2));
582 let mut i = 0;
583 while i < level.len() {
584 let left = level[i];
585 let right = if i + 1 < level.len() { level[i + 1] } else { left };
586 let mut h = Sha256::new();
587 h.update(left);
588 h.update(right);
589 next.push(h.finalize().into());
590 i += 2;
591 }
592 level = next;
593 idx /= 2;
594 }
595 Some(MerkleProof {
596 leaf: leaf_hex,
597 siblings,
598 })
599 }
600
601 /// Verify a [`MerkleProof`] against an expected `root_hex` (the anchored
602 /// root). Recomputes the path and compares — no accumulator state needed,
603 /// so a verifier can check inclusion with only the proof + the on-chain
604 /// root.
605 #[must_use]
606 pub fn verify_inclusion(proof: &MerkleProof, root_hex: &str) -> bool {
607 let Ok(mut acc) = hex::decode(&proof.leaf) else {
608 return false;
609 };
610 if acc.len() != 32 {
611 return false;
612 }
613 for (sib_hex, sib_is_right) in &proof.siblings {
614 let Ok(sib) = hex::decode(sib_hex) else {
615 return false;
616 };
617 if sib.len() != 32 {
618 return false;
619 }
620 let mut h = Sha256::new();
621 if *sib_is_right {
622 h.update(&acc);
623 h.update(&sib);
624 } else {
625 h.update(&sib);
626 h.update(&acc);
627 }
628 acc = h.finalize().to_vec();
629 }
630 hex::encode(acc) == root_hex
631 }
632}
633
634// ---------------------------------------------------------------------------
635// PROV-O serialiser (§2.3 step 3, D7)
636// ---------------------------------------------------------------------------
637
/// Escape `s` so it can be placed inside a double-quoted Turtle literal
/// (RDF 1.1 Turtle §2.5.3 / §6.4 string escapes). Backslash, double quote,
/// line feed, carriage return and tab are replaced by their backslash
/// escapes; every other character is copied through unchanged.
fn ttl_escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '"' => Some("\\\""),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
654
/// Format `secs` (Unix seconds) as a UTC `xsd:dateTime` literal of the form
/// `YYYY-MM-DDThh:mm:ssZ`.
///
/// Pure, allocation-light and wasm-safe. `chrono` is already a crate
/// dependency, but its formatting is deliberately kept out of the pure
/// surface so this stays self-contained and deterministic for the golden test.
fn xsd_datetime(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;

    // Split into whole days since the epoch and the time of day.
    let day_count = (secs / SECS_PER_DAY) as i64;
    let second_of_day = (secs % SECS_PER_DAY) as i64;
    let hour = second_of_day / 3600;
    let minute = second_of_day % 3600 / 60;
    let second = second_of_day % 60;

    // Civil-from-days (Howard Hinnant's algorithm): exact, no leap tables.
    // The year is counted from 1 March within 400-year eras.
    let shifted = day_count + 719_468;
    let era = (if shifted >= 0 { shifted } else { shifted - 146_096 }) / 146_097;
    let day_of_era = shifted - era * 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
679
680/// Produce a minimal, correct PROV-O Turtle sidecar for a [`ProvenanceMark`].
681///
682/// The mark is modelled as a `prov:Activity` (the write) that
683/// `prov:generated` the resource entity, was performed by the agent
684/// (`prov:wasAssociatedWith`), and is identified by its git commit SHA. The
685/// resource entity records `prov:wasGeneratedBy` the activity. When a
686/// block-trail anchor is present it is emitted as an associated entity bearing
687/// the txid/state-hash so the sidecar carries both tiers.
688///
689/// Kept deliberately small: stable prefix block, one activity, one entity, one
690/// agent, optional anchor entity. Round-trip-safe with the unit tests below.
691pub fn prov_ttl(mark: &ProvenanceMark) -> String {
692 let sha = &mark.git.commit_sha;
693 let resource = ttl_escape(&mark.resource);
694 let agent = ttl_escape(&mark.agent_did);
695 let branch = ttl_escape(&mark.git.branch);
696 let repo = ttl_escape(&mark.git.repo);
697 let when = xsd_datetime(mark.created);
698
699 let mut ttl = String::new();
700 ttl.push_str("@prefix prov: <http://www.w3.org/ns/prov#> .\n");
701 ttl.push_str("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n");
702 ttl.push_str("@prefix git: <https://w3id.org/git#> .\n");
703 ttl.push_str("@prefix bt: <https://blocktrails.org/ns#> .\n\n");
704
705 // Activity: the write, identified by the commit it produced.
706 ttl.push_str(&format!("<urn:git:commit:{sha}> a prov:Activity ;\n"));
707 ttl.push_str(&format!(" prov:generated <{resource}> ;\n"));
708 ttl.push_str(&format!(" prov:wasAssociatedWith <{agent}> ;\n"));
709 ttl.push_str(&format!(" prov:endedAtTime \"{when}\"^^xsd:dateTime ;\n"));
710 ttl.push_str(&format!(" git:commit \"{sha}\" ;\n"));
711 ttl.push_str(&format!(" git:branch \"{branch}\" ;\n"));
712 ttl.push_str(&format!(" git:repo \"{repo}\" "));
713 if let Some(parent) = &mark.git.parent {
714 let parent = ttl_escape(parent);
715 ttl.push_str(&format!(";\n git:parent \"{parent}\" .\n"));
716 } else {
717 ttl.push_str(".\n");
718 }
719
720 // Entity: the generated resource.
721 ttl.push('\n');
722 ttl.push_str(&format!("<{resource}> a prov:Entity ;\n"));
723 ttl.push_str(&format!(
724 " prov:wasGeneratedBy <urn:git:commit:{sha}> ;\n"
725 ));
726 ttl.push_str(&format!(
727 " prov:wasAttributedTo <{agent}> .\n"
728 ));
729
730 // Agent.
731 ttl.push('\n');
732 ttl.push_str(&format!("<{agent}> a prov:Agent .\n"));
733
734 // Optional anchor entity (expensive tier).
735 if let Some(a) = &mark.anchor {
736 let txid = ttl_escape(&a.txid);
737 let ticker = ttl_escape(&a.ticker);
738 let state_hash = ttl_escape(&a.state_hash);
739 let network = ttl_escape(&a.network);
740 ttl.push('\n');
741 ttl.push_str(&format!("<urn:bt:tx:{txid}:{}> a prov:Entity ;\n", a.vout));
742 ttl.push_str(&format!(
743 " prov:wasDerivedFrom <urn:git:commit:{sha}> ;\n"
744 ));
745 ttl.push_str(&format!(" bt:ticker \"{ticker}\" ;\n"));
746 ttl.push_str(&format!(" bt:stateHash \"{state_hash}\" ;\n"));
747 ttl.push_str(&format!(" bt:network \"{network}\" ;\n"));
748 ttl.push_str(&format!(" bt:txid \"{txid}\" ;\n"));
749 ttl.push_str(&format!(" bt:vout \"{}\"^^xsd:integer ", a.vout));
750 if let Some(h) = a.blockheight {
751 ttl.push_str(&format!(";\n bt:blockheight \"{h}\"^^xsd:integer .\n"));
752 } else {
753 ttl.push_str(".\n");
754 }
755 }
756
757 ttl
758}
759
760// ---------------------------------------------------------------------------
761// Tests
762// ---------------------------------------------------------------------------
763
#[cfg(test)]
mod tests {
    // Unit tests for the provenance data model, the PROV-O Turtle rendering,
    // the `AnchorPolicy` / `ProvenanceLog` composition and the epoch Merkle
    // accumulator. Both tiers are mocked in memory: no subprocess, no network.

    use super::*;

    use std::sync::atomic::{AtomicUsize, Ordering};

    // -----------------------------------------------------------------------
    // Fixtures
    // -----------------------------------------------------------------------

    /// A git-mark on `main` that carries a parent commit.
    fn sample_git() -> GitMark {
        let parent = Some("00112233445566778899aabbccddeeff00112233".into());
        GitMark {
            parent,
            branch: "main".into(),
            repo: "deadbeef".into(),
            commit_sha: "a1b2c3d4e5f60718293a4b5c6d7e8f9001122334".into(),
        }
    }

    /// A cheap-tier provenance mark: [`sample_git`] and no anchor.
    fn sample_mark() -> ProvenanceMark {
        let git = sample_git();
        ProvenanceMark {
            created: 1_750_000_000,
            agent_did: "did:nostr:abcdef".into(),
            anchor: None,
            git,
            resource: "/notes/hello.ttl".into(),
        }
    }

    /// Serialises `value` to JSON and parses the text straight back.
    fn json_round_trip<T>(value: &T) -> T
    where
        T: serde::Serialize + serde::de::DeserializeOwned,
    {
        let encoded = serde_json::to_string(value).unwrap();
        serde_json::from_str(&encoded).unwrap()
    }

    // -----------------------------------------------------------------------
    // Data model: serde round-trips
    // -----------------------------------------------------------------------

    #[test]
    fn git_mark_round_trips() {
        let original = sample_git();
        assert_eq!(original, json_round_trip(&original));
    }

    #[test]
    fn provenance_mark_round_trips_without_anchor() {
        let original = sample_mark();
        let encoded = serde_json::to_string(&original).unwrap();
        // skip_serializing_if must drop the `anchor: None` field entirely.
        assert!(!encoded.contains("anchor"));
        let decoded: ProvenanceMark = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    #[test]
    fn provenance_mark_round_trips_with_anchor() {
        let anchor = BlockTrailAnchor {
            pubkey: Some(format!("02{}", "ab".repeat(32))),
            state_strings: vec!["{\"seq\":0}".into(), "{\"seq\":1}".into()],
            blockheight: Some(840_000),
            network: "testnet4".into(),
            address: "tb1pexample".into(),
            vout: 1,
            txid: "ab".repeat(32),
            state_hash: "ff".repeat(32),
            ticker: "PROV".into(),
        };
        let mut original = sample_mark();
        original.anchor = Some(anchor);
        assert_eq!(original, json_round_trip(&original));
    }

    #[test]
    fn block_trail_anchor_defaults_state_strings() {
        // JSON without `state_strings` / `blockheight` must still parse:
        // the former becomes an empty vec, the latter `None`.
        let json = r#"{
            "ticker":"PROV","state_hash":"00","txid":"00","vout":0,
            "address":"tb1p","network":"testnet4"
        }"#;
        let parsed: BlockTrailAnchor = serde_json::from_str(json).unwrap();
        assert!(parsed.state_strings.is_empty());
        assert!(parsed.blockheight.is_none());
    }

    // -----------------------------------------------------------------------
    // PROV-O Turtle rendering
    // -----------------------------------------------------------------------

    #[test]
    fn prov_ttl_contains_core_triples() {
        let rendered = prov_ttl(&sample_mark());
        let expected_fragments = [
            "@prefix prov: <http://www.w3.org/ns/prov#> .",
            "a prov:Activity",
            "prov:wasGeneratedBy",
            "prov:wasAssociatedWith <did:nostr:abcdef>",
            "a prov:Agent",
            // The commit sha shows up both as the activity id and as the
            // git:commit literal.
            "<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334>",
            "git:commit \"a1b2c3d4e5f60718293a4b5c6d7e8f9001122334\"",
            "git:branch \"main\"",
            "git:parent \"00112233445566778899aabbccddeeff00112233\"",
            // The activity is typed, and it generated the resource.
            "<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334> a prov:Activity",
            "prov:generated </notes/hello.ttl>",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment), "missing fragment: {fragment}");
        }
    }

    #[test]
    fn prov_ttl_omits_parent_when_absent() {
        let mark = ProvenanceMark {
            git: GitMark {
                parent: None,
                ..sample_git()
            },
            ..sample_mark()
        };
        let rendered = prov_ttl(&mark);
        assert!(!rendered.contains("git:parent"));
        // Without a parent the activity block must end right after git:repo.
        assert!(rendered.contains("git:repo \"deadbeef\" .\n"));
    }

    #[test]
    fn prov_ttl_emits_anchor_block_when_present() {
        let anchor = BlockTrailAnchor {
            pubkey: None,
            state_strings: vec![],
            blockheight: Some(840_000),
            network: "testnet4".into(),
            address: "tb1pexample".into(),
            vout: 2,
            txid: "cafebabe".into(),
            state_hash: "deadbeef".into(),
            ticker: "PROV".into(),
        };
        let mut mark = sample_mark();
        mark.anchor = Some(anchor);
        let rendered = prov_ttl(&mark);
        let expected_fragments = [
            "<urn:bt:tx:cafebabe:2> a prov:Entity",
            "bt:ticker \"PROV\"",
            "bt:stateHash \"deadbeef\"",
            "bt:blockheight \"840000\"^^xsd:integer",
            "prov:wasDerivedFrom <urn:git:commit:",
        ];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment), "missing fragment: {fragment}");
        }
    }

    #[test]
    fn prov_ttl_escapes_quotes_and_backslashes() {
        let mut mark = sample_mark();
        mark.agent_did = "did:nostr:\"weird\\did".into();
        let rendered = prov_ttl(&mark);
        // Neither the raw quote nor the raw backslash may survive unescaped.
        assert!(rendered.contains("did:nostr:\\\"weird\\\\did"));
    }

    #[test]
    fn xsd_datetime_known_epoch() {
        let cases = [
            // Verified against `date -u -d @1750000000`.
            (1_750_000_000, "2025-06-15T15:06:40Z"),
            // The Unix epoch itself.
            (0, "1970-01-01T00:00:00Z"),
        ];
        for (seconds, expected) in cases {
            assert_eq!(xsd_datetime(seconds), expected);
        }
    }

    #[test]
    fn provenance_error_display() {
        let git_failure = ProvenanceError::Git("boom".into());
        assert_eq!(git_failure.to_string(), "git-mark: boom");

        let bad_path = ProvenanceError::InvalidPath("/x.acl".into());
        assert_eq!(bad_path.to_string(), "invalid provenance path: /x.acl");
    }

    // -----------------------------------------------------------------------
    // Phase 5: composition (AnchorPolicy + ProvenanceLog) — pure, mocked tiers
    // -----------------------------------------------------------------------

    /// In-memory [`GitMarker`]: hands out a deterministic SHA per call and
    /// keeps a call counter. It spawns no subprocess.
    #[derive(Default)]
    struct MockMarker {
        calls: AtomicUsize,
    }

    impl MockMarker {
        /// Number of `mark_write` calls observed so far.
        fn call_count(&self) -> usize {
            self.calls.load(Ordering::SeqCst)
        }
    }

    #[async_trait::async_trait(?Send)]
    impl GitMarker for MockMarker {
        async fn mark_write(
            &self,
            _repo: &Path,
            path: &str,
            _agent_did: &str,
            _message: &str,
        ) -> Result<GitMark, ProvenanceError> {
            let ordinal = self.calls.fetch_add(1, Ordering::SeqCst);
            // SHA-256 over "<ordinal>:<path>", hex-encoded and cut down to the
            // 40 characters of a git commit id.
            let seed = format!("{ordinal}:{path}");
            let mut commit_sha = hex::encode(Sha256::digest(seed.as_bytes()));
            commit_sha.truncate(40);
            Ok(GitMark {
                commit_sha,
                repo: "mockpod".into(),
                branch: "main".into(),
                parent: None,
            })
        }

        async fn head(&self, _repo: &Path) -> Result<Option<String>, ProvenanceError> {
            Ok(None)
        }
    }

    /// In-memory [`BlockAnchorer`]: remembers the last `state_hash` it was
    /// handed (so a test can check which value got anchored) and counts calls.
    #[derive(Default)]
    struct MockAnchorer {
        calls: AtomicUsize,
        last_state_hash: std::sync::Mutex<Option<String>>,
    }

    impl MockAnchorer {
        /// Number of `anchor` calls observed so far.
        fn call_count(&self) -> usize {
            self.calls.load(Ordering::SeqCst)
        }

        /// Copy of the most recently anchored `state_hash`, if any.
        fn last_anchored(&self) -> Option<String> {
            self.last_state_hash.lock().unwrap().clone()
        }
    }

    #[async_trait::async_trait(?Send)]
    impl BlockAnchorer for MockAnchorer {
        async fn anchor(
            &self,
            ticker: &str,
            state_hash: &str,
            network: &str,
        ) -> Result<BlockTrailAnchor, ProvenanceError> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            *self.last_state_hash.lock().unwrap() = Some(state_hash.to_string());
            let anchored = BlockTrailAnchor {
                pubkey: Some(format!("02{}", "ab".repeat(32))),
                state_strings: vec!["{\"seq\":0}".into()],
                blockheight: None,
                network: network.into(),
                address: "tb1pmock".into(),
                vout: 0,
                txid: "ab".repeat(32),
                state_hash: state_hash.into(),
                ticker: ticker.into(),
            };
            Ok(anchored)
        }

        async fn verify(&self, _anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError> {
            Ok(true)
        }
    }

    /// Fresh pair of mock tiers, each behind an `Arc` so tests keep a handle
    /// for inspecting the counters after handing a clone to the log.
    fn mock_tiers() -> (Arc<MockMarker>, Arc<MockAnchorer>) {
        (
            Arc::new(MockMarker::default()),
            Arc::new(MockAnchorer::default()),
        )
    }

    /// Repository path handed to the mock marker (which ignores it).
    fn repo() -> &'static Path {
        Path::new("/tmp/mockpod")
    }

    /// Assemble a [`WriteRecord`] aimed at the mock tiers (PROV trail on
    /// testnet4).
    fn rec<'a>(
        path: &'a str,
        policy: AnchorPolicy,
        high_value: bool,
        created: u64,
    ) -> WriteRecord<'a> {
        WriteRecord {
            created,
            network: "testnet4",
            ticker: "PROV",
            high_value,
            policy,
            message: "PUT",
            agent_did: "did:nostr:a",
            path,
            repo: repo(),
        }
    }

    #[test]
    fn anchor_policy_inline_matrix() {
        // (policy, high_value flag, expected inline-anchor decision)
        let matrix = [
            (AnchorPolicy::Never, true, false),
            (AnchorPolicy::Never, false, false),
            (AnchorPolicy::Always, false, true),
            (AnchorPolicy::Always, true, true),
            (AnchorPolicy::HighValue, true, true),
            (AnchorPolicy::HighValue, false, false),
            // Epoch never anchors inline: it defers to the accumulator.
            (AnchorPolicy::Epoch, true, false),
        ];
        for (policy, high_value, expected) in matrix {
            let label = format!("{policy:?} with high_value={high_value}");
            assert_eq!(policy.anchors_inline(high_value), expected, "{label}");
        }
        assert_eq!(AnchorPolicy::default(), AnchorPolicy::Never);
    }

    #[tokio::test]
    async fn record_cheap_write_is_git_mark_only() {
        // With the Never policy only the git tier runs; the anchorer stays idle.
        let (marker, anchorer) = mock_tiers();
        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
        let write = rec("notes/a.ttl", AnchorPolicy::Never, false, 1_750_000_000);
        let mark = log.record(write).await.unwrap();
        assert!(mark.anchor.is_none(), "cheap write must carry no anchor");
        assert_eq!(mark.resource, "/notes/a.ttl");
        assert_eq!(marker.call_count(), 1, "git-mark always runs");
        assert_eq!(anchorer.call_count(), 0, "anchorer must NOT be called");
    }

    #[tokio::test]
    async fn record_high_value_write_carries_git_mark_and_anchor() {
        // HighValue policy on a flagged write: both tiers are present and the
        // anchor's state_hash equals the git commit SHA, binding them.
        let (marker, anchorer) = mock_tiers();
        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
        let write = rec("receipts/r1.ttl", AnchorPolicy::HighValue, true, 1_750_000_000);
        let mark = log.record(write).await.unwrap();
        let anchor = mark
            .anchor
            .as_ref()
            .expect("high-value write must carry an anchor");
        assert_eq!(
            anchor.state_hash, mark.git.commit_sha,
            "anchor must commit to the git SHA (binds the two tiers — §2.3)"
        );
        assert_eq!(anchorer.call_count(), 1);
        let anchored = anchorer.last_anchored();
        assert_eq!(anchored.as_deref(), Some(mark.git.commit_sha.as_str()));
    }

    #[tokio::test]
    async fn record_high_value_flag_false_is_git_only() {
        // HighValue policy, but the write is not flagged: git tier only.
        let (marker, anchorer) = mock_tiers();
        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
        let write = rec("notes/x.ttl", AnchorPolicy::HighValue, false, 1);
        let mark = log.record(write).await.unwrap();
        assert!(mark.anchor.is_none());
        assert_eq!(anchorer.call_count(), 0);
    }

    #[tokio::test]
    async fn record_always_anchors_every_write() {
        let (marker, anchorer) = mock_tiers();
        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
        for i in 0..3 {
            let path = format!("s/{i}.ttl");
            let write = rec(&path, AnchorPolicy::Always, false, 1);
            let mark = log.record(write).await.unwrap();
            assert!(mark.anchor.is_some());
        }
        assert_eq!(anchorer.call_count(), 3);
    }

    #[tokio::test]
    async fn record_without_anchorer_degrades_to_git_only() {
        // A log built without an anchorer (the wasm / no-Bitcoin pod) silently
        // falls back to the git tier, even under the Always policy.
        let marker = Arc::new(MockMarker::default());
        let log = ProvenanceLog::new(marker.clone());
        assert!(log.anchorer.is_none());
        let write = rec("notes/a.ttl", AnchorPolicy::Always, true, 1);
        let mark = log.record(write).await.unwrap();
        assert!(mark.anchor.is_none(), "no anchorer ⇒ no anchor regardless of policy");
        assert_eq!(marker.call_count(), 1);
    }

    #[tokio::test]
    async fn record_epoch_defers_anchoring_to_accumulator() {
        // Under the Epoch policy record() never anchors inline; the caller
        // collects the SHAs and anchors one root when the epoch closes.
        let (marker, anchorer) = mock_tiers();
        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());

        let mut epoch = EpochAccumulator::new(3);
        let mut recorded_shas = Vec::new();
        for i in 0..3 {
            let path = format!("e/{i}.ttl");
            let write = rec(&path, AnchorPolicy::Epoch, true, 1);
            let mark = log.record(write).await.unwrap();
            assert!(mark.anchor.is_none(), "epoch writes never anchor inline");
            epoch.push(mark.git.commit_sha.clone());
            recorded_shas.push(mark.git.commit_sha);
        }
        // None of the three writes reached the anchorer.
        assert_eq!(anchorer.call_count(), 0);

        // The epoch is full: closing it yields one root for the whole batch,
        // and that root is what gets anchored, exactly once.
        assert!(epoch.is_full());
        let closed = epoch.close().expect("non-empty epoch closes");
        assert_eq!(closed.commits, recorded_shas);
        let anchor = anchorer
            .anchor("PROV", &closed.root, "testnet4")
            .await
            .unwrap();
        assert_eq!(anchorer.call_count(), 1, "ONE anchor notarises N commits");
        assert_eq!(anchor.state_hash, closed.root);
        // Closing drained the accumulator, so a fresh epoch starts empty.
        assert!(epoch.is_empty());
    }

    // ── Merkle tree: root determinism + inclusion proofs ──────────────────

    #[test]
    fn merkle_root_empty_and_single() {
        assert_eq!(merkle_root(&[]), [0u8; 32]);
        // A lone leaf doubles as the root.
        let only_leaf = merkle_leaf("deadbeef");
        assert_eq!(merkle_root(&[only_leaf]), only_leaf);
    }

    #[test]
    fn merkle_root_is_deterministic_and_order_sensitive() {
        let left = merkle_leaf("aaa");
        let right = merkle_leaf("bbb");
        let first = merkle_root(&[left, right]);
        let second = merkle_root(&[left, right]);
        assert_eq!(first, second, "deterministic");
        let reversed = merkle_root(&[right, left]);
        assert_ne!(first, reversed, "leaf order changes the root");
    }

    #[test]
    fn epoch_root_matches_close_root() {
        let mut epoch = EpochAccumulator::new(10);
        (0..5).map(|i| format!("commit{i:040}")).for_each(|sha| {
            epoch.push(sha);
        });
        let peeked = epoch.root().unwrap();
        let closed = epoch.close().unwrap();
        assert_eq!(peeked, closed.root, "root() peek == close() root");
    }

    #[test]
    fn epoch_inclusion_proof_verifies_for_every_leaf() {
        // Every commit in the epoch must prove inclusion under the one root.
        // The leaf count is odd so last-node duplication gets exercised.
        let n = 7;
        let mut epoch = EpochAccumulator::new(n);
        // "c" plus 39 zero-padded digits gives 40-character commit-like ids.
        (0..n).map(|i| format!("c{i:039}")).for_each(|sha| {
            epoch.push(sha);
        });
        let root = epoch.root().unwrap();
        for i in 0..n {
            let proof = epoch.inclusion_proof(i).expect("proof for in-range leaf");
            assert!(
                EpochAccumulator::verify_inclusion(&proof, &root),
                "leaf {i} must verify against the anchored root"
            );
        }
        // One past the last leaf: there is nothing to prove.
        assert!(epoch.inclusion_proof(n).is_none());
    }

    #[test]
    fn epoch_inclusion_proof_rejects_wrong_root_and_tampered_leaf() {
        let mut epoch = EpochAccumulator::new(4);
        (0..4).map(|i| format!("c{i:039}")).for_each(|sha| {
            epoch.push(sha);
        });
        let genuine_root = epoch.root().unwrap();
        let mut proof = epoch.inclusion_proof(1).unwrap();

        // A valid proof checked against some other root must fail.
        let bogus_root = "00".repeat(32);
        assert!(!EpochAccumulator::verify_inclusion(&proof, &bogus_root));

        // Swapping in a forged leaf must fail against the genuine root.
        proof.leaf = hex::encode(merkle_leaf("forged"));
        assert!(!EpochAccumulator::verify_inclusion(&proof, &genuine_root));
    }

    #[test]
    fn epoch_threshold_and_len_tracking() {
        let mut epoch = EpochAccumulator::new(2);
        assert_eq!(epoch.threshold(), 2);
        assert!(epoch.is_empty());
        assert!(!epoch.is_full());

        epoch.push("a");
        assert_eq!(epoch.len(), 1);
        assert!(!epoch.is_full());

        epoch.push("b");
        assert!(epoch.is_full(), "reaching threshold ⇒ full");

        // A requested threshold of zero is raised to one.
        assert_eq!(EpochAccumulator::new(0).threshold(), 1);
    }

    #[test]
    fn empty_epoch_close_is_none() {
        let mut epoch = EpochAccumulator::new(3);
        assert!(epoch.close().is_none());
        assert!(epoch.root().is_none());
    }

    #[test]
    fn merkle_proof_round_trips() {
        let siblings = vec![("ab".repeat(32), true), ("cd".repeat(32), false)];
        let original = MerkleProof {
            leaf: hex::encode(merkle_leaf("x")),
            siblings,
        };
        assert_eq!(original, json_round_trip(&original));
    }

    #[test]
    fn anchor_policy_round_trips() {
        let every_policy = [
            AnchorPolicy::Never,
            AnchorPolicy::Always,
            AnchorPolicy::HighValue,
            AnchorPolicy::Epoch,
        ];
        for policy in every_policy {
            assert_eq!(policy, json_round_trip(&policy));
        }
    }
}