Skip to main content

solid_pod_rs/
provenance.rs

1//! Provenance primitives — composable, cost-tiered traceability for pod writes.
2//!
3//! Implements the data model and traits from
4//! [`docs/design/provenance-upgrade-master-plan.md`](../../docs/design/provenance-upgrade-master-plan.md)
5//! §2 and [ADR-059](../../docs/adr/ADR-059-provenance-primitives-block-trails-git-marks.md)
6//! (D1, D2, D4, D6). Two tiers compose into one chain:
7//!
8//! - **git-mark** (cheap, always-on): every pod write becomes a git commit;
9//!   the commit SHA is captured as a [`GitMark`]. Content-addressed,
10//!   append-only, tamper-evident ordering for free. The native implementation
11//!   of [`GitMarker`] lives in `solid-pod-rs-git::mark` (it shells to `git`);
12//!   wasm consumers compile against a no-op marker.
13//! - **block-trail anchor** (expensive, opt-in): a Bitcoin-anchored MRC20 state
14//!   whose taproot UTXO externally timestamps a record ([`BlockTrailAnchor`]).
15//!   Reserved for high-value records. The [`BlockAnchorer`] trait is defined
16//!   here; a real implementation lands in Phase 4 (`bitcoin_tx.rs` + mempool).
17//!
18//! A [`ProvenanceMark`] always carries a [`GitMark`] and *optionally* a
19//! [`BlockTrailAnchor`]. The anchor's `state_hash` commits to the git SHA (or an
20//! epoch Merkle root over many commits), binding both tiers into one chain.
21//!
22//! ## wasm32 safety
23//!
24//! Everything in this module — the types and [`prov_ttl`] — is pure logic and
25//! compiles for `wasm32-unknown-unknown`. The traits are `?Send` (matching the
26//! crate's existing [`crate::payments::PaymentStore`] pattern) so a wasm
27//! single-threaded executor can implement them. No `tokio`, no process spawning,
28//! no I/O leaks into this surface.
29
30use std::path::Path;
31use std::sync::Arc;
32
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35
36// ---------------------------------------------------------------------------
37// Data model (§2.1)
38// ---------------------------------------------------------------------------
39
40/// A provenance mark over a pod resource write.
41///
42/// Always carries a git commit ([`GitMark`]); optionally upgraded with a
43/// Bitcoin block-trail anchor ([`BlockTrailAnchor`]) for high-value records.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct ProvenanceMark {
46    /// Pod-relative path of the resource the write targeted.
47    pub resource: String,
48    /// The git commit the write produced — **always present** (cheap tier).
49    pub git: GitMark,
50    /// Optional Bitcoin block-trail anchor — **opt-in** (expensive tier).
51    #[serde(default, skip_serializing_if = "Option::is_none")]
52    pub anchor: Option<BlockTrailAnchor>,
53    /// `did:nostr` of the writer (NIP-98 authenticated principal), or an
54    /// anonymous marker when the write was unauthenticated.
55    pub agent_did: String,
56    /// Unix seconds at which the mark was produced.
57    pub created: u64,
58}
59
60/// The cheap-tier git commit captured for a pod write.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct GitMark {
63    /// Git SHA-1 of the commit the write produced.
64    pub commit_sha: String,
65    /// Pod repo slug (the pod's first path segment / pubkey).
66    pub repo: String,
67    /// Branch the commit landed on. Pinned to `"main"` by `init.rs`.
68    pub branch: String,
69    /// Prior commit SHA (the append-only chain link), or `None` for the
70    /// genesis commit of a freshly-initialised repo.
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub parent: Option<String>,
73}
74
75/// The expensive-tier Bitcoin anchor for a record.
76///
77/// Reuses the existing [`crate::mrc20`] crypto (`Mrc20State`, `bt_address`,
78/// `verify_mrc20_anchor`) — no crypto is re-implemented here. The
79/// `state_strings` carry the portable, independently-verifiable proof.
80#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
81pub struct BlockTrailAnchor {
82    /// Trail ticker / identifier.
83    pub ticker: String,
84    /// `sha256_hex(jcs(state))` — links into the MRC20 trail and commits to
85    /// the git SHA (or an epoch Merkle root).
86    pub state_hash: String,
87    /// Bitcoin transaction id of the anchoring UTXO.
88    pub txid: String,
89    /// Output index of the anchoring UTXO.
90    pub vout: u32,
91    /// Derived P2TR address (`mrc20::bt_address`).
92    pub address: String,
93    /// `"testnet4"` | `"mainnet"` (or any network the operator configures).
94    pub network: String,
95    /// Confirmation height; `None` until the anchoring tx confirms.
96    #[serde(default, skip_serializing_if = "Option::is_none")]
97    pub blockheight: Option<u64>,
98    /// Portable, independently-verifiable proof — the serialised states.
99    #[serde(default)]
100    pub state_strings: Vec<String>,
101    /// Issuer's compressed pubkey (66-char hex). Together with
102    /// `state_strings` it re-derives the taproot `address` via
103    /// `mrc20::bt_address` — the read-side check
104    /// ([`BlockAnchorer::verify`](crate::provenance::BlockAnchorer::verify))
105    /// needs it to confirm `address` was not forged. `None` on legacy /
106    /// partially-populated anchors (verify then has nothing to re-derive
107    /// against and reports `false`).
108    #[serde(default, skip_serializing_if = "Option::is_none")]
109    pub pubkey: Option<String>,
110}
111
112// ---------------------------------------------------------------------------
113// Errors
114// ---------------------------------------------------------------------------
115
/// Error type shared by every provenance primitive.
///
/// Written by hand instead of deriving with `thiserror`, so that the pure
/// surface builds for `wasm32` without proc-macro evaluation. Each variant
/// carries a free-form detail message; the rendered text follows the crate's
/// `"<area>: <detail>"` convention.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProvenanceError {
    /// A git operation failed (spawn, commit, rev-parse, …).
    Git(String),
    /// A Bitcoin anchor operation failed (mempool, tx-build, verify, …).
    Anchor(String),
    /// The resource path was refused (traversal, sidecar suffix, …).
    InvalidPath(String),
    /// The mark could not be persisted or emitted.
    Store(String),
}

impl std::fmt::Display for ProvenanceError {
    /// Renders the error as `"<area>: <detail>"`, where the area label is
    /// fixed per variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (area, detail) = match self {
            Self::Git(detail) => ("git-mark", detail),
            Self::Anchor(detail) => ("block-anchor", detail),
            Self::InvalidPath(detail) => ("invalid provenance path", detail),
            Self::Store(detail) => ("provenance store", detail),
        };
        write!(f, "{area}: {detail}")
    }
}

impl std::error::Error for ProvenanceError {}
145
146// ---------------------------------------------------------------------------
147// Traits (§2.2)
148// ---------------------------------------------------------------------------
149
150/// Cheap tier. Implemented by `solid-pod-rs-git` (shells to `git`).
151///
152/// `?Send` for wasm32-safety, matching the crate's [`crate::payments::PaymentStore`]
153/// pattern. The wasm `core` consumer compiles against a no-op marker.
154#[async_trait::async_trait(?Send)]
155pub trait GitMarker: Send + Sync {
156    /// Stage `path` and commit it, returning the resulting [`GitMark`].
157    ///
158    /// `repo` is the absolute filesystem path to the (non-bare) pod repo;
159    /// `path` is the repo-relative path written; `agent_did` is recorded as
160    /// the commit author email; `message` is the commit subject. When there
161    /// is nothing to commit the implementation returns a mark referencing the
162    /// current HEAD without erroring.
163    async fn mark_write(
164        &self,
165        repo: &Path,
166        path: &str,
167        agent_did: &str,
168        message: &str,
169    ) -> Result<GitMark, ProvenanceError>;
170
171    /// Return the current HEAD commit SHA, or `None` for an unborn branch.
172    async fn head(&self, repo: &Path) -> Result<Option<String>, ProvenanceError>;
173}
174
175/// Expensive tier. Server-side (mempool + Bitcoin TX), behind feature `mrc20`.
176///
177/// Defined here; a real implementation lands in Phase 4 (`bitcoin_tx.rs`).
178#[async_trait::async_trait(?Send)]
179pub trait BlockAnchorer: Send + Sync {
180    /// Anchor `state_hash` under `ticker` on `network`, returning the produced
181    /// [`BlockTrailAnchor`]. Implemented by
182    /// `solid-pod-rs-server::mempool::MempoolBlockAnchorer` (builds + broadcasts
183    /// a taproot MRC20 anchoring tx via `bitcoin_tx.rs`).
184    async fn anchor(
185        &self,
186        ticker: &str,
187        state_hash: &str,
188        network: &str,
189    ) -> Result<BlockTrailAnchor, ProvenanceError>;
190
191    /// Verify a previously-produced anchor against the chain / fixtures
192    /// (re-derives the taproot address from the portable proof, then confirms a
193    /// UTXO sits at it).
194    async fn verify(&self, anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError>;
195}
196
197// ---------------------------------------------------------------------------
198// Composition policy (§2.3, ADR-059 D1/D5)
199// ---------------------------------------------------------------------------
200
201/// When a [`ProvenanceLog::record`] write should incur the expensive Bitcoin
202/// block-trail anchor on top of the always-on git-mark.
203///
204/// The cheap tier (git-mark) runs for *every* policy — these variants only
205/// govern the **opt-in** anchor. Pure, `Copy`, wasm-safe: it carries no I/O.
206///
207/// | Variant | Anchor behaviour |
208/// |--------------|--------------------------------------------------------|
209/// | [`Never`](AnchorPolicy::Never) | git-mark only — no on-chain cost. The default for ordinary writes. |
210/// | [`Always`](AnchorPolicy::Always) | anchor **every** write (commits the git SHA on-chain). Expensive; only for trails where every state must be externally timestamped. |
211/// | [`HighValue`](AnchorPolicy::HighValue) | anchor iff the resource is flagged anchor-worthy (its ACL carries a `ProvenanceAnchor` condition / the caller passes the high-value flag). Settlement receipts, elevation/ACSP decisions. |
212/// | [`Epoch`](AnchorPolicy::Epoch) | accumulate the git SHA into an [`EpochAccumulator`]; the batch root is anchored **once** on epoch close (one Bitcoin tx notarises many commits — ADR-059 D5). |
213///
214/// An anchor is attempted only when the policy says so **and** the
215/// [`ProvenanceLog`] was built with an anchorer ([`ProvenanceLog::anchorer`]
216/// is `Some`). With `anchorer: None` (the wasm / no-Bitcoin pod) every policy
217/// degrades to git-mark-only, silently.
218#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
219pub enum AnchorPolicy {
220    /// git-mark only; never anchor. The default for ordinary pod writes.
221    #[default]
222    Never,
223    /// Anchor every write — the git commit SHA is committed on-chain each time.
224    Always,
225    /// Anchor only when the resource is flagged high-value (ACL carries a
226    /// `ProvenanceAnchor` condition). Otherwise git-mark only.
227    HighValue,
228    /// Accumulate the commit into the current epoch; the epoch's Merkle root is
229    /// anchored once on close (amortised on-chain cost — ADR-059 D5).
230    Epoch,
231}
232
233impl AnchorPolicy {
234    /// Whether *this write* should be anchored inline (i.e. produce a
235    /// [`BlockTrailAnchor`] on the returned mark), given whether the resource
236    /// is flagged high-value.
237    ///
238    /// - `Never` / `Epoch` ⇒ never inline (`Epoch` defers to the accumulator).
239    /// - `Always` ⇒ always inline.
240    /// - `HighValue` ⇒ inline iff `high_value`.
241    #[must_use]
242    pub fn anchors_inline(self, high_value: bool) -> bool {
243        match self {
244            AnchorPolicy::Never | AnchorPolicy::Epoch => false,
245            AnchorPolicy::Always => true,
246            AnchorPolicy::HighValue => high_value,
247        }
248    }
249}
250
251// ---------------------------------------------------------------------------
252// Composition log (§2.2 `ProvenanceLog`, §2.3 composition rule)
253// ---------------------------------------------------------------------------
254
255/// The composition point for the two provenance tiers (master-plan §2.2/§2.3,
256/// ADR-059 D1).
257///
258/// A `ProvenanceLog` always holds the cheap-tier [`GitMarker`] and *optionally*
259/// the expensive-tier [`BlockAnchorer`]. [`record`](ProvenanceLog::record)
260/// implements the **cheap-always, expensive-opt-in** rule:
261///
262/// 1. **Always** `marker.mark_write()` → [`GitMark`] (every write becomes a
263///    commit; we capture the SHA).
264/// 2. **Conditionally** `anchorer.anchor()` when the [`AnchorPolicy`] says this
265///    write anchors inline AND an anchorer is present. The anchor's
266///    `state_hash` is set to the git commit SHA — so the Bitcoin UTXO commits
267///    to the git history, **binding the two tiers into one chain** (§2.3).
268///
269/// The returned [`ProvenanceMark`] carries the git-mark always and the anchor
270/// when one was produced. Persisting the PROV-O sidecar and emitting the
271/// `Updates-via` notification (step 3) is the server's job — kept out of this
272/// pure surface so it compiles for wasm.
273///
274/// ## wasm32 safety
275///
276/// `Arc<dyn GitMarker>` / `Arc<dyn BlockAnchorer>` are `?Send` trait objects;
277/// the type holds no runtime. On wasm the pod constructs it with a no-op marker
278/// and `anchorer: None`, so `record` is git-mark-only and never reaches any
279/// Bitcoin I/O.
280#[derive(Clone)]
281pub struct ProvenanceLog {
282    /// Cheap tier — always invoked. The native server injects
283    /// `solid-pod-rs-git`'s `ShellGitMarker`; wasm injects a no-op.
284    pub marker: Arc<dyn GitMarker>,
285    /// Expensive tier — optional. `None` in pods that do not pay for Bitcoin
286    /// anchoring (and always `None` on wasm).
287    pub anchorer: Option<Arc<dyn BlockAnchorer>>,
288}
289
290impl std::fmt::Debug for ProvenanceLog {
291    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292        f.debug_struct("ProvenanceLog")
293            .field("marker", &"Arc<dyn GitMarker>")
294            .field("anchorer", &self.anchorer.as_ref().map(|_| "Arc<dyn BlockAnchorer>"))
295            .finish()
296    }
297}
298
299/// Descriptor of a single pod write passed to [`ProvenanceLog::record`].
300///
301/// Borrowed (no allocation on the hot path), mirroring
302/// [`crate::wac::conditions::RequestContext`]. Bundles the write identity
303/// (repo/path/agent/message), the expensive-tier [`AnchorPolicy`] + its
304/// `high_value` flag, and the trail coordinates (`ticker`/`network`) an anchor
305/// targets. The trail fields are ignored unless the policy actually anchors.
306#[derive(Debug, Clone, Copy)]
307pub struct WriteRecord<'a> {
308    /// Absolute filesystem path to the (non-bare) pod repo.
309    pub repo: &'a Path,
310    /// Repo-relative path of the resource written.
311    pub path: &'a str,
312    /// `did:nostr` of the writer (NIP-98 principal), or an anonymous marker.
313    pub agent_did: &'a str,
314    /// Commit subject (the LDP method + path).
315    pub message: &'a str,
316    /// Expensive-tier policy (see [`AnchorPolicy`]).
317    pub policy: AnchorPolicy,
318    /// Whether the resource is flagged high-value (ACL `ProvenanceAnchor`).
319    pub high_value: bool,
320    /// Trail ticker to anchor against (used only when anchoring).
321    pub ticker: &'a str,
322    /// Bitcoin network of the trail (used only when anchoring).
323    pub network: &'a str,
324    /// Unix seconds stamped onto the produced mark.
325    pub created: u64,
326}
327
328impl ProvenanceLog {
329    /// Construct a git-mark-only log (no Bitcoin tier). The common case for
330    /// ordinary pods and the only shape available on wasm.
331    #[must_use]
332    pub fn new(marker: Arc<dyn GitMarker>) -> Self {
333        Self { marker, anchorer: None }
334    }
335
336    /// Construct a log with both tiers wired.
337    #[must_use]
338    pub fn with_anchorer(marker: Arc<dyn GitMarker>, anchorer: Arc<dyn BlockAnchorer>) -> Self {
339        Self {
340            marker,
341            anchorer: Some(anchorer),
342        }
343    }
344
345    /// Record a pod resource write across both tiers (the composition rule).
346    ///
347    /// The write is described by a [`WriteRecord`]. Always commits (cheap tier).
348    /// Then, iff `policy.anchors_inline(high_value)` — both carried by the
349    /// [`WriteRecord`] — AND an anchorer is present, anchors the **git commit
350    /// SHA** under the record's `ticker`/`network`, attaching the
351    /// [`BlockTrailAnchor`] to the returned mark. The anchor's `state_hash` is
352    /// the commit SHA, binding git ↔ Bitcoin (master-plan §2.3).
353    ///
354    /// For [`AnchorPolicy::Epoch`] this method never anchors inline — the caller
355    /// feeds the returned `git.commit_sha` into an [`EpochAccumulator`] and
356    /// anchors the batch root on epoch close.
357    ///
358    /// Errors from the **cheap** tier propagate (the git-mark is the contract).
359    /// Errors from the **expensive** tier are returned too, so the caller can
360    /// decide its own best-effort policy — the server hook logs+swallows them
361    /// (a failed anchor must never fail the LDP write), exactly as it does for
362    /// the git-mark.
363    pub async fn record(&self, rec: WriteRecord<'_>) -> Result<ProvenanceMark, ProvenanceError> {
364        // 1. Cheap tier — ALWAYS. A failure here is a hard error: the git-mark
365        //    is the always-on contract.
366        let git = self
367            .marker
368            .mark_write(rec.repo, rec.path, rec.agent_did, rec.message)
369            .await?;
370
371        // 2. Expensive tier — opt-in. Only when the policy anchors this write
372        //    inline AND an anchorer is wired. The anchored state_hash IS the
373        //    git commit SHA — the Bitcoin UTXO now commits to the git history
374        //    (master-plan §2.3 "binds both primitives into one chain").
375        let anchor = if rec.policy.anchors_inline(rec.high_value) {
376            match &self.anchorer {
377                Some(a) => Some(a.anchor(rec.ticker, &git.commit_sha, rec.network).await?),
378                None => None,
379            }
380        } else {
381            None
382        };
383
384        Ok(ProvenanceMark {
385            resource: path_to_resource(rec.path),
386            git,
387            anchor,
388            agent_did: rec.agent_did.to_string(),
389            created: rec.created,
390        })
391    }
392}
393
/// Converts a repo-relative `path` into the pod-relative `resource` string
/// stored on a [`ProvenanceMark`], which always begins with `/`.
///
/// A path that already starts with `/` is returned unchanged, so applying the
/// function twice gives the same result as applying it once.
fn path_to_resource(path: &str) -> String {
    let mut resource = String::with_capacity(path.len() + 1);
    if !path.starts_with('/') {
        resource.push('/');
    }
    resource.push_str(path);
    resource
}
404
405// ---------------------------------------------------------------------------
406// Epoch Merkle-root anchoring (§2.3, ADR-059 D5) — pure, wasm-safe
407// ---------------------------------------------------------------------------
408
409/// Compute a binary SHA-256 Merkle root over `leaves` (each a 32-byte digest),
410/// duplicating the last node on an odd level (Bitcoin-style). Returns the
411/// all-zero digest for an empty input.
412///
413/// Pure and wasm-safe — uses only the always-compiled `sha2` dependency. Leaves
414/// are hashed *as given*; callers pass `sha256(commit_sha)` so the tree commits
415/// to the exact commit identifiers.
416fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] {
417    if leaves.is_empty() {
418        return [0u8; 32];
419    }
420    let mut level: Vec<[u8; 32]> = leaves.to_vec();
421    while level.len() > 1 {
422        let mut next = Vec::with_capacity(level.len().div_ceil(2));
423        let mut i = 0;
424        while i < level.len() {
425            let left = level[i];
426            // Duplicate the last node when the level is odd.
427            let right = if i + 1 < level.len() { level[i + 1] } else { left };
428            let mut h = Sha256::new();
429            h.update(left);
430            h.update(right);
431            next.push(h.finalize().into());
432            i += 2;
433        }
434        level = next;
435    }
436    level[0]
437}
438
439/// Hash one leaf value (a git commit SHA, as text) into the Merkle leaf digest.
440fn merkle_leaf(commit_sha: &str) -> [u8; 32] {
441    Sha256::digest(commit_sha.as_bytes()).into()
442}
443
444/// A Merkle inclusion proof: the sibling digests from leaf to root, each tagged
445/// with whether the sibling sits on the **right** of the running hash at that
446/// level. Verified with [`EpochAccumulator::verify_inclusion`].
447#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
448pub struct MerkleProof {
449    /// Hex of the leaf value's digest (`sha256(commit_sha)`).
450    pub leaf: String,
451    /// Sibling steps from the leaf upward: `(sibling_hex, sibling_is_right)`.
452    pub siblings: Vec<(String, bool)>,
453}
454
/// Collects git commit SHAs for one epoch and, at close, produces the single
/// Merkle root to anchor (ADR-059 D5 — *one Bitcoin tx notarises many
/// commits*).
///
/// Typical flow for writes under [`AnchorPolicy::Epoch`]:
///
/// 1. [`push`](EpochAccumulator::push) the commit SHA from each git-mark.
/// 2. Once [`is_full`](EpochAccumulator::is_full) reports that the configured
///    commit count is reached, [`close`](EpochAccumulator::close) the epoch to
///    get the hex root and the batched SHAs.
/// 3. Anchor that root **once** through a [`BlockAnchorer`] and carry on with
///    the now-empty accumulator.
///
/// An [`inclusion_proof`](EpochAccumulator::inclusion_proof) can show that any
/// single commit belongs to the anchored root, with no further anchoring.
///
/// Pure and wasm-safe: neither the accumulator nor the Merkle maths do I/O.
/// The one anchor call is made by the caller through the (optional) anchorer.
#[derive(Debug, Clone)]
pub struct EpochAccumulator {
    /// Commit SHAs gathered in this epoch; insertion order is leaf order.
    commits: Vec<String>,
    /// Number of commits at which the epoch counts as full. Set by operator
    /// policy (master-plan §5: "ACL writes epoch-only to bound cost").
    threshold: usize,
}
478
/// What closing an epoch yields: the Merkle root that gets anchored and the
/// commit SHAs that root covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClosedEpoch {
    /// Hex-encoded SHA-256 Merkle root built from the epoch's commit-SHA
    /// leaves. This one value is anchored on-chain for the entire batch.
    pub root: String,
    /// Commit SHAs notarised by `root`, in leaf order.
    pub commits: Vec<String>,
}
489
490impl EpochAccumulator {
491    /// New, empty epoch with a close `threshold` (clamped to ≥ 1).
492    #[must_use]
493    pub fn new(threshold: usize) -> Self {
494        Self {
495            commits: Vec::new(),
496            threshold: threshold.max(1),
497        }
498    }
499
500    /// Add a git commit SHA to the current epoch.
501    pub fn push(&mut self, commit_sha: impl Into<String>) {
502        self.commits.push(commit_sha.into());
503    }
504
505    /// Number of commits accumulated this epoch.
506    #[must_use]
507    pub fn len(&self) -> usize {
508        self.commits.len()
509    }
510
511    /// Whether the epoch holds no commits.
512    #[must_use]
513    pub fn is_empty(&self) -> bool {
514        self.commits.is_empty()
515    }
516
517    /// The configured close threshold.
518    #[must_use]
519    pub fn threshold(&self) -> usize {
520        self.threshold
521    }
522
523    /// Whether the epoch has reached its close threshold (time to anchor).
524    #[must_use]
525    pub fn is_full(&self) -> bool {
526        self.commits.len() >= self.threshold
527    }
528
529    /// The current Merkle root (hex) over the accumulated commits, without
530    /// draining. Returns `None` for an empty epoch (nothing to anchor).
531    #[must_use]
532    pub fn root(&self) -> Option<String> {
533        if self.commits.is_empty() {
534            return None;
535        }
536        let leaves: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
537        Some(hex::encode(merkle_root(&leaves)))
538    }
539
540    /// Seal the epoch: compute the root, return it with the batched commit SHAs,
541    /// and **drain** the accumulator so a fresh epoch begins. Returns `None`
542    /// (and drains nothing) for an empty epoch.
543    pub fn close(&mut self) -> Option<ClosedEpoch> {
544        if self.commits.is_empty() {
545            return None;
546        }
547        let commits = std::mem::take(&mut self.commits);
548        let leaves: Vec<[u8; 32]> = commits.iter().map(|c| merkle_leaf(c)).collect();
549        let root = hex::encode(merkle_root(&leaves));
550        Some(ClosedEpoch { root, commits })
551    }
552
553    /// Produce an inclusion proof for the commit at leaf `index` against the
554    /// *current* set of accumulated commits. `None` if `index` is out of range.
555    ///
556    /// The proof verifies against the root produced by [`root`](Self::root) /
557    /// [`close`](Self::close) over the same commit set — i.e. against the value
558    /// anchored on-chain.
559    #[must_use]
560    pub fn inclusion_proof(&self, index: usize) -> Option<MerkleProof> {
561        let n = self.commits.len();
562        if index >= n {
563            return None;
564        }
565        let mut level: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
566        let leaf_hex = hex::encode(level[index]);
567        let mut idx = index;
568        let mut siblings: Vec<(String, bool)> = Vec::new();
569        while level.len() > 1 {
570            let sibling_idx = if idx % 2 == 0 { idx + 1 } else { idx - 1 };
571            // On an odd level the rightmost node is paired with itself.
572            let sib = if sibling_idx < level.len() {
573                level[sibling_idx]
574            } else {
575                level[idx]
576            };
577            let sibling_is_right = idx % 2 == 0;
578            siblings.push((hex::encode(sib), sibling_is_right));
579
580            // Build the next level.
581            let mut next = Vec::with_capacity(level.len().div_ceil(2));
582            let mut i = 0;
583            while i < level.len() {
584                let left = level[i];
585                let right = if i + 1 < level.len() { level[i + 1] } else { left };
586                let mut h = Sha256::new();
587                h.update(left);
588                h.update(right);
589                next.push(h.finalize().into());
590                i += 2;
591            }
592            level = next;
593            idx /= 2;
594        }
595        Some(MerkleProof {
596            leaf: leaf_hex,
597            siblings,
598        })
599    }
600
601    /// Verify a [`MerkleProof`] against an expected `root_hex` (the anchored
602    /// root). Recomputes the path and compares — no accumulator state needed,
603    /// so a verifier can check inclusion with only the proof + the on-chain
604    /// root.
605    #[must_use]
606    pub fn verify_inclusion(proof: &MerkleProof, root_hex: &str) -> bool {
607        let Ok(mut acc) = hex::decode(&proof.leaf) else {
608            return false;
609        };
610        if acc.len() != 32 {
611            return false;
612        }
613        for (sib_hex, sib_is_right) in &proof.siblings {
614            let Ok(sib) = hex::decode(sib_hex) else {
615                return false;
616            };
617            if sib.len() != 32 {
618                return false;
619            }
620            let mut h = Sha256::new();
621            if *sib_is_right {
622                h.update(&acc);
623                h.update(&sib);
624            } else {
625                h.update(&sib);
626                h.update(&acc);
627            }
628            acc = h.finalize().to_vec();
629        }
630        hex::encode(acc) == root_hex
631    }
632}
633
634// ---------------------------------------------------------------------------
635// PROV-O serialiser (§2.3 step 3, D7)
636// ---------------------------------------------------------------------------
637
/// Escapes `s` so it can be placed inside a Turtle double-quoted literal
/// (RDF 1.1 Turtle §2.5.3 / §6.4 string escapes).
///
/// Backslash, double quote, line feed, carriage return and tab are replaced by
/// their two-character escape sequences; every other character is copied
/// through unchanged.
fn ttl_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let sequence = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match sequence {
            Some(seq) => escaped.push_str(seq),
            None => escaped.push(ch),
        }
    }
    escaped
}
654
/// Formats `secs` (Unix seconds) as a UTC `xsd:dateTime` literal of the form
/// `YYYY-MM-DDThh:mm:ssZ`.
///
/// Pure, allocation-light and wasm-safe. The conversion is done by hand so the
/// pure surface does not pull in `chrono`'s formatting (the crate depends on
/// `chrono`, but this stays self-contained and deterministic for the golden
/// test).
fn xsd_datetime(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    // Split into whole days since the epoch and the time of day.
    let day_number = (secs / SECS_PER_DAY) as i64;
    let second_of_day = (secs % SECS_PER_DAY) as i64;
    let hour = second_of_day / 3600;
    let minute = second_of_day % 3600 / 60;
    let second = second_of_day % 60;

    // Civil-from-days (Howard Hinnant's algorithm) — exact, no leap tables.
    // The day count is shifted so that each era starts on 1 March.
    let shifted = day_number + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0) to February (11).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
679
680/// Produce a minimal, correct PROV-O Turtle sidecar for a [`ProvenanceMark`].
681///
682/// The mark is modelled as a `prov:Activity` (the write) that
683/// `prov:generated` the resource entity, was performed by the agent
684/// (`prov:wasAssociatedWith`), and is identified by its git commit SHA. The
685/// resource entity records `prov:wasGeneratedBy` the activity. When a
686/// block-trail anchor is present it is emitted as an associated entity bearing
687/// the txid/state-hash so the sidecar carries both tiers.
688///
689/// Kept deliberately small: stable prefix block, one activity, one entity, one
690/// agent, optional anchor entity. Round-trip-safe with the unit tests below.
691pub fn prov_ttl(mark: &ProvenanceMark) -> String {
692    let sha = &mark.git.commit_sha;
693    let resource = ttl_escape(&mark.resource);
694    let agent = ttl_escape(&mark.agent_did);
695    let branch = ttl_escape(&mark.git.branch);
696    let repo = ttl_escape(&mark.git.repo);
697    let when = xsd_datetime(mark.created);
698
699    let mut ttl = String::new();
700    ttl.push_str("@prefix prov: <http://www.w3.org/ns/prov#> .\n");
701    ttl.push_str("@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .\n");
702    ttl.push_str("@prefix git:  <https://w3id.org/git#> .\n");
703    ttl.push_str("@prefix bt:   <https://blocktrails.org/ns#> .\n\n");
704
705    // Activity: the write, identified by the commit it produced.
706    ttl.push_str(&format!("<urn:git:commit:{sha}> a prov:Activity ;\n"));
707    ttl.push_str(&format!("    prov:generated <{resource}> ;\n"));
708    ttl.push_str(&format!("    prov:wasAssociatedWith <{agent}> ;\n"));
709    ttl.push_str(&format!("    prov:endedAtTime \"{when}\"^^xsd:dateTime ;\n"));
710    ttl.push_str(&format!("    git:commit \"{sha}\" ;\n"));
711    ttl.push_str(&format!("    git:branch \"{branch}\" ;\n"));
712    ttl.push_str(&format!("    git:repo \"{repo}\" "));
713    if let Some(parent) = &mark.git.parent {
714        let parent = ttl_escape(parent);
715        ttl.push_str(&format!(";\n    git:parent \"{parent}\" .\n"));
716    } else {
717        ttl.push_str(".\n");
718    }
719
720    // Entity: the generated resource.
721    ttl.push('\n');
722    ttl.push_str(&format!("<{resource}> a prov:Entity ;\n"));
723    ttl.push_str(&format!(
724        "    prov:wasGeneratedBy <urn:git:commit:{sha}> ;\n"
725    ));
726    ttl.push_str(&format!(
727        "    prov:wasAttributedTo <{agent}> .\n"
728    ));
729
730    // Agent.
731    ttl.push('\n');
732    ttl.push_str(&format!("<{agent}> a prov:Agent .\n"));
733
734    // Optional anchor entity (expensive tier).
735    if let Some(a) = &mark.anchor {
736        let txid = ttl_escape(&a.txid);
737        let ticker = ttl_escape(&a.ticker);
738        let state_hash = ttl_escape(&a.state_hash);
739        let network = ttl_escape(&a.network);
740        ttl.push('\n');
741        ttl.push_str(&format!("<urn:bt:tx:{txid}:{}> a prov:Entity ;\n", a.vout));
742        ttl.push_str(&format!(
743            "    prov:wasDerivedFrom <urn:git:commit:{sha}> ;\n"
744        ));
745        ttl.push_str(&format!("    bt:ticker \"{ticker}\" ;\n"));
746        ttl.push_str(&format!("    bt:stateHash \"{state_hash}\" ;\n"));
747        ttl.push_str(&format!("    bt:network \"{network}\" ;\n"));
748        ttl.push_str(&format!("    bt:txid \"{txid}\" ;\n"));
749        ttl.push_str(&format!("    bt:vout \"{}\"^^xsd:integer ", a.vout));
750        if let Some(h) = a.blockheight {
751            ttl.push_str(&format!(";\n    bt:blockheight \"{h}\"^^xsd:integer .\n"));
752        } else {
753            ttl.push_str(".\n");
754        }
755    }
756
757    ttl
758}
759
760// ---------------------------------------------------------------------------
761// Tests
762// ---------------------------------------------------------------------------
763
764#[cfg(test)]
765mod tests {
766    use super::*;
767
768    fn sample_git() -> GitMark {
769        GitMark {
770            commit_sha: "a1b2c3d4e5f60718293a4b5c6d7e8f9001122334".into(),
771            repo: "deadbeef".into(),
772            branch: "main".into(),
773            parent: Some("00112233445566778899aabbccddeeff00112233".into()),
774        }
775    }
776
777    fn sample_mark() -> ProvenanceMark {
778        ProvenanceMark {
779            resource: "/notes/hello.ttl".into(),
780            git: sample_git(),
781            anchor: None,
782            agent_did: "did:nostr:abcdef".into(),
783            created: 1_750_000_000,
784        }
785    }
786
787    #[test]
788    fn git_mark_round_trips() {
789        let g = sample_git();
790        let json = serde_json::to_string(&g).unwrap();
791        let back: GitMark = serde_json::from_str(&json).unwrap();
792        assert_eq!(g, back);
793    }
794
795    #[test]
796    fn provenance_mark_round_trips_without_anchor() {
797        let m = sample_mark();
798        let json = serde_json::to_string(&m).unwrap();
799        // `anchor: None` must be omitted by skip_serializing_if.
800        assert!(!json.contains("anchor"));
801        let back: ProvenanceMark = serde_json::from_str(&json).unwrap();
802        assert_eq!(m, back);
803    }
804
805    #[test]
806    fn provenance_mark_round_trips_with_anchor() {
807        let mut m = sample_mark();
808        m.anchor = Some(BlockTrailAnchor {
809            ticker: "PROV".into(),
810            state_hash: "ff".repeat(32),
811            txid: "ab".repeat(32),
812            vout: 1,
813            address: "tb1pexample".into(),
814            network: "testnet4".into(),
815            blockheight: Some(840_000),
816            state_strings: vec!["{\"seq\":0}".into(), "{\"seq\":1}".into()],
817            pubkey: Some("02".to_string() + &"ab".repeat(32)),
818        });
819        let json = serde_json::to_string(&m).unwrap();
820        let back: ProvenanceMark = serde_json::from_str(&json).unwrap();
821        assert_eq!(m, back);
822    }
823
824    #[test]
825    fn block_trail_anchor_defaults_state_strings() {
826        // state_strings missing in JSON must deserialise to an empty vec.
827        let json = r#"{
828            "ticker":"PROV","state_hash":"00","txid":"00","vout":0,
829            "address":"tb1p","network":"testnet4"
830        }"#;
831        let a: BlockTrailAnchor = serde_json::from_str(json).unwrap();
832        assert!(a.state_strings.is_empty());
833        assert!(a.blockheight.is_none());
834    }
835
836    #[test]
837    fn prov_ttl_contains_core_triples() {
838        let ttl = prov_ttl(&sample_mark());
839        assert!(ttl.contains("@prefix prov: <http://www.w3.org/ns/prov#> ."));
840        assert!(ttl.contains("a prov:Activity"));
841        assert!(ttl.contains("prov:wasGeneratedBy"));
842        assert!(ttl.contains("prov:wasAssociatedWith <did:nostr:abcdef>"));
843        assert!(ttl.contains("a prov:Agent"));
844        // Commit sha appears as the activity id + git:commit literal.
845        assert!(ttl.contains("<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334>"));
846        assert!(ttl.contains("git:commit \"a1b2c3d4e5f60718293a4b5c6d7e8f9001122334\""));
847        assert!(ttl.contains("git:branch \"main\""));
848        assert!(ttl.contains("git:parent \"00112233445566778899aabbccddeeff00112233\""));
849        // The generated entity is the resource.
850        assert!(ttl.contains("<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334> a prov:Activity"));
851        assert!(ttl.contains("prov:generated </notes/hello.ttl>"));
852    }
853
854    #[test]
855    fn prov_ttl_omits_parent_when_absent() {
856        let mut m = sample_mark();
857        m.git.parent = None;
858        let ttl = prov_ttl(&m);
859        assert!(!ttl.contains("git:parent"));
860        // Must still be a well-terminated activity block.
861        assert!(ttl.contains("git:repo \"deadbeef\" .\n"));
862    }
863
864    #[test]
865    fn prov_ttl_emits_anchor_block_when_present() {
866        let mut m = sample_mark();
867        m.anchor = Some(BlockTrailAnchor {
868            ticker: "PROV".into(),
869            state_hash: "deadbeef".into(),
870            txid: "cafebabe".into(),
871            vout: 2,
872            address: "tb1pexample".into(),
873            network: "testnet4".into(),
874            blockheight: Some(840_000),
875            state_strings: vec![],
876            pubkey: None,
877        });
878        let ttl = prov_ttl(&m);
879        assert!(ttl.contains("<urn:bt:tx:cafebabe:2> a prov:Entity"));
880        assert!(ttl.contains("bt:ticker \"PROV\""));
881        assert!(ttl.contains("bt:stateHash \"deadbeef\""));
882        assert!(ttl.contains("bt:blockheight \"840000\"^^xsd:integer"));
883        assert!(ttl.contains("prov:wasDerivedFrom <urn:git:commit:"));
884    }
885
886    #[test]
887    fn prov_ttl_escapes_quotes_and_backslashes() {
888        let mut m = sample_mark();
889        m.agent_did = "did:nostr:\"weird\\did".into();
890        let ttl = prov_ttl(&m);
891        // The raw quote/backslash must be escaped inside the literal.
892        assert!(ttl.contains("did:nostr:\\\"weird\\\\did"));
893    }
894
895    #[test]
896    fn xsd_datetime_known_epoch() {
897        // 1_750_000_000 == 2025-06-15T15:06:40Z (verified against `date -u -d @1750000000`).
898        assert_eq!(xsd_datetime(1_750_000_000), "2025-06-15T15:06:40Z");
899        // Unix epoch.
900        assert_eq!(xsd_datetime(0), "1970-01-01T00:00:00Z");
901    }
902
903    #[test]
904    fn provenance_error_display() {
905        assert_eq!(
906            ProvenanceError::Git("boom".into()).to_string(),
907            "git-mark: boom"
908        );
909        assert_eq!(
910            ProvenanceError::InvalidPath("/x.acl".into()).to_string(),
911            "invalid provenance path: /x.acl"
912        );
913    }
914
915    // -----------------------------------------------------------------------
916    // Phase 5: composition (AnchorPolicy + ProvenanceLog) — pure, mocked tiers
917    // -----------------------------------------------------------------------
918
919    use std::sync::atomic::{AtomicUsize, Ordering};
920
921    /// In-memory [`GitMarker`] — fabricates a deterministic SHA per call and
922    /// counts invocations. No subprocess, so it compiles + runs on wasm too.
923    #[derive(Default)]
924    struct MockMarker {
925        calls: AtomicUsize,
926    }
927    #[async_trait::async_trait(?Send)]
928    impl GitMarker for MockMarker {
929        async fn mark_write(
930            &self,
931            _repo: &Path,
932            path: &str,
933            _agent_did: &str,
934            _message: &str,
935        ) -> Result<GitMark, ProvenanceError> {
936            let n = self.calls.fetch_add(1, Ordering::SeqCst);
937            // 40-hex deterministic SHA derived from the call ordinal + path.
938            let sha = hex::encode(Sha256::digest(format!("{n}:{path}").as_bytes()))[..40].to_string();
939            Ok(GitMark {
940                commit_sha: sha,
941                repo: "mockpod".into(),
942                branch: "main".into(),
943                parent: None,
944            })
945        }
946        async fn head(&self, _repo: &Path) -> Result<Option<String>, ProvenanceError> {
947            Ok(None)
948        }
949    }
950
951    /// In-memory [`BlockAnchorer`] — records the `state_hash` it was asked to
952    /// anchor (so a test can assert the git SHA was bound) and counts calls.
953    #[derive(Default)]
954    struct MockAnchorer {
955        calls: AtomicUsize,
956        last_state_hash: std::sync::Mutex<Option<String>>,
957    }
958    #[async_trait::async_trait(?Send)]
959    impl BlockAnchorer for MockAnchorer {
960        async fn anchor(
961            &self,
962            ticker: &str,
963            state_hash: &str,
964            network: &str,
965        ) -> Result<BlockTrailAnchor, ProvenanceError> {
966            self.calls.fetch_add(1, Ordering::SeqCst);
967            *self.last_state_hash.lock().unwrap() = Some(state_hash.to_string());
968            Ok(BlockTrailAnchor {
969                ticker: ticker.into(),
970                state_hash: state_hash.into(),
971                txid: "ab".repeat(32),
972                vout: 0,
973                address: "tb1pmock".into(),
974                network: network.into(),
975                blockheight: None,
976                state_strings: vec!["{\"seq\":0}".into()],
977                pubkey: Some("02".to_string() + &"ab".repeat(32)),
978            })
979        }
980        async fn verify(&self, _anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError> {
981            Ok(true)
982        }
983    }
984
985    fn repo() -> &'static Path {
986        Path::new("/tmp/mockpod")
987    }
988
989    /// Build a [`WriteRecord`] for the mock tiers (PROV trail on testnet4).
990    fn rec<'a>(
991        path: &'a str,
992        policy: AnchorPolicy,
993        high_value: bool,
994        created: u64,
995    ) -> WriteRecord<'a> {
996        WriteRecord {
997            repo: repo(),
998            path,
999            agent_did: "did:nostr:a",
1000            message: "PUT",
1001            policy,
1002            high_value,
1003            ticker: "PROV",
1004            network: "testnet4",
1005            created,
1006        }
1007    }
1008
1009    #[test]
1010    fn anchor_policy_inline_matrix() {
1011        assert!(!AnchorPolicy::Never.anchors_inline(true));
1012        assert!(!AnchorPolicy::Never.anchors_inline(false));
1013        assert!(AnchorPolicy::Always.anchors_inline(false));
1014        assert!(AnchorPolicy::Always.anchors_inline(true));
1015        assert!(AnchorPolicy::HighValue.anchors_inline(true));
1016        assert!(!AnchorPolicy::HighValue.anchors_inline(false));
1017        // Epoch never anchors inline — it defers to the accumulator.
1018        assert!(!AnchorPolicy::Epoch.anchors_inline(true));
1019        assert_eq!(AnchorPolicy::default(), AnchorPolicy::Never);
1020    }
1021
1022    #[tokio::test]
1023    async fn record_cheap_write_is_git_mark_only() {
1024        // Never policy ⇒ git-mark only, no anchor, anchorer untouched.
1025        let marker = Arc::new(MockMarker::default());
1026        let anchorer = Arc::new(MockAnchorer::default());
1027        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1028        let mark = log
1029            .record(rec("notes/a.ttl", AnchorPolicy::Never, false, 1_750_000_000))
1030            .await
1031            .unwrap();
1032        assert!(mark.anchor.is_none(), "cheap write must carry no anchor");
1033        assert_eq!(mark.resource, "/notes/a.ttl");
1034        assert_eq!(marker.calls.load(Ordering::SeqCst), 1, "git-mark always runs");
1035        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0, "anchorer must NOT be called");
1036    }
1037
1038    #[tokio::test]
1039    async fn record_high_value_write_carries_git_mark_and_anchor() {
1040        // HighValue + high_value=true ⇒ BOTH tiers present, and the anchor's
1041        // state_hash IS the git commit SHA (the two tiers are bound).
1042        let marker = Arc::new(MockMarker::default());
1043        let anchorer = Arc::new(MockAnchorer::default());
1044        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1045        let mark = log
1046            .record(rec("receipts/r1.ttl", AnchorPolicy::HighValue, true, 1_750_000_000))
1047            .await
1048            .unwrap();
1049        let anchor = mark.anchor.expect("high-value write must carry an anchor");
1050        assert_eq!(
1051            anchor.state_hash, mark.git.commit_sha,
1052            "anchor must commit to the git SHA (binds the two tiers — §2.3)"
1053        );
1054        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1);
1055        assert_eq!(
1056            anchorer.last_state_hash.lock().unwrap().as_deref(),
1057            Some(mark.git.commit_sha.as_str())
1058        );
1059    }
1060
1061    #[tokio::test]
1062    async fn record_high_value_flag_false_is_git_only() {
1063        // HighValue policy but the resource is NOT flagged ⇒ git-mark only.
1064        let marker = Arc::new(MockMarker::default());
1065        let anchorer = Arc::new(MockAnchorer::default());
1066        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1067        let mark = log
1068            .record(rec("notes/x.ttl", AnchorPolicy::HighValue, false, 1))
1069            .await
1070            .unwrap();
1071        assert!(mark.anchor.is_none());
1072        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1073    }
1074
1075    #[tokio::test]
1076    async fn record_always_anchors_every_write() {
1077        let marker = Arc::new(MockMarker::default());
1078        let anchorer = Arc::new(MockAnchorer::default());
1079        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1080        for i in 0..3 {
1081            let m = log
1082                .record(rec(&format!("s/{i}.ttl"), AnchorPolicy::Always, false, 1))
1083                .await
1084                .unwrap();
1085            assert!(m.anchor.is_some());
1086        }
1087        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 3);
1088    }
1089
1090    #[tokio::test]
1091    async fn record_without_anchorer_degrades_to_git_only() {
1092        // No anchorer (the wasm / no-Bitcoin pod): even Always degrades to
1093        // git-mark only, silently.
1094        let marker = Arc::new(MockMarker::default());
1095        let log = ProvenanceLog::new(marker.clone());
1096        assert!(log.anchorer.is_none());
1097        let mark = log
1098            .record(rec("notes/a.ttl", AnchorPolicy::Always, true, 1))
1099            .await
1100            .unwrap();
1101        assert!(mark.anchor.is_none(), "no anchorer ⇒ no anchor regardless of policy");
1102        assert_eq!(marker.calls.load(Ordering::SeqCst), 1);
1103    }
1104
1105    #[tokio::test]
1106    async fn record_epoch_defers_anchoring_to_accumulator() {
1107        // Epoch policy: record() never anchors inline; the caller batches the
1108        // SHA and anchors the root once on epoch close.
1109        let marker = Arc::new(MockMarker::default());
1110        let anchorer = Arc::new(MockAnchorer::default());
1111        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1112
1113        let mut epoch = EpochAccumulator::new(3);
1114        let mut shas = Vec::new();
1115        for i in 0..3 {
1116            let m = log
1117                .record(rec(&format!("e/{i}.ttl"), AnchorPolicy::Epoch, true, 1))
1118                .await
1119                .unwrap();
1120            assert!(m.anchor.is_none(), "epoch writes never anchor inline");
1121            epoch.push(m.git.commit_sha.clone());
1122            shas.push(m.git.commit_sha);
1123        }
1124        // No per-write anchors happened.
1125        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1126
1127        // Epoch is full → close → ONE anchor for the whole batch root.
1128        assert!(epoch.is_full());
1129        let closed = epoch.close().expect("non-empty epoch closes");
1130        assert_eq!(closed.commits, shas);
1131        let anchor = anchorer.anchor("PROV", &closed.root, "testnet4").await.unwrap();
1132        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1, "ONE anchor notarises N commits");
1133        assert_eq!(anchor.state_hash, closed.root);
1134        // Epoch drained — a fresh epoch begins.
1135        assert!(epoch.is_empty());
1136    }
1137
1138    // ── Merkle tree: root determinism + inclusion proofs ──────────────────
1139
1140    #[test]
1141    fn merkle_root_empty_and_single() {
1142        assert_eq!(merkle_root(&[]), [0u8; 32]);
1143        let leaf = merkle_leaf("deadbeef");
1144        // A single leaf is its own root.
1145        assert_eq!(merkle_root(&[leaf]), leaf);
1146    }
1147
1148    #[test]
1149    fn merkle_root_is_deterministic_and_order_sensitive() {
1150        let a = merkle_leaf("aaa");
1151        let b = merkle_leaf("bbb");
1152        let r1 = merkle_root(&[a, b]);
1153        let r2 = merkle_root(&[a, b]);
1154        assert_eq!(r1, r2, "deterministic");
1155        let swapped = merkle_root(&[b, a]);
1156        assert_ne!(r1, swapped, "leaf order changes the root");
1157    }
1158
1159    #[test]
1160    fn epoch_root_matches_close_root() {
1161        let mut e = EpochAccumulator::new(10);
1162        for i in 0..5 {
1163            e.push(format!("commit{i:040}"));
1164        }
1165        let peeked = e.root().unwrap();
1166        let closed = e.close().unwrap();
1167        assert_eq!(peeked, closed.root, "root() peek == close() root");
1168    }
1169
1170    #[test]
1171    fn epoch_inclusion_proof_verifies_for_every_leaf() {
1172        // N commits → one root → each commit's inclusion proof verifies.
1173        let n = 7; // odd, to exercise last-node duplication
1174        let mut e = EpochAccumulator::new(n);
1175        for i in 0..n {
1176            e.push(format!("c{i:039}")); // 40-char commit-like ids
1177        }
1178        let root = e.root().unwrap();
1179        for i in 0..n {
1180            let proof = e.inclusion_proof(i).expect("proof for in-range leaf");
1181            assert!(
1182                EpochAccumulator::verify_inclusion(&proof, &root),
1183                "leaf {i} must verify against the anchored root"
1184            );
1185        }
1186        // Out-of-range index → no proof.
1187        assert!(e.inclusion_proof(n).is_none());
1188    }
1189
1190    #[test]
1191    fn epoch_inclusion_proof_rejects_wrong_root_and_tampered_leaf() {
1192        let mut e = EpochAccumulator::new(4);
1193        for i in 0..4 {
1194            e.push(format!("c{i:039}"));
1195        }
1196        let root = e.root().unwrap();
1197        let mut proof = e.inclusion_proof(1).unwrap();
1198        // Wrong root → reject.
1199        assert!(!EpochAccumulator::verify_inclusion(&proof, &"00".repeat(32)));
1200        // Tampered leaf → reject against the genuine root.
1201        proof.leaf = hex::encode(merkle_leaf("forged"));
1202        assert!(!EpochAccumulator::verify_inclusion(&proof, &root));
1203    }
1204
1205    #[test]
1206    fn epoch_threshold_and_len_tracking() {
1207        let mut e = EpochAccumulator::new(2);
1208        assert_eq!(e.threshold(), 2);
1209        assert!(e.is_empty() && !e.is_full());
1210        e.push("a");
1211        assert_eq!(e.len(), 1);
1212        assert!(!e.is_full());
1213        e.push("b");
1214        assert!(e.is_full(), "reaching threshold ⇒ full");
1215        // Threshold clamps to ≥ 1.
1216        assert_eq!(EpochAccumulator::new(0).threshold(), 1);
1217    }
1218
1219    #[test]
1220    fn empty_epoch_close_is_none() {
1221        let mut e = EpochAccumulator::new(3);
1222        assert!(e.close().is_none());
1223        assert!(e.root().is_none());
1224    }
1225
1226    #[test]
1227    fn merkle_proof_round_trips() {
1228        let p = MerkleProof {
1229            leaf: hex::encode(merkle_leaf("x")),
1230            siblings: vec![("ab".repeat(32), true), ("cd".repeat(32), false)],
1231        };
1232        let json = serde_json::to_string(&p).unwrap();
1233        let back: MerkleProof = serde_json::from_str(&json).unwrap();
1234        assert_eq!(p, back);
1235    }
1236
1237    #[test]
1238    fn anchor_policy_round_trips() {
1239        for p in [AnchorPolicy::Never, AnchorPolicy::Always, AnchorPolicy::HighValue, AnchorPolicy::Epoch] {
1240            let json = serde_json::to_string(&p).unwrap();
1241            let back: AnchorPolicy = serde_json::from_str(&json).unwrap();
1242            assert_eq!(p, back);
1243        }
1244    }
1245}