Skip to main content

solid_pod_rs/
provenance.rs

1//! Provenance primitives — composable, cost-tiered traceability for pod writes.
2//!
3//! Implements the data model and traits from
4//! [`docs/design/provenance-upgrade-master-plan.md`](../../docs/design/provenance-upgrade-master-plan.md)
5//! §2 and [ADR-059](../../docs/adr/ADR-059-provenance-primitives-block-trails-git-marks.md)
6//! (D1, D2, D4, D6). Two tiers compose into one chain:
7//!
8//! - **git-mark** (cheap, always-on): every pod write becomes a git commit;
9//!   the commit SHA is captured as a [`GitMark`]. Content-addressed,
10//!   append-only, tamper-evident ordering for free. The native implementation
11//!   of [`GitMarker`] lives in `solid-pod-rs-git::mark` (it shells to `git`);
12//!   wasm consumers compile against a no-op marker.
13//! - **block-trail anchor** (expensive, opt-in): a Bitcoin-anchored MRC20 state
14//!   whose taproot UTXO externally timestamps a record ([`BlockTrailAnchor`]).
15//!   Reserved for high-value records. The [`BlockAnchorer`] trait is defined
16//!   here; a real implementation lands in Phase 4 (`bitcoin_tx.rs` + mempool).
17//!
18//! A [`ProvenanceMark`] always carries a [`GitMark`] and *optionally* a
19//! [`BlockTrailAnchor`]. The anchor's `state_hash` commits to the git SHA (or an
20//! epoch Merkle root over many commits), binding both tiers into one chain.
21//!
22//! ## wasm32 safety
23//!
//! Everything in this module — the types and [`prov_ttl`] — is pure logic and
//! compiles for `wasm32-unknown-unknown`. The traits are declared with
//! `#[async_trait(?Send)]` (matching the crate's existing
//! [`crate::payments::PaymentStore`] pattern), so the futures returned by their
//! async methods carry no `Send` bound and a wasm single-threaded executor can
//! drive them. No `tokio`, no process spawning, no I/O leaks into this surface.
29
30use std::path::Path;
31use std::sync::Arc;
32
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35
36// ---------------------------------------------------------------------------
37// Data model (§2.1)
38// ---------------------------------------------------------------------------
39
40/// A provenance mark over a pod resource write.
41///
42/// Always carries a git commit ([`GitMark`]); optionally upgraded with a
43/// Bitcoin block-trail anchor ([`BlockTrailAnchor`]) for high-value records.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct ProvenanceMark {
46    /// Pod-relative path of the resource the write targeted.
47    pub resource: String,
48    /// The git commit the write produced — **always present** (cheap tier).
49    pub git: GitMark,
50    /// Optional Bitcoin block-trail anchor — **opt-in** (expensive tier).
51    #[serde(default, skip_serializing_if = "Option::is_none")]
52    pub anchor: Option<BlockTrailAnchor>,
53    /// `did:nostr` of the writer (NIP-98 authenticated principal), or an
54    /// anonymous marker when the write was unauthenticated.
55    pub agent_did: String,
56    /// Unix seconds at which the mark was produced.
57    pub created: u64,
58}
59
60/// The cheap-tier git commit captured for a pod write.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct GitMark {
63    /// Git SHA-1 of the commit the write produced.
64    pub commit_sha: String,
65    /// Pod repo slug (the pod's first path segment / pubkey).
66    pub repo: String,
67    /// Branch the commit landed on. Pinned to `"main"` by `init.rs`.
68    pub branch: String,
69    /// Prior commit SHA (the append-only chain link), or `None` for the
70    /// genesis commit of a freshly-initialised repo.
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub parent: Option<String>,
73}
74
75/// The on-disk `gitmark.json` envelope — the Carvalho-lineage substrate
76/// shape (ADR-124, C7), verified byte-for-byte against
77/// `microfed/gitmark.json`.
78///
79/// **Exactly five keys**: `@id`, `genesis`, `nick`, `package`, `repository`.
80/// `@context`/`@type`/`commit`/`parent` are deliberately ABSENT — they are
81/// not in the ground-truth file (parent-linkage lives in `blocktrails.json`
82/// `states[]`/`txo[]`, not here). For byte-parity with create-agent, emit
83/// only these five keys.
84///
85/// `@id` is `gitmark:<commit_sha>:<vout>`; `genesis` is
86/// `gitmark:<first-commit-sha>:0` (and equals `@id` for the first mark).
87/// This is a *projection* of the internal [`GitMark`] — the in-memory
88/// `{commit_sha, repo, branch, parent}` shape (used by the PROV-O sidecar +
89/// the composition log) is unchanged; only the on-disk substrate file
90/// adopts this shape.
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct GitMarkEnvelope {
93    /// `gitmark:<commit_sha>:<vout>` — the single-use-seal coordinate.
94    #[serde(rename = "@id")]
95    pub id: String,
96    /// `gitmark:<first-commit-sha>:0` — the trail's genesis seal.
97    pub genesis: String,
98    /// Short human name for the mark (e.g. the package/pod nickname).
99    pub nick: String,
100    /// Pod-relative package path (e.g. `./package.json`).
101    pub package: String,
102    /// Repo-relative root (e.g. `./`).
103    pub repository: String,
104}
105
106impl GitMark {
107    /// Project this internal [`GitMark`] onto the canonical 5-key
108    /// `gitmark.json` envelope (ADR-124, C7).
109    ///
110    /// - `vout` is the seal output index for this mark's `@id`
111    ///   (`gitmark:<commit_sha>:<vout>`).
112    /// - `genesis_sha` is the trail's first commit SHA; pass this mark's own
113    ///   `commit_sha` for the genesis mark (then `genesis == @id`).
114    /// - `nick`/`package` are the additive projection fields; `repository`
115    ///   defaults to `./` (repo-relative root, matching the ground truth).
116    ///
117    /// `branch`/`parent` are intentionally NOT emitted — they are not part of
118    /// the on-disk envelope. Parent-linkage is carried by `blocktrails.json`.
119    #[must_use]
120    pub fn to_gitmark_envelope(
121        &self,
122        vout: u32,
123        genesis_sha: &str,
124        nick: impl Into<String>,
125        package: impl Into<String>,
126    ) -> GitMarkEnvelope {
127        GitMarkEnvelope {
128            id: format!("gitmark:{}:{vout}", self.commit_sha),
129            genesis: format!("gitmark:{genesis_sha}:0"),
130            nick: nick.into(),
131            package: package.into(),
132            repository: "./".to_string(),
133        }
134    }
135
136    /// Serialise this mark to the canonical `gitmark.json` text (5-key
137    /// envelope, ADR-124). Convenience over [`Self::to_gitmark_envelope`] +
138    /// `serde_json::to_string_pretty`.
139    pub fn to_gitmark_json(
140        &self,
141        vout: u32,
142        genesis_sha: &str,
143        nick: impl Into<String>,
144        package: impl Into<String>,
145    ) -> Result<String, ProvenanceError> {
146        let env = self.to_gitmark_envelope(vout, genesis_sha, nick, package);
147        serde_json::to_string_pretty(&env)
148            .map_err(|e| ProvenanceError::Store(format!("gitmark.json serialise: {e}")))
149    }
150}
151
152/// The expensive-tier Bitcoin anchor for a record.
153///
154/// Reuses the existing [`crate::mrc20`] crypto (`Mrc20State`, `bt_address`,
155/// `verify_mrc20_anchor`) — no crypto is re-implemented here. The
156/// `state_strings` carry the portable, independently-verifiable proof.
157#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
158pub struct BlockTrailAnchor {
159    /// Trail ticker / identifier.
160    pub ticker: String,
161    /// `sha256_hex(jcs(state))` — links into the MRC20 trail and commits to
162    /// the git SHA (or an epoch Merkle root).
163    pub state_hash: String,
164    /// Bitcoin transaction id of the anchoring UTXO.
165    pub txid: String,
166    /// Output index of the anchoring UTXO.
167    pub vout: u32,
168    /// Derived P2TR address (`mrc20::bt_address`).
169    pub address: String,
170    /// `"testnet4"` | `"mainnet"` (or any network the operator configures).
171    pub network: String,
172    /// Confirmation height; `None` until the anchoring tx confirms.
173    #[serde(default, skip_serializing_if = "Option::is_none")]
174    pub blockheight: Option<u64>,
175    /// Portable, independently-verifiable proof — the serialised states.
176    #[serde(default)]
177    pub state_strings: Vec<String>,
178    /// Issuer's compressed pubkey (66-char hex). Together with
179    /// `state_strings` it re-derives the taproot `address` via
180    /// `mrc20::bt_address` — the read-side check
181    /// ([`BlockAnchorer::verify`](crate::provenance::BlockAnchorer::verify))
182    /// needs it to confirm `address` was not forged. `None` on legacy /
183    /// partially-populated anchors (verify then has nothing to re-derive
184    /// against and reports `false`).
185    #[serde(default, skip_serializing_if = "Option::is_none")]
186    pub pubkey: Option<String>,
187}
188
189/// A single UTXO step in a [`BlocktrailEnvelope`] `txo[]` chain — the
190/// BIP-341 single-use-seal coordinate (`<txid>:<vout>`) plus its
191/// confirmation status.
192#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
193pub struct BlocktrailTxo {
194    /// `<txid>:<vout>` — the seal output this state was sealed under.
195    pub outpoint: String,
196    /// Confirmation height; `None` until the sealing tx confirms.
197    #[serde(default, skip_serializing_if = "Option::is_none")]
198    pub blockheight: Option<u64>,
199}
200
201/// The on-disk `blocktrails.json` envelope — the 4th web-contract layer
202/// (ADR-124 §2.2). The `gitmark.json` substrate anchors the reducer/state/
203/// ledger layers; `blocktrails.json` is the **trail** layer.
204///
205/// Per the reconciliation (C6): this shape is reconstructed from the
206/// `webcontracts.org` / "Melvo Predicts" reference pattern — it is NOT
207/// byte-verifiable against a fetchable create-agent artefact (the
208/// create-agent repo contains only `gitmark.json`). Only `gitmark.json` is
209/// "verbatim".
210///
211/// Shape: `@type "Blocktrail"`, `profile "gitmark"` (the anchoring
212/// substrate), a BIP-341 single-use-seal chain expressed as `states[]`
213/// (the commit SHAs — the ledger states, in order) paired with `txo[]` (the
214/// UTXO chain that seals them). `genesis` ties back to the `gitmark.json`
215/// genesis seal.
216#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
217pub struct BlocktrailEnvelope {
218    /// `gitmark:<first-commit-sha>:0` — the genesis seal (shared with the
219    /// `gitmark.json` `genesis`).
220    #[serde(rename = "@id")]
221    pub id: String,
222    /// Always `"Blocktrail"`.
223    #[serde(rename = "@type")]
224    pub type_: String,
225    /// Anchoring substrate profile — always `"gitmark"` here.
226    pub profile: String,
227    /// `gitmark:<first-commit-sha>:0`.
228    pub genesis: String,
229    /// The ledger states in order — commit SHAs the trail notarises.
230    pub states: Vec<String>,
231    /// The BIP-341 single-use-seal UTXO chain sealing `states[]` (1:1 order).
232    pub txo: Vec<BlocktrailTxo>,
233}
234
235impl BlocktrailEnvelope {
236    /// Build a `Blocktrail` over an ordered set of commit SHAs (`states`)
237    /// and their sealing UTXO chain (`txo`), profiled on the `gitmark`
238    /// substrate (ADR-124 §2.2, C6 reference shape).
239    ///
240    /// `genesis_sha` is the trail's first commit SHA; the envelope's `@id`
241    /// and `genesis` are both `gitmark:<genesis_sha>:0`.
242    #[must_use]
243    pub fn new_gitmark_profile(
244        genesis_sha: &str,
245        states: Vec<String>,
246        txo: Vec<BlocktrailTxo>,
247    ) -> Self {
248        let genesis = format!("gitmark:{genesis_sha}:0");
249        Self {
250            id: genesis.clone(),
251            type_: "Blocktrail".to_string(),
252            profile: "gitmark".to_string(),
253            genesis,
254            states,
255            txo,
256        }
257    }
258
259    /// Serialise to the canonical `blocktrails.json` text.
260    pub fn to_blocktrails_json(&self) -> Result<String, ProvenanceError> {
261        serde_json::to_string_pretty(self)
262            .map_err(|e| ProvenanceError::Store(format!("blocktrails.json serialise: {e}")))
263    }
264}
265
266// ---------------------------------------------------------------------------
267// Errors
268// ---------------------------------------------------------------------------
269
/// Error type of the provenance primitives.
///
/// Written by hand rather than with a `thiserror` derive, so the type builds
/// on `wasm32` without pulling proc-macro evaluation into the pure surface.
/// The variants follow the crate's error-message style; each carries a
/// free-form message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProvenanceError {
    /// A git operation failed (spawn, commit, rev-parse, …).
    Git(String),
    /// A Bitcoin anchor operation failed (mempool, tx-build, verify, …).
    Anchor(String),
    /// The resource path was refused (traversal, sidecar suffix, …).
    InvalidPath(String),
    /// The mark could not be persisted or emitted.
    Store(String),
}

impl std::fmt::Display for ProvenanceError {
    /// Formats the error as `<label>: <message>`, with one fixed label per
    /// variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the variant's label once, then emit with a single `write!`.
        let (label, detail) = match self {
            Self::Git(detail) => ("git-mark", detail),
            Self::Anchor(detail) => ("block-anchor", detail),
            Self::InvalidPath(detail) => ("invalid provenance path", detail),
            Self::Store(detail) => ("provenance store", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl std::error::Error for ProvenanceError {}
299
300// ---------------------------------------------------------------------------
301// Traits (§2.2)
302// ---------------------------------------------------------------------------
303
304/// Cheap tier. Implemented by `solid-pod-rs-git` (shells to `git`).
305///
306/// `?Send` for wasm32-safety, matching the crate's [`crate::payments::PaymentStore`]
307/// pattern. The wasm `core` consumer compiles against a no-op marker.
308#[async_trait::async_trait(?Send)]
309pub trait GitMarker: Send + Sync {
310    /// Stage `path` and commit it, returning the resulting [`GitMark`].
311    ///
312    /// `repo` is the absolute filesystem path to the (non-bare) pod repo;
313    /// `path` is the repo-relative path written; `agent_did` is recorded as
314    /// the commit author email; `message` is the commit subject. When there
315    /// is nothing to commit the implementation returns a mark referencing the
316    /// current HEAD without erroring.
317    async fn mark_write(
318        &self,
319        repo: &Path,
320        path: &str,
321        agent_did: &str,
322        message: &str,
323    ) -> Result<GitMark, ProvenanceError>;
324
325    /// Return the current HEAD commit SHA, or `None` for an unborn branch.
326    async fn head(&self, repo: &Path) -> Result<Option<String>, ProvenanceError>;
327}
328
329/// Expensive tier. Server-side (mempool + Bitcoin TX), behind feature `mrc20`.
330///
331/// Defined here; a real implementation lands in Phase 4 (`bitcoin_tx.rs`).
332#[async_trait::async_trait(?Send)]
333pub trait BlockAnchorer: Send + Sync {
334    /// Anchor `state_hash` under `ticker` on `network`, returning the produced
335    /// [`BlockTrailAnchor`]. Implemented by
336    /// `solid-pod-rs-server::mempool::MempoolBlockAnchorer` (builds + broadcasts
337    /// a taproot MRC20 anchoring tx via `bitcoin_tx.rs`).
338    async fn anchor(
339        &self,
340        ticker: &str,
341        state_hash: &str,
342        network: &str,
343    ) -> Result<BlockTrailAnchor, ProvenanceError>;
344
345    /// Verify a previously-produced anchor against the chain / fixtures
346    /// (re-derives the taproot address from the portable proof, then confirms a
347    /// UTXO sits at it).
348    async fn verify(&self, anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError>;
349}
350
351// ---------------------------------------------------------------------------
352// Composition policy (§2.3, ADR-059 D1/D5)
353// ---------------------------------------------------------------------------
354
355/// When a [`ProvenanceLog::record`] write should incur the expensive Bitcoin
356/// block-trail anchor on top of the always-on git-mark.
357///
358/// The cheap tier (git-mark) runs for *every* policy — these variants only
359/// govern the **opt-in** anchor. Pure, `Copy`, wasm-safe: it carries no I/O.
360///
361/// | Variant | Anchor behaviour |
362/// |--------------|--------------------------------------------------------|
363/// | [`Never`](AnchorPolicy::Never) | git-mark only — no on-chain cost. The default for ordinary writes. |
364/// | [`Always`](AnchorPolicy::Always) | anchor **every** write (commits the git SHA on-chain). Expensive; only for trails where every state must be externally timestamped. |
365/// | [`HighValue`](AnchorPolicy::HighValue) | anchor iff the resource is flagged anchor-worthy (its ACL carries a `ProvenanceAnchor` condition / the caller passes the high-value flag). Settlement receipts, elevation/ACSP decisions. |
366/// | [`Epoch`](AnchorPolicy::Epoch) | accumulate the git SHA into an [`EpochAccumulator`]; the batch root is anchored **once** on epoch close (one Bitcoin tx notarises many commits — ADR-059 D5). |
367///
368/// An anchor is attempted only when the policy says so **and** the
369/// [`ProvenanceLog`] was built with an anchorer ([`ProvenanceLog::anchorer`]
370/// is `Some`). With `anchorer: None` (the wasm / no-Bitcoin pod) every policy
371/// degrades to git-mark-only, silently.
372#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
373pub enum AnchorPolicy {
374    /// git-mark only; never anchor. The default for ordinary pod writes.
375    #[default]
376    Never,
377    /// Anchor every write — the git commit SHA is committed on-chain each time.
378    Always,
379    /// Anchor only when the resource is flagged high-value (ACL carries a
380    /// `ProvenanceAnchor` condition). Otherwise git-mark only.
381    HighValue,
382    /// Accumulate the commit into the current epoch; the epoch's Merkle root is
383    /// anchored once on close (amortised on-chain cost — ADR-059 D5).
384    Epoch,
385}
386
387impl AnchorPolicy {
388    /// Whether *this write* should be anchored inline (i.e. produce a
389    /// [`BlockTrailAnchor`] on the returned mark), given whether the resource
390    /// is flagged high-value.
391    ///
392    /// - `Never` / `Epoch` ⇒ never inline (`Epoch` defers to the accumulator).
393    /// - `Always` ⇒ always inline.
394    /// - `HighValue` ⇒ inline iff `high_value`.
395    #[must_use]
396    pub fn anchors_inline(self, high_value: bool) -> bool {
397        match self {
398            AnchorPolicy::Never | AnchorPolicy::Epoch => false,
399            AnchorPolicy::Always => true,
400            AnchorPolicy::HighValue => high_value,
401        }
402    }
403}
404
405// ---------------------------------------------------------------------------
406// Composition log (§2.2 `ProvenanceLog`, §2.3 composition rule)
407// ---------------------------------------------------------------------------
408
409/// The composition point for the two provenance tiers (master-plan §2.2/§2.3,
410/// ADR-059 D1).
411///
412/// A `ProvenanceLog` always holds the cheap-tier [`GitMarker`] and *optionally*
413/// the expensive-tier [`BlockAnchorer`]. [`record`](ProvenanceLog::record)
414/// implements the **cheap-always, expensive-opt-in** rule:
415///
416/// 1. **Always** `marker.mark_write()` → [`GitMark`] (every write becomes a
417///    commit; we capture the SHA).
418/// 2. **Conditionally** `anchorer.anchor()` when the [`AnchorPolicy`] says this
419///    write anchors inline AND an anchorer is present. The anchor's
420///    `state_hash` is set to the git commit SHA — so the Bitcoin UTXO commits
421///    to the git history, **binding the two tiers into one chain** (§2.3).
422///
423/// The returned [`ProvenanceMark`] carries the git-mark always and the anchor
424/// when one was produced. Persisting the PROV-O sidecar and emitting the
425/// `Updates-via` notification (step 3) is the server's job — kept out of this
426/// pure surface so it compiles for wasm.
427///
428/// ## wasm32 safety
429///
430/// `Arc<dyn GitMarker>` / `Arc<dyn BlockAnchorer>` are `?Send` trait objects;
431/// the type holds no runtime. On wasm the pod constructs it with a no-op marker
432/// and `anchorer: None`, so `record` is git-mark-only and never reaches any
433/// Bitcoin I/O.
434#[derive(Clone)]
435pub struct ProvenanceLog {
436    /// Cheap tier — always invoked. The native server injects
437    /// `solid-pod-rs-git`'s `ShellGitMarker`; wasm injects a no-op.
438    pub marker: Arc<dyn GitMarker>,
439    /// Expensive tier — optional. `None` in pods that do not pay for Bitcoin
440    /// anchoring (and always `None` on wasm).
441    pub anchorer: Option<Arc<dyn BlockAnchorer>>,
442}
443
444impl std::fmt::Debug for ProvenanceLog {
445    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
446        f.debug_struct("ProvenanceLog")
447            .field("marker", &"Arc<dyn GitMarker>")
448            .field("anchorer", &self.anchorer.as_ref().map(|_| "Arc<dyn BlockAnchorer>"))
449            .finish()
450    }
451}
452
453/// Descriptor of a single pod write passed to [`ProvenanceLog::record`].
454///
455/// Borrowed (no allocation on the hot path), mirroring
456/// [`crate::wac::conditions::RequestContext`]. Bundles the write identity
457/// (repo/path/agent/message), the expensive-tier [`AnchorPolicy`] + its
458/// `high_value` flag, and the trail coordinates (`ticker`/`network`) an anchor
459/// targets. The trail fields are ignored unless the policy actually anchors.
460#[derive(Debug, Clone, Copy)]
461pub struct WriteRecord<'a> {
462    /// Absolute filesystem path to the (non-bare) pod repo.
463    pub repo: &'a Path,
464    /// Repo-relative path of the resource written.
465    pub path: &'a str,
466    /// `did:nostr` of the writer (NIP-98 principal), or an anonymous marker.
467    pub agent_did: &'a str,
468    /// Commit subject (the LDP method + path).
469    pub message: &'a str,
470    /// Expensive-tier policy (see [`AnchorPolicy`]).
471    pub policy: AnchorPolicy,
472    /// Whether the resource is flagged high-value (ACL `ProvenanceAnchor`).
473    pub high_value: bool,
474    /// Trail ticker to anchor against (used only when anchoring).
475    pub ticker: &'a str,
476    /// Bitcoin network of the trail (used only when anchoring).
477    pub network: &'a str,
478    /// Unix seconds stamped onto the produced mark.
479    pub created: u64,
480}
481
482impl ProvenanceLog {
483    /// Construct a git-mark-only log (no Bitcoin tier). The common case for
484    /// ordinary pods and the only shape available on wasm.
485    #[must_use]
486    pub fn new(marker: Arc<dyn GitMarker>) -> Self {
487        Self { marker, anchorer: None }
488    }
489
490    /// Construct a log with both tiers wired.
491    #[must_use]
492    pub fn with_anchorer(marker: Arc<dyn GitMarker>, anchorer: Arc<dyn BlockAnchorer>) -> Self {
493        Self {
494            marker,
495            anchorer: Some(anchorer),
496        }
497    }
498
499    /// Record a pod resource write across both tiers (the composition rule).
500    ///
501    /// The write is described by a [`WriteRecord`]. Always commits (cheap tier).
502    /// Then, iff `policy.anchors_inline(high_value)` — both carried by the
503    /// [`WriteRecord`] — AND an anchorer is present, anchors the **git commit
504    /// SHA** under the record's `ticker`/`network`, attaching the
505    /// [`BlockTrailAnchor`] to the returned mark. The anchor's `state_hash` is
506    /// the commit SHA, binding git ↔ Bitcoin (master-plan §2.3).
507    ///
508    /// For [`AnchorPolicy::Epoch`] this method never anchors inline — the caller
509    /// feeds the returned `git.commit_sha` into an [`EpochAccumulator`] and
510    /// anchors the batch root on epoch close.
511    ///
512    /// Errors from the **cheap** tier propagate (the git-mark is the contract).
513    /// Errors from the **expensive** tier are returned too, so the caller can
514    /// decide its own best-effort policy — the server hook logs+swallows them
515    /// (a failed anchor must never fail the LDP write), exactly as it does for
516    /// the git-mark.
517    pub async fn record(&self, rec: WriteRecord<'_>) -> Result<ProvenanceMark, ProvenanceError> {
518        // 1. Cheap tier — ALWAYS. A failure here is a hard error: the git-mark
519        //    is the always-on contract.
520        let git = self
521            .marker
522            .mark_write(rec.repo, rec.path, rec.agent_did, rec.message)
523            .await?;
524
525        // 2. Expensive tier — opt-in. Only when the policy anchors this write
526        //    inline AND an anchorer is wired. The anchored state_hash IS the
527        //    git commit SHA — the Bitcoin UTXO now commits to the git history
528        //    (master-plan §2.3 "binds both primitives into one chain").
529        let anchor = if rec.policy.anchors_inline(rec.high_value) {
530            match &self.anchorer {
531                Some(a) => Some(a.anchor(rec.ticker, &git.commit_sha, rec.network).await?),
532                None => None,
533            }
534        } else {
535            None
536        };
537
538        Ok(ProvenanceMark {
539            resource: path_to_resource(rec.path),
540            git,
541            anchor,
542            agent_did: rec.agent_did.to_string(),
543            created: rec.created,
544        })
545    }
546}
547
/// Convert a repo-relative `path` into the pod-relative `resource` form that
/// a [`ProvenanceMark`] records, i.e. with a leading slash.
///
/// A path that already begins with `/` is returned unchanged, so the function
/// is idempotent. An empty path yields `"/"`.
fn path_to_resource(path: &str) -> String {
    let needs_slash = !path.starts_with('/');
    // Reserve room for the optional leading slash up front.
    let mut resource = String::with_capacity(path.len() + usize::from(needs_slash));
    if needs_slash {
        resource.push('/');
    }
    resource.push_str(path);
    resource
}
558
559// ---------------------------------------------------------------------------
560// Epoch Merkle-root anchoring (§2.3, ADR-059 D5) — pure, wasm-safe
561// ---------------------------------------------------------------------------
562
563/// Compute a binary SHA-256 Merkle root over `leaves` (each a 32-byte digest),
564/// duplicating the last node on an odd level (Bitcoin-style). Returns the
565/// all-zero digest for an empty input.
566///
567/// Pure and wasm-safe — uses only the always-compiled `sha2` dependency. Leaves
568/// are hashed *as given*; callers pass `sha256(commit_sha)` so the tree commits
569/// to the exact commit identifiers.
570fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] {
571    if leaves.is_empty() {
572        return [0u8; 32];
573    }
574    let mut level: Vec<[u8; 32]> = leaves.to_vec();
575    while level.len() > 1 {
576        let mut next = Vec::with_capacity(level.len().div_ceil(2));
577        let mut i = 0;
578        while i < level.len() {
579            let left = level[i];
580            // Duplicate the last node when the level is odd.
581            let right = if i + 1 < level.len() { level[i + 1] } else { left };
582            let mut h = Sha256::new();
583            h.update(left);
584            h.update(right);
585            next.push(h.finalize().into());
586            i += 2;
587        }
588        level = next;
589    }
590    level[0]
591}
592
593/// Hash one leaf value (a git commit SHA, as text) into the Merkle leaf digest.
594fn merkle_leaf(commit_sha: &str) -> [u8; 32] {
595    Sha256::digest(commit_sha.as_bytes()).into()
596}
597
598/// A Merkle inclusion proof: the sibling digests from leaf to root, each tagged
599/// with whether the sibling sits on the **right** of the running hash at that
600/// level. Verified with [`EpochAccumulator::verify_inclusion`].
601#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
602pub struct MerkleProof {
603    /// Hex of the leaf value's digest (`sha256(commit_sha)`).
604    pub leaf: String,
605    /// Sibling steps from the leaf upward: `(sibling_hex, sibling_is_right)`.
606    pub siblings: Vec<(String, bool)>,
607}
608
/// Collects git commit SHAs for one epoch and, when the epoch is closed,
/// produces the single Merkle root to anchor (ADR-059 D5 — *one Bitcoin tx
/// notarises many commits*).
///
/// Intended flow:
///
/// 1. A write whose [`AnchorPolicy`] is [`Epoch`](AnchorPolicy::Epoch) calls
///    [`push`](EpochAccumulator::push) with the commit SHA from its git-mark.
/// 2. Once the configured commit-count threshold is reached
///    ([`is_full`](EpochAccumulator::is_full)), the caller
///    [`close`s](EpochAccumulator::close) the epoch to get the root (hex) and
///    the batched SHAs, anchors that root **once** through a
///    [`BlockAnchorer`], and begins a fresh epoch.
/// 3. A per-commit [`inclusion_proof`](EpochAccumulator::inclusion_proof)
///    lets any commit be proven against the anchored root without
///    re-anchoring.
///
/// Pure and wasm-safe: neither the accumulator nor the Merkle maths do I/O.
/// The single anchor call belongs to the caller, through the (optional)
/// anchorer.
#[derive(Debug, Clone)]
pub struct EpochAccumulator {
    /// Commit SHAs gathered in this epoch; insertion order is leaf order.
    commits: Vec<String>,
    /// Number of commits at which the epoch counts as full. Set by operator
    /// policy (master-plan §5: "ACL writes epoch-only to bound cost").
    threshold: usize,
}
632
/// What closing an epoch yields: the Merkle root to anchor and the batch of
/// commit SHAs that root commits to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClosedEpoch {
    /// SHA-256 Merkle root (hex) over the epoch's commit-SHA leaves. This is
    /// the one value anchored on-chain for the whole batch.
    pub root: String,
    /// Commit SHAs notarised by `root`, in leaf order.
    pub commits: Vec<String>,
}
643
644impl EpochAccumulator {
645    /// New, empty epoch with a close `threshold` (clamped to ≥ 1).
646    #[must_use]
647    pub fn new(threshold: usize) -> Self {
648        Self {
649            commits: Vec::new(),
650            threshold: threshold.max(1),
651        }
652    }
653
654    /// Add a git commit SHA to the current epoch.
655    pub fn push(&mut self, commit_sha: impl Into<String>) {
656        self.commits.push(commit_sha.into());
657    }
658
659    /// Number of commits accumulated this epoch.
660    #[must_use]
661    pub fn len(&self) -> usize {
662        self.commits.len()
663    }
664
665    /// Whether the epoch holds no commits.
666    #[must_use]
667    pub fn is_empty(&self) -> bool {
668        self.commits.is_empty()
669    }
670
671    /// The configured close threshold.
672    #[must_use]
673    pub fn threshold(&self) -> usize {
674        self.threshold
675    }
676
677    /// Whether the epoch has reached its close threshold (time to anchor).
678    #[must_use]
679    pub fn is_full(&self) -> bool {
680        self.commits.len() >= self.threshold
681    }
682
683    /// The current Merkle root (hex) over the accumulated commits, without
684    /// draining. Returns `None` for an empty epoch (nothing to anchor).
685    #[must_use]
686    pub fn root(&self) -> Option<String> {
687        if self.commits.is_empty() {
688            return None;
689        }
690        let leaves: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
691        Some(hex::encode(merkle_root(&leaves)))
692    }
693
694    /// Seal the epoch: compute the root, return it with the batched commit SHAs,
695    /// and **drain** the accumulator so a fresh epoch begins. Returns `None`
696    /// (and drains nothing) for an empty epoch.
697    pub fn close(&mut self) -> Option<ClosedEpoch> {
698        if self.commits.is_empty() {
699            return None;
700        }
701        let commits = std::mem::take(&mut self.commits);
702        let leaves: Vec<[u8; 32]> = commits.iter().map(|c| merkle_leaf(c)).collect();
703        let root = hex::encode(merkle_root(&leaves));
704        Some(ClosedEpoch { root, commits })
705    }
706
707    /// Produce an inclusion proof for the commit at leaf `index` against the
708    /// *current* set of accumulated commits. `None` if `index` is out of range.
709    ///
710    /// The proof verifies against the root produced by [`root`](Self::root) /
711    /// [`close`](Self::close) over the same commit set — i.e. against the value
712    /// anchored on-chain.
713    #[must_use]
714    pub fn inclusion_proof(&self, index: usize) -> Option<MerkleProof> {
715        let n = self.commits.len();
716        if index >= n {
717            return None;
718        }
719        let mut level: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
720        let leaf_hex = hex::encode(level[index]);
721        let mut idx = index;
722        let mut siblings: Vec<(String, bool)> = Vec::new();
723        while level.len() > 1 {
724            let sibling_idx = if idx % 2 == 0 { idx + 1 } else { idx - 1 };
725            // On an odd level the rightmost node is paired with itself.
726            let sib = if sibling_idx < level.len() {
727                level[sibling_idx]
728            } else {
729                level[idx]
730            };
731            let sibling_is_right = idx % 2 == 0;
732            siblings.push((hex::encode(sib), sibling_is_right));
733
734            // Build the next level.
735            let mut next = Vec::with_capacity(level.len().div_ceil(2));
736            let mut i = 0;
737            while i < level.len() {
738                let left = level[i];
739                let right = if i + 1 < level.len() { level[i + 1] } else { left };
740                let mut h = Sha256::new();
741                h.update(left);
742                h.update(right);
743                next.push(h.finalize().into());
744                i += 2;
745            }
746            level = next;
747            idx /= 2;
748        }
749        Some(MerkleProof {
750            leaf: leaf_hex,
751            siblings,
752        })
753    }
754
755    /// Verify a [`MerkleProof`] against an expected `root_hex` (the anchored
756    /// root). Recomputes the path and compares — no accumulator state needed,
757    /// so a verifier can check inclusion with only the proof + the on-chain
758    /// root.
759    #[must_use]
760    pub fn verify_inclusion(proof: &MerkleProof, root_hex: &str) -> bool {
761        let Ok(mut acc) = hex::decode(&proof.leaf) else {
762            return false;
763        };
764        if acc.len() != 32 {
765            return false;
766        }
767        for (sib_hex, sib_is_right) in &proof.siblings {
768            let Ok(sib) = hex::decode(sib_hex) else {
769                return false;
770            };
771            if sib.len() != 32 {
772                return false;
773            }
774            let mut h = Sha256::new();
775            if *sib_is_right {
776                h.update(&acc);
777                h.update(&sib);
778            } else {
779                h.update(&sib);
780                h.update(&acc);
781            }
782            acc = h.finalize().to_vec();
783        }
784        hex::encode(acc) == root_hex
785    }
786}
787
788// ---------------------------------------------------------------------------
789// PROV-O serialiser (§2.3 step 3, D7)
790// ---------------------------------------------------------------------------
791
/// Backslash-escape `s` so it can sit inside a Turtle double-quoted literal
/// (RDF 1.1 Turtle §2.5.3 / §6.4 string escapes).
///
/// Backslash, double quote, line feed, carriage return and tab are replaced by
/// their two-character escapes; every other character is copied through.
fn ttl_escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '"' => Some("\\\""),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
808
/// Format `secs` (seconds since the Unix epoch) as a UTC `xsd:dateTime`
/// string of the form `YYYY-MM-DDThh:mm:ssZ`.
///
/// Implemented with integer arithmetic only (no `chrono` formatting), so the
/// output is deterministic and the function stays in the pure, wasm-safe
/// surface of this module.
fn xsd_datetime(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    // Split into whole days since the epoch and the time within the day.
    let day_count = (secs / SECS_PER_DAY) as i64;
    let secs_of_day = (secs % SECS_PER_DAY) as i64;
    let hour = secs_of_day / 3600;
    let minute = secs_of_day % 3600 / 60;
    let second = secs_of_day % 60;

    // Civil-from-days (Howard Hinnant): re-base so the count starts at
    // 0000-03-01, then peel off era, year-of-era and day-of-year. Years run
    // March..February here, which puts the leap day last.
    let shifted = day_count + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let march_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;

    // January and February belong to the following civil year.
    let (month, year_carry) = if march_month < 10 {
        (march_month + 3, 0)
    } else {
        (march_month - 9, 1)
    };
    let year = year_of_era + era * 400 + year_carry;

    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z")
}
833
834/// Produce a minimal, correct PROV-O Turtle sidecar for a [`ProvenanceMark`].
835///
836/// The mark is modelled as a `prov:Activity` (the write) that
837/// `prov:generated` the resource entity, was performed by the agent
838/// (`prov:wasAssociatedWith`), and is identified by its git commit SHA. The
839/// resource entity records `prov:wasGeneratedBy` the activity. When a
840/// block-trail anchor is present it is emitted as an associated entity bearing
841/// the txid/state-hash so the sidecar carries both tiers.
842///
843/// Kept deliberately small: stable prefix block, one activity, one entity, one
844/// agent, optional anchor entity. Round-trip-safe with the unit tests below.
845pub fn prov_ttl(mark: &ProvenanceMark) -> String {
846    let sha = &mark.git.commit_sha;
847    let resource = ttl_escape(&mark.resource);
848    let agent = ttl_escape(&mark.agent_did);
849    let branch = ttl_escape(&mark.git.branch);
850    let repo = ttl_escape(&mark.git.repo);
851    let when = xsd_datetime(mark.created);
852
853    let mut ttl = String::new();
854    ttl.push_str("@prefix prov: <http://www.w3.org/ns/prov#> .\n");
855    ttl.push_str("@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .\n");
856    ttl.push_str("@prefix git:  <https://w3id.org/git#> .\n");
857    ttl.push_str("@prefix bt:   <https://blocktrails.org/ns#> .\n\n");
858
859    // Activity: the write, identified by the commit it produced.
860    ttl.push_str(&format!("<urn:git:commit:{sha}> a prov:Activity ;\n"));
861    ttl.push_str(&format!("    prov:generated <{resource}> ;\n"));
862    ttl.push_str(&format!("    prov:wasAssociatedWith <{agent}> ;\n"));
863    ttl.push_str(&format!("    prov:endedAtTime \"{when}\"^^xsd:dateTime ;\n"));
864    ttl.push_str(&format!("    git:commit \"{sha}\" ;\n"));
865    ttl.push_str(&format!("    git:branch \"{branch}\" ;\n"));
866    ttl.push_str(&format!("    git:repo \"{repo}\" "));
867    if let Some(parent) = &mark.git.parent {
868        let parent = ttl_escape(parent);
869        ttl.push_str(&format!(";\n    git:parent \"{parent}\" .\n"));
870    } else {
871        ttl.push_str(".\n");
872    }
873
874    // Entity: the generated resource.
875    ttl.push('\n');
876    ttl.push_str(&format!("<{resource}> a prov:Entity ;\n"));
877    ttl.push_str(&format!(
878        "    prov:wasGeneratedBy <urn:git:commit:{sha}> ;\n"
879    ));
880    ttl.push_str(&format!(
881        "    prov:wasAttributedTo <{agent}> .\n"
882    ));
883
884    // Agent.
885    ttl.push('\n');
886    ttl.push_str(&format!("<{agent}> a prov:Agent .\n"));
887
888    // Optional anchor entity (expensive tier).
889    if let Some(a) = &mark.anchor {
890        let txid = ttl_escape(&a.txid);
891        let ticker = ttl_escape(&a.ticker);
892        let state_hash = ttl_escape(&a.state_hash);
893        let network = ttl_escape(&a.network);
894        ttl.push('\n');
895        ttl.push_str(&format!("<urn:bt:tx:{txid}:{}> a prov:Entity ;\n", a.vout));
896        ttl.push_str(&format!(
897            "    prov:wasDerivedFrom <urn:git:commit:{sha}> ;\n"
898        ));
899        ttl.push_str(&format!("    bt:ticker \"{ticker}\" ;\n"));
900        ttl.push_str(&format!("    bt:stateHash \"{state_hash}\" ;\n"));
901        ttl.push_str(&format!("    bt:network \"{network}\" ;\n"));
902        ttl.push_str(&format!("    bt:txid \"{txid}\" ;\n"));
903        ttl.push_str(&format!("    bt:vout \"{}\"^^xsd:integer ", a.vout));
904        if let Some(h) = a.blockheight {
905            ttl.push_str(&format!(";\n    bt:blockheight \"{h}\"^^xsd:integer .\n"));
906        } else {
907            ttl.push_str(".\n");
908        }
909    }
910
911    ttl
912}
913
914// ---------------------------------------------------------------------------
915// Tests
916// ---------------------------------------------------------------------------
917
918#[cfg(test)]
919mod tests {
920    use super::*;
921
922    fn sample_git() -> GitMark {
923        GitMark {
924            commit_sha: "a1b2c3d4e5f60718293a4b5c6d7e8f9001122334".into(),
925            repo: "deadbeef".into(),
926            branch: "main".into(),
927            parent: Some("00112233445566778899aabbccddeeff00112233".into()),
928        }
929    }
930
931    fn sample_mark() -> ProvenanceMark {
932        ProvenanceMark {
933            resource: "/notes/hello.ttl".into(),
934            git: sample_git(),
935            anchor: None,
936            agent_did: "did:nostr:abcdef".into(),
937            created: 1_750_000_000,
938        }
939    }
940
941    #[test]
942    fn git_mark_round_trips() {
943        let g = sample_git();
944        let json = serde_json::to_string(&g).unwrap();
945        let back: GitMark = serde_json::from_str(&json).unwrap();
946        assert_eq!(g, back);
947    }
948
949    // ── ADR-124: gitmark.json / blocktrails.json substrate envelopes ──────
950
951    #[test]
952    fn gitmark_envelope_has_exactly_five_keys() {
953        // C7 + CI invariant #6: gitmark.json is EXACTLY {@id, genesis, nick,
954        // package, repository}. No @context/@type/commit/parent.
955        let g = sample_git();
956        let env = g.to_gitmark_envelope(0, &g.commit_sha, "gitmark", "./package.json");
957        let v: serde_json::Value = serde_json::to_value(&env).unwrap();
958        let obj = v.as_object().unwrap();
959        let mut keys: Vec<&str> = obj.keys().map(String::as_str).collect();
960        keys.sort_unstable();
961        assert_eq!(
962            keys,
963            ["@id", "genesis", "nick", "package", "repository"],
964            "gitmark.json must be exactly the 5-key envelope (C7)"
965        );
966        // The four invented keys must be absent.
967        for forbidden in ["@context", "@type", "commit", "parent"] {
968            assert!(
969                !obj.contains_key(forbidden),
970                "gitmark.json must NOT carry `{forbidden}` (not in ground truth)"
971            );
972        }
973    }
974
975    #[test]
976    fn gitmark_envelope_projection_values() {
977        let g = sample_git();
978        // Genesis mark: genesis == @id.
979        let env = g.to_gitmark_envelope(0, &g.commit_sha, "pod", "./package.json");
980        assert_eq!(env.id, format!("gitmark:{}:0", g.commit_sha));
981        assert_eq!(env.genesis, format!("gitmark:{}:0", g.commit_sha));
982        assert_eq!(env.repository, "./");
983        // Non-genesis mark with a distinct genesis SHA + vout.
984        let env2 = g.to_gitmark_envelope(2, "00".repeat(20).as_str(), "pod", "./package.json");
985        assert_eq!(env2.id, format!("gitmark:{}:2", g.commit_sha));
986        assert_eq!(env2.genesis, format!("gitmark:{}:0", "00".repeat(20)));
987        assert_ne!(env2.id, env2.genesis);
988    }
989
990    #[test]
991    fn gitmark_json_matches_carvalho_shape() {
992        // Mirrors microfed/gitmark.json key set + ordering-agnostic shape.
993        let g = sample_git();
994        let json = g.to_gitmark_json(0, &g.commit_sha, "gitmark", "./package.json").unwrap();
995        let v: serde_json::Value = serde_json::from_str(&json).unwrap();
996        assert_eq!(v["@id"], format!("gitmark:{}:0", g.commit_sha));
997        assert_eq!(v["nick"], "gitmark");
998        assert_eq!(v["package"], "./package.json");
999        assert_eq!(v["repository"], "./");
1000    }
1001
1002    #[test]
1003    fn blocktrail_envelope_shape() {
1004        // C6 webcontracts reference shape: @type Blocktrail, profile gitmark,
1005        // states[] = commit SHAs, txo[] = UTXO chain.
1006        let g = sample_git();
1007        let txo = vec![BlocktrailTxo {
1008            outpoint: format!("{}:0", "ab".repeat(32)),
1009            blockheight: Some(840_000),
1010        }];
1011        let bt = BlocktrailEnvelope::new_gitmark_profile(
1012            &g.commit_sha,
1013            vec![g.commit_sha.clone()],
1014            txo,
1015        );
1016        assert_eq!(bt.type_, "Blocktrail");
1017        assert_eq!(bt.profile, "gitmark");
1018        assert_eq!(bt.id, format!("gitmark:{}:0", g.commit_sha));
1019        assert_eq!(bt.genesis, bt.id);
1020        assert_eq!(bt.states, vec![g.commit_sha.clone()]);
1021        assert_eq!(bt.txo.len(), 1);
1022
1023        // JSON shape: @type / profile / states / txo present.
1024        let v: serde_json::Value = serde_json::from_str(&bt.to_blocktrails_json().unwrap()).unwrap();
1025        assert_eq!(v["@type"], "Blocktrail");
1026        assert_eq!(v["profile"], "gitmark");
1027        assert!(v["states"].is_array());
1028        assert!(v["txo"].is_array());
1029        assert_eq!(v["txo"][0]["outpoint"], format!("{}:0", "ab".repeat(32)));
1030    }
1031
1032    #[test]
1033    fn blocktrail_envelope_round_trips() {
1034        let bt = BlocktrailEnvelope::new_gitmark_profile(
1035            &"cd".repeat(20),
1036            vec!["aa".repeat(20), "bb".repeat(20)],
1037            vec![
1038                BlocktrailTxo { outpoint: "t0:0".into(), blockheight: None },
1039                BlocktrailTxo { outpoint: "t1:0".into(), blockheight: Some(1) },
1040            ],
1041        );
1042        let json = serde_json::to_string(&bt).unwrap();
1043        let back: BlocktrailEnvelope = serde_json::from_str(&json).unwrap();
1044        assert_eq!(bt, back);
1045    }
1046
1047    #[test]
1048    fn provenance_mark_round_trips_without_anchor() {
1049        let m = sample_mark();
1050        let json = serde_json::to_string(&m).unwrap();
1051        // `anchor: None` must be omitted by skip_serializing_if.
1052        assert!(!json.contains("anchor"));
1053        let back: ProvenanceMark = serde_json::from_str(&json).unwrap();
1054        assert_eq!(m, back);
1055    }
1056
1057    #[test]
1058    fn provenance_mark_round_trips_with_anchor() {
1059        let mut m = sample_mark();
1060        m.anchor = Some(BlockTrailAnchor {
1061            ticker: "PROV".into(),
1062            state_hash: "ff".repeat(32),
1063            txid: "ab".repeat(32),
1064            vout: 1,
1065            address: "tb1pexample".into(),
1066            network: "testnet4".into(),
1067            blockheight: Some(840_000),
1068            state_strings: vec!["{\"seq\":0}".into(), "{\"seq\":1}".into()],
1069            pubkey: Some("02".to_string() + &"ab".repeat(32)),
1070        });
1071        let json = serde_json::to_string(&m).unwrap();
1072        let back: ProvenanceMark = serde_json::from_str(&json).unwrap();
1073        assert_eq!(m, back);
1074    }
1075
1076    #[test]
1077    fn block_trail_anchor_defaults_state_strings() {
1078        // state_strings missing in JSON must deserialise to an empty vec.
1079        let json = r#"{
1080            "ticker":"PROV","state_hash":"00","txid":"00","vout":0,
1081            "address":"tb1p","network":"testnet4"
1082        }"#;
1083        let a: BlockTrailAnchor = serde_json::from_str(json).unwrap();
1084        assert!(a.state_strings.is_empty());
1085        assert!(a.blockheight.is_none());
1086    }
1087
1088    #[test]
1089    fn prov_ttl_contains_core_triples() {
1090        let ttl = prov_ttl(&sample_mark());
1091        assert!(ttl.contains("@prefix prov: <http://www.w3.org/ns/prov#> ."));
1092        assert!(ttl.contains("a prov:Activity"));
1093        assert!(ttl.contains("prov:wasGeneratedBy"));
1094        assert!(ttl.contains("prov:wasAssociatedWith <did:nostr:abcdef>"));
1095        assert!(ttl.contains("a prov:Agent"));
1096        // Commit sha appears as the activity id + git:commit literal.
1097        assert!(ttl.contains("<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334>"));
1098        assert!(ttl.contains("git:commit \"a1b2c3d4e5f60718293a4b5c6d7e8f9001122334\""));
1099        assert!(ttl.contains("git:branch \"main\""));
1100        assert!(ttl.contains("git:parent \"00112233445566778899aabbccddeeff00112233\""));
1101        // The generated entity is the resource.
1102        assert!(ttl.contains("<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334> a prov:Activity"));
1103        assert!(ttl.contains("prov:generated </notes/hello.ttl>"));
1104    }
1105
1106    #[test]
1107    fn prov_ttl_omits_parent_when_absent() {
1108        let mut m = sample_mark();
1109        m.git.parent = None;
1110        let ttl = prov_ttl(&m);
1111        assert!(!ttl.contains("git:parent"));
1112        // Must still be a well-terminated activity block.
1113        assert!(ttl.contains("git:repo \"deadbeef\" .\n"));
1114    }
1115
1116    #[test]
1117    fn prov_ttl_emits_anchor_block_when_present() {
1118        let mut m = sample_mark();
1119        m.anchor = Some(BlockTrailAnchor {
1120            ticker: "PROV".into(),
1121            state_hash: "deadbeef".into(),
1122            txid: "cafebabe".into(),
1123            vout: 2,
1124            address: "tb1pexample".into(),
1125            network: "testnet4".into(),
1126            blockheight: Some(840_000),
1127            state_strings: vec![],
1128            pubkey: None,
1129        });
1130        let ttl = prov_ttl(&m);
1131        assert!(ttl.contains("<urn:bt:tx:cafebabe:2> a prov:Entity"));
1132        assert!(ttl.contains("bt:ticker \"PROV\""));
1133        assert!(ttl.contains("bt:stateHash \"deadbeef\""));
1134        assert!(ttl.contains("bt:blockheight \"840000\"^^xsd:integer"));
1135        assert!(ttl.contains("prov:wasDerivedFrom <urn:git:commit:"));
1136    }
1137
1138    #[test]
1139    fn prov_ttl_escapes_quotes_and_backslashes() {
1140        let mut m = sample_mark();
1141        m.agent_did = "did:nostr:\"weird\\did".into();
1142        let ttl = prov_ttl(&m);
1143        // The raw quote/backslash must be escaped inside the literal.
1144        assert!(ttl.contains("did:nostr:\\\"weird\\\\did"));
1145    }
1146
1147    #[test]
1148    fn xsd_datetime_known_epoch() {
1149        // 1_750_000_000 == 2025-06-15T15:06:40Z (verified against `date -u -d @1750000000`).
1150        assert_eq!(xsd_datetime(1_750_000_000), "2025-06-15T15:06:40Z");
1151        // Unix epoch.
1152        assert_eq!(xsd_datetime(0), "1970-01-01T00:00:00Z");
1153    }
1154
1155    #[test]
1156    fn provenance_error_display() {
1157        assert_eq!(
1158            ProvenanceError::Git("boom".into()).to_string(),
1159            "git-mark: boom"
1160        );
1161        assert_eq!(
1162            ProvenanceError::InvalidPath("/x.acl".into()).to_string(),
1163            "invalid provenance path: /x.acl"
1164        );
1165    }
1166
1167    // -----------------------------------------------------------------------
1168    // Phase 5: composition (AnchorPolicy + ProvenanceLog) — pure, mocked tiers
1169    // -----------------------------------------------------------------------
1170
1171    use std::sync::atomic::{AtomicUsize, Ordering};
1172
1173    /// In-memory [`GitMarker`] — fabricates a deterministic SHA per call and
1174    /// counts invocations. No subprocess, so it compiles + runs on wasm too.
1175    #[derive(Default)]
1176    struct MockMarker {
1177        calls: AtomicUsize,
1178    }
1179    #[async_trait::async_trait(?Send)]
1180    impl GitMarker for MockMarker {
1181        async fn mark_write(
1182            &self,
1183            _repo: &Path,
1184            path: &str,
1185            _agent_did: &str,
1186            _message: &str,
1187        ) -> Result<GitMark, ProvenanceError> {
1188            let n = self.calls.fetch_add(1, Ordering::SeqCst);
1189            // 40-hex deterministic SHA derived from the call ordinal + path.
1190            let sha = hex::encode(Sha256::digest(format!("{n}:{path}").as_bytes()))[..40].to_string();
1191            Ok(GitMark {
1192                commit_sha: sha,
1193                repo: "mockpod".into(),
1194                branch: "main".into(),
1195                parent: None,
1196            })
1197        }
1198        async fn head(&self, _repo: &Path) -> Result<Option<String>, ProvenanceError> {
1199            Ok(None)
1200        }
1201    }
1202
1203    /// In-memory [`BlockAnchorer`] — records the `state_hash` it was asked to
1204    /// anchor (so a test can assert the git SHA was bound) and counts calls.
1205    #[derive(Default)]
1206    struct MockAnchorer {
1207        calls: AtomicUsize,
1208        last_state_hash: std::sync::Mutex<Option<String>>,
1209    }
1210    #[async_trait::async_trait(?Send)]
1211    impl BlockAnchorer for MockAnchorer {
1212        async fn anchor(
1213            &self,
1214            ticker: &str,
1215            state_hash: &str,
1216            network: &str,
1217        ) -> Result<BlockTrailAnchor, ProvenanceError> {
1218            self.calls.fetch_add(1, Ordering::SeqCst);
1219            *self.last_state_hash.lock().unwrap() = Some(state_hash.to_string());
1220            Ok(BlockTrailAnchor {
1221                ticker: ticker.into(),
1222                state_hash: state_hash.into(),
1223                txid: "ab".repeat(32),
1224                vout: 0,
1225                address: "tb1pmock".into(),
1226                network: network.into(),
1227                blockheight: None,
1228                state_strings: vec!["{\"seq\":0}".into()],
1229                pubkey: Some("02".to_string() + &"ab".repeat(32)),
1230            })
1231        }
1232        async fn verify(&self, _anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError> {
1233            Ok(true)
1234        }
1235    }
1236
1237    fn repo() -> &'static Path {
1238        Path::new("/tmp/mockpod")
1239    }
1240
1241    /// Build a [`WriteRecord`] for the mock tiers (PROV trail on testnet4).
1242    fn rec<'a>(
1243        path: &'a str,
1244        policy: AnchorPolicy,
1245        high_value: bool,
1246        created: u64,
1247    ) -> WriteRecord<'a> {
1248        WriteRecord {
1249            repo: repo(),
1250            path,
1251            agent_did: "did:nostr:a",
1252            message: "PUT",
1253            policy,
1254            high_value,
1255            ticker: "PROV",
1256            network: "testnet4",
1257            created,
1258        }
1259    }
1260
1261    #[test]
1262    fn anchor_policy_inline_matrix() {
1263        assert!(!AnchorPolicy::Never.anchors_inline(true));
1264        assert!(!AnchorPolicy::Never.anchors_inline(false));
1265        assert!(AnchorPolicy::Always.anchors_inline(false));
1266        assert!(AnchorPolicy::Always.anchors_inline(true));
1267        assert!(AnchorPolicy::HighValue.anchors_inline(true));
1268        assert!(!AnchorPolicy::HighValue.anchors_inline(false));
1269        // Epoch never anchors inline — it defers to the accumulator.
1270        assert!(!AnchorPolicy::Epoch.anchors_inline(true));
1271        assert_eq!(AnchorPolicy::default(), AnchorPolicy::Never);
1272    }
1273
1274    #[tokio::test]
1275    async fn record_cheap_write_is_git_mark_only() {
1276        // Never policy ⇒ git-mark only, no anchor, anchorer untouched.
1277        let marker = Arc::new(MockMarker::default());
1278        let anchorer = Arc::new(MockAnchorer::default());
1279        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1280        let mark = log
1281            .record(rec("notes/a.ttl", AnchorPolicy::Never, false, 1_750_000_000))
1282            .await
1283            .unwrap();
1284        assert!(mark.anchor.is_none(), "cheap write must carry no anchor");
1285        assert_eq!(mark.resource, "/notes/a.ttl");
1286        assert_eq!(marker.calls.load(Ordering::SeqCst), 1, "git-mark always runs");
1287        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0, "anchorer must NOT be called");
1288    }
1289
1290    #[tokio::test]
1291    async fn record_high_value_write_carries_git_mark_and_anchor() {
1292        // HighValue + high_value=true ⇒ BOTH tiers present, and the anchor's
1293        // state_hash IS the git commit SHA (the two tiers are bound).
1294        let marker = Arc::new(MockMarker::default());
1295        let anchorer = Arc::new(MockAnchorer::default());
1296        let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1297        let mark = log
1298            .record(rec("receipts/r1.ttl", AnchorPolicy::HighValue, true, 1_750_000_000))
1299            .await
1300            .unwrap();
1301        let anchor = mark.anchor.expect("high-value write must carry an anchor");
1302        assert_eq!(
1303            anchor.state_hash, mark.git.commit_sha,
1304            "anchor must commit to the git SHA (binds the two tiers — §2.3)"
1305        );
1306        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1);
1307        assert_eq!(
1308            anchorer.last_state_hash.lock().unwrap().as_deref(),
1309            Some(mark.git.commit_sha.as_str())
1310        );
1311    }
1312
1313    #[tokio::test]
1314    async fn record_high_value_flag_false_is_git_only() {
1315        // HighValue policy but the resource is NOT flagged ⇒ git-mark only.
1316        let marker = Arc::new(MockMarker::default());
1317        let anchorer = Arc::new(MockAnchorer::default());
1318        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1319        let mark = log
1320            .record(rec("notes/x.ttl", AnchorPolicy::HighValue, false, 1))
1321            .await
1322            .unwrap();
1323        assert!(mark.anchor.is_none());
1324        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1325    }
1326
1327    #[tokio::test]
1328    async fn record_always_anchors_every_write() {
1329        let marker = Arc::new(MockMarker::default());
1330        let anchorer = Arc::new(MockAnchorer::default());
1331        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1332        for i in 0..3 {
1333            let m = log
1334                .record(rec(&format!("s/{i}.ttl"), AnchorPolicy::Always, false, 1))
1335                .await
1336                .unwrap();
1337            assert!(m.anchor.is_some());
1338        }
1339        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 3);
1340    }
1341
1342    #[tokio::test]
1343    async fn record_without_anchorer_degrades_to_git_only() {
1344        // No anchorer (the wasm / no-Bitcoin pod): even Always degrades to
1345        // git-mark only, silently.
1346        let marker = Arc::new(MockMarker::default());
1347        let log = ProvenanceLog::new(marker.clone());
1348        assert!(log.anchorer.is_none());
1349        let mark = log
1350            .record(rec("notes/a.ttl", AnchorPolicy::Always, true, 1))
1351            .await
1352            .unwrap();
1353        assert!(mark.anchor.is_none(), "no anchorer ⇒ no anchor regardless of policy");
1354        assert_eq!(marker.calls.load(Ordering::SeqCst), 1);
1355    }
1356
1357    #[tokio::test]
1358    async fn record_epoch_defers_anchoring_to_accumulator() {
1359        // Epoch policy: record() never anchors inline; the caller batches the
1360        // SHA and anchors the root once on epoch close.
1361        let marker = Arc::new(MockMarker::default());
1362        let anchorer = Arc::new(MockAnchorer::default());
1363        let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1364
1365        let mut epoch = EpochAccumulator::new(3);
1366        let mut shas = Vec::new();
1367        for i in 0..3 {
1368            let m = log
1369                .record(rec(&format!("e/{i}.ttl"), AnchorPolicy::Epoch, true, 1))
1370                .await
1371                .unwrap();
1372            assert!(m.anchor.is_none(), "epoch writes never anchor inline");
1373            epoch.push(m.git.commit_sha.clone());
1374            shas.push(m.git.commit_sha);
1375        }
1376        // No per-write anchors happened.
1377        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1378
1379        // Epoch is full → close → ONE anchor for the whole batch root.
1380        assert!(epoch.is_full());
1381        let closed = epoch.close().expect("non-empty epoch closes");
1382        assert_eq!(closed.commits, shas);
1383        let anchor = anchorer.anchor("PROV", &closed.root, "testnet4").await.unwrap();
1384        assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1, "ONE anchor notarises N commits");
1385        assert_eq!(anchor.state_hash, closed.root);
1386        // Epoch drained — a fresh epoch begins.
1387        assert!(epoch.is_empty());
1388    }
1389
1390    // ── Merkle tree: root determinism + inclusion proofs ──────────────────
1391
1392    #[test]
1393    fn merkle_root_empty_and_single() {
1394        assert_eq!(merkle_root(&[]), [0u8; 32]);
1395        let leaf = merkle_leaf("deadbeef");
1396        // A single leaf is its own root.
1397        assert_eq!(merkle_root(&[leaf]), leaf);
1398    }
1399
1400    #[test]
1401    fn merkle_root_is_deterministic_and_order_sensitive() {
1402        let a = merkle_leaf("aaa");
1403        let b = merkle_leaf("bbb");
1404        let r1 = merkle_root(&[a, b]);
1405        let r2 = merkle_root(&[a, b]);
1406        assert_eq!(r1, r2, "deterministic");
1407        let swapped = merkle_root(&[b, a]);
1408        assert_ne!(r1, swapped, "leaf order changes the root");
1409    }
1410
1411    #[test]
1412    fn epoch_root_matches_close_root() {
1413        let mut e = EpochAccumulator::new(10);
1414        for i in 0..5 {
1415            e.push(format!("commit{i:040}"));
1416        }
1417        let peeked = e.root().unwrap();
1418        let closed = e.close().unwrap();
1419        assert_eq!(peeked, closed.root, "root() peek == close() root");
1420    }
1421
1422    #[test]
1423    fn epoch_inclusion_proof_verifies_for_every_leaf() {
1424        // N commits → one root → each commit's inclusion proof verifies.
1425        let n = 7; // odd, to exercise last-node duplication
1426        let mut e = EpochAccumulator::new(n);
1427        for i in 0..n {
1428            e.push(format!("c{i:039}")); // 40-char commit-like ids
1429        }
1430        let root = e.root().unwrap();
1431        for i in 0..n {
1432            let proof = e.inclusion_proof(i).expect("proof for in-range leaf");
1433            assert!(
1434                EpochAccumulator::verify_inclusion(&proof, &root),
1435                "leaf {i} must verify against the anchored root"
1436            );
1437        }
1438        // Out-of-range index → no proof.
1439        assert!(e.inclusion_proof(n).is_none());
1440    }
1441
1442    #[test]
1443    fn epoch_inclusion_proof_rejects_wrong_root_and_tampered_leaf() {
1444        let mut e = EpochAccumulator::new(4);
1445        for i in 0..4 {
1446            e.push(format!("c{i:039}"));
1447        }
1448        let root = e.root().unwrap();
1449        let mut proof = e.inclusion_proof(1).unwrap();
1450        // Wrong root → reject.
1451        assert!(!EpochAccumulator::verify_inclusion(&proof, &"00".repeat(32)));
1452        // Tampered leaf → reject against the genuine root.
1453        proof.leaf = hex::encode(merkle_leaf("forged"));
1454        assert!(!EpochAccumulator::verify_inclusion(&proof, &root));
1455    }
1456
1457    #[test]
1458    fn epoch_threshold_and_len_tracking() {
1459        let mut e = EpochAccumulator::new(2);
1460        assert_eq!(e.threshold(), 2);
1461        assert!(e.is_empty() && !e.is_full());
1462        e.push("a");
1463        assert_eq!(e.len(), 1);
1464        assert!(!e.is_full());
1465        e.push("b");
1466        assert!(e.is_full(), "reaching threshold ⇒ full");
1467        // Threshold clamps to ≥ 1.
1468        assert_eq!(EpochAccumulator::new(0).threshold(), 1);
1469    }
1470
1471    #[test]
1472    fn empty_epoch_close_is_none() {
1473        let mut e = EpochAccumulator::new(3);
1474        assert!(e.close().is_none());
1475        assert!(e.root().is_none());
1476    }
1477
1478    #[test]
1479    fn merkle_proof_round_trips() {
1480        let p = MerkleProof {
1481            leaf: hex::encode(merkle_leaf("x")),
1482            siblings: vec![("ab".repeat(32), true), ("cd".repeat(32), false)],
1483        };
1484        let json = serde_json::to_string(&p).unwrap();
1485        let back: MerkleProof = serde_json::from_str(&json).unwrap();
1486        assert_eq!(p, back);
1487    }
1488
1489    #[test]
1490    fn anchor_policy_round_trips() {
1491        for p in [AnchorPolicy::Never, AnchorPolicy::Always, AnchorPolicy::HighValue, AnchorPolicy::Epoch] {
1492            let json = serde_json::to_string(&p).unwrap();
1493            let back: AnchorPolicy = serde_json::from_str(&json).unwrap();
1494            assert_eq!(p, back);
1495        }
1496    }
1497}