solid_pod_rs/provenance.rs
1//! Provenance primitives — composable, cost-tiered traceability for pod writes.
2//!
3//! Implements the data model and traits from
4//! [`docs/design/provenance-upgrade-master-plan.md`](../../docs/design/provenance-upgrade-master-plan.md)
5//! §2 and [ADR-059](../../docs/adr/ADR-059-provenance-primitives-block-trails-git-marks.md)
6//! (D1, D2, D4, D6). Two tiers compose into one chain:
7//!
8//! - **git-mark** (cheap, always-on): every pod write becomes a git commit;
9//! the commit SHA is captured as a [`GitMark`]. Content-addressed,
10//! append-only, tamper-evident ordering for free. The native implementation
11//! of [`GitMarker`] lives in `solid-pod-rs-git::mark` (it shells to `git`);
12//! wasm consumers compile against a no-op marker.
13//! - **block-trail anchor** (expensive, opt-in): a Bitcoin-anchored MRC20 state
14//! whose taproot UTXO externally timestamps a record ([`BlockTrailAnchor`]).
15//! Reserved for high-value records. The [`BlockAnchorer`] trait is defined
16//! here; a real implementation lands in Phase 4 (`bitcoin_tx.rs` + mempool).
17//!
18//! A [`ProvenanceMark`] always carries a [`GitMark`] and *optionally* a
19//! [`BlockTrailAnchor`]. The anchor's `state_hash` commits to the git SHA (or an
20//! epoch Merkle root over many commits), binding both tiers into one chain.
21//!
22//! ## wasm32 safety
23//!
24//! Everything in this module — the types and [`prov_ttl`] — is pure logic and
25//! compiles for `wasm32-unknown-unknown`. The traits are `?Send` (matching the
26//! crate's existing [`crate::payments::PaymentStore`] pattern) so a wasm
27//! single-threaded executor can implement them. No `tokio`, no process spawning,
28//! no I/O leaks into this surface.
29
30use std::path::Path;
31use std::sync::Arc;
32
33use serde::{Deserialize, Serialize};
34use sha2::{Digest, Sha256};
35
36// ---------------------------------------------------------------------------
37// Data model (§2.1)
38// ---------------------------------------------------------------------------
39
40/// A provenance mark over a pod resource write.
41///
42/// Always carries a git commit ([`GitMark`]); optionally upgraded with a
43/// Bitcoin block-trail anchor ([`BlockTrailAnchor`]) for high-value records.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct ProvenanceMark {
46 /// Pod-relative path of the resource the write targeted.
47 pub resource: String,
48 /// The git commit the write produced — **always present** (cheap tier).
49 pub git: GitMark,
50 /// Optional Bitcoin block-trail anchor — **opt-in** (expensive tier).
51 #[serde(default, skip_serializing_if = "Option::is_none")]
52 pub anchor: Option<BlockTrailAnchor>,
53 /// `did:nostr` of the writer (NIP-98 authenticated principal), or an
54 /// anonymous marker when the write was unauthenticated.
55 pub agent_did: String,
56 /// Unix seconds at which the mark was produced.
57 pub created: u64,
58}
59
60/// The cheap-tier git commit captured for a pod write.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct GitMark {
63 /// Git SHA-1 of the commit the write produced.
64 pub commit_sha: String,
65 /// Pod repo slug (the pod's first path segment / pubkey).
66 pub repo: String,
67 /// Branch the commit landed on. Pinned to `"main"` by `init.rs`.
68 pub branch: String,
69 /// Prior commit SHA (the append-only chain link), or `None` for the
70 /// genesis commit of a freshly-initialised repo.
71 #[serde(default, skip_serializing_if = "Option::is_none")]
72 pub parent: Option<String>,
73}
74
75/// The on-disk `gitmark.json` envelope — the Carvalho-lineage substrate
76/// shape (ADR-124, C7), verified byte-for-byte against
77/// `microfed/gitmark.json`.
78///
79/// **Exactly five keys**: `@id`, `genesis`, `nick`, `package`, `repository`.
80/// `@context`/`@type`/`commit`/`parent` are deliberately ABSENT — they are
81/// not in the ground-truth file (parent-linkage lives in `blocktrails.json`
82/// `states[]`/`txo[]`, not here). For byte-parity with create-agent, emit
83/// only these five keys.
84///
85/// `@id` is `gitmark:<commit_sha>:<vout>`; `genesis` is
86/// `gitmark:<first-commit-sha>:0` (and equals `@id` for the first mark).
87/// This is a *projection* of the internal [`GitMark`] — the in-memory
88/// `{commit_sha, repo, branch, parent}` shape (used by the PROV-O sidecar +
89/// the composition log) is unchanged; only the on-disk substrate file
90/// adopts this shape.
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct GitMarkEnvelope {
93 /// `gitmark:<commit_sha>:<vout>` — the single-use-seal coordinate.
94 #[serde(rename = "@id")]
95 pub id: String,
96 /// `gitmark:<first-commit-sha>:0` — the trail's genesis seal.
97 pub genesis: String,
98 /// Short human name for the mark (e.g. the package/pod nickname).
99 pub nick: String,
100 /// Pod-relative package path (e.g. `./package.json`).
101 pub package: String,
102 /// Repo-relative root (e.g. `./`).
103 pub repository: String,
104}
105
106impl GitMark {
107 /// Project this internal [`GitMark`] onto the canonical 5-key
108 /// `gitmark.json` envelope (ADR-124, C7).
109 ///
110 /// - `vout` is the seal output index for this mark's `@id`
111 /// (`gitmark:<commit_sha>:<vout>`).
112 /// - `genesis_sha` is the trail's first commit SHA; pass this mark's own
113 /// `commit_sha` for the genesis mark (then `genesis == @id`).
114 /// - `nick`/`package` are the additive projection fields; `repository`
115 /// defaults to `./` (repo-relative root, matching the ground truth).
116 ///
117 /// `branch`/`parent` are intentionally NOT emitted — they are not part of
118 /// the on-disk envelope. Parent-linkage is carried by `blocktrails.json`.
119 #[must_use]
120 pub fn to_gitmark_envelope(
121 &self,
122 vout: u32,
123 genesis_sha: &str,
124 nick: impl Into<String>,
125 package: impl Into<String>,
126 ) -> GitMarkEnvelope {
127 GitMarkEnvelope {
128 id: format!("gitmark:{}:{vout}", self.commit_sha),
129 genesis: format!("gitmark:{genesis_sha}:0"),
130 nick: nick.into(),
131 package: package.into(),
132 repository: "./".to_string(),
133 }
134 }
135
136 /// Serialise this mark to the canonical `gitmark.json` text (5-key
137 /// envelope, ADR-124). Convenience over [`Self::to_gitmark_envelope`] +
138 /// `serde_json::to_string_pretty`.
139 pub fn to_gitmark_json(
140 &self,
141 vout: u32,
142 genesis_sha: &str,
143 nick: impl Into<String>,
144 package: impl Into<String>,
145 ) -> Result<String, ProvenanceError> {
146 let env = self.to_gitmark_envelope(vout, genesis_sha, nick, package);
147 serde_json::to_string_pretty(&env)
148 .map_err(|e| ProvenanceError::Store(format!("gitmark.json serialise: {e}")))
149 }
150}
151
152/// The expensive-tier Bitcoin anchor for a record.
153///
154/// Reuses the existing [`crate::mrc20`] crypto (`Mrc20State`, `bt_address`,
155/// `verify_mrc20_anchor`) — no crypto is re-implemented here. The
156/// `state_strings` carry the portable, independently-verifiable proof.
157#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
158pub struct BlockTrailAnchor {
159 /// Trail ticker / identifier.
160 pub ticker: String,
161 /// `sha256_hex(jcs(state))` — links into the MRC20 trail and commits to
162 /// the git SHA (or an epoch Merkle root).
163 pub state_hash: String,
164 /// Bitcoin transaction id of the anchoring UTXO.
165 pub txid: String,
166 /// Output index of the anchoring UTXO.
167 pub vout: u32,
168 /// Derived P2TR address (`mrc20::bt_address`).
169 pub address: String,
170 /// `"testnet4"` | `"mainnet"` (or any network the operator configures).
171 pub network: String,
172 /// Confirmation height; `None` until the anchoring tx confirms.
173 #[serde(default, skip_serializing_if = "Option::is_none")]
174 pub blockheight: Option<u64>,
175 /// Portable, independently-verifiable proof — the serialised states.
176 #[serde(default)]
177 pub state_strings: Vec<String>,
178 /// Issuer's compressed pubkey (66-char hex). Together with
179 /// `state_strings` it re-derives the taproot `address` via
180 /// `mrc20::bt_address` — the read-side check
181 /// ([`BlockAnchorer::verify`](crate::provenance::BlockAnchorer::verify))
182 /// needs it to confirm `address` was not forged. `None` on legacy /
183 /// partially-populated anchors (verify then has nothing to re-derive
184 /// against and reports `false`).
185 #[serde(default, skip_serializing_if = "Option::is_none")]
186 pub pubkey: Option<String>,
187}
188
189/// A single UTXO step in a [`BlocktrailEnvelope`] `txo[]` chain — the
190/// BIP-341 single-use-seal coordinate (`<txid>:<vout>`) plus its
191/// confirmation status.
192#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
193pub struct BlocktrailTxo {
194 /// `<txid>:<vout>` — the seal output this state was sealed under.
195 pub outpoint: String,
196 /// Confirmation height; `None` until the sealing tx confirms.
197 #[serde(default, skip_serializing_if = "Option::is_none")]
198 pub blockheight: Option<u64>,
199}
200
201/// The on-disk `blocktrails.json` envelope — the 4th web-contract layer
202/// (ADR-124 §2.2). The `gitmark.json` substrate anchors the reducer/state/
203/// ledger layers; `blocktrails.json` is the **trail** layer.
204///
205/// Per the reconciliation (C6): this shape is reconstructed from the
206/// `webcontracts.org` / "Melvo Predicts" reference pattern — it is NOT
207/// byte-verifiable against a fetchable create-agent artefact (the
208/// create-agent repo contains only `gitmark.json`). Only `gitmark.json` is
209/// "verbatim".
210///
211/// Shape: `@type "Blocktrail"`, `profile "gitmark"` (the anchoring
212/// substrate), a BIP-341 single-use-seal chain expressed as `states[]`
213/// (the commit SHAs — the ledger states, in order) paired with `txo[]` (the
214/// UTXO chain that seals them). `genesis` ties back to the `gitmark.json`
215/// genesis seal.
216#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
217pub struct BlocktrailEnvelope {
218 /// `gitmark:<first-commit-sha>:0` — the genesis seal (shared with the
219 /// `gitmark.json` `genesis`).
220 #[serde(rename = "@id")]
221 pub id: String,
222 /// Always `"Blocktrail"`.
223 #[serde(rename = "@type")]
224 pub type_: String,
225 /// Anchoring substrate profile — always `"gitmark"` here.
226 pub profile: String,
227 /// `gitmark:<first-commit-sha>:0`.
228 pub genesis: String,
229 /// The ledger states in order — commit SHAs the trail notarises.
230 pub states: Vec<String>,
231 /// The BIP-341 single-use-seal UTXO chain sealing `states[]` (1:1 order).
232 pub txo: Vec<BlocktrailTxo>,
233}
234
235impl BlocktrailEnvelope {
236 /// Build a `Blocktrail` over an ordered set of commit SHAs (`states`)
237 /// and their sealing UTXO chain (`txo`), profiled on the `gitmark`
238 /// substrate (ADR-124 §2.2, C6 reference shape).
239 ///
240 /// `genesis_sha` is the trail's first commit SHA; the envelope's `@id`
241 /// and `genesis` are both `gitmark:<genesis_sha>:0`.
242 #[must_use]
243 pub fn new_gitmark_profile(
244 genesis_sha: &str,
245 states: Vec<String>,
246 txo: Vec<BlocktrailTxo>,
247 ) -> Self {
248 let genesis = format!("gitmark:{genesis_sha}:0");
249 Self {
250 id: genesis.clone(),
251 type_: "Blocktrail".to_string(),
252 profile: "gitmark".to_string(),
253 genesis,
254 states,
255 txo,
256 }
257 }
258
259 /// Serialise to the canonical `blocktrails.json` text.
260 pub fn to_blocktrails_json(&self) -> Result<String, ProvenanceError> {
261 serde_json::to_string_pretty(self)
262 .map_err(|e| ProvenanceError::Store(format!("blocktrails.json serialise: {e}")))
263 }
264}
265
266// ---------------------------------------------------------------------------
267// Errors
268// ---------------------------------------------------------------------------
269
/// Errors reported by the provenance primitives.
///
/// Written by hand rather than derived with `thiserror`, so that the type
/// builds on `wasm32` without pulling proc-macro evaluation into the pure
/// surface. The variants follow the crate's error-message style; each one
/// carries a free-form detail message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProvenanceError {
    /// The underlying git operation failed (spawn, commit, rev-parse, …).
    Git(String),
    /// The Bitcoin anchor operation failed (mempool, tx-build, verify, …).
    Anchor(String),
    /// The resource path was rejected (traversal, sidecar suffix, …).
    InvalidPath(String),
    /// Persisting or emitting the mark failed.
    Store(String),
}

impl std::fmt::Display for ProvenanceError {
    /// Writes `<tier prefix>: <detail>`, with a fixed prefix per variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (prefix, detail) = match self {
            Self::Git(detail) => ("git-mark", detail),
            Self::Anchor(detail) => ("block-anchor", detail),
            Self::InvalidPath(detail) => ("invalid provenance path", detail),
            Self::Store(detail) => ("provenance store", detail),
        };
        write!(f, "{prefix}: {detail}")
    }
}

impl std::error::Error for ProvenanceError {}
299
300// ---------------------------------------------------------------------------
301// Traits (§2.2)
302// ---------------------------------------------------------------------------
303
304/// Cheap tier. Implemented by `solid-pod-rs-git` (shells to `git`).
305///
306/// `?Send` for wasm32-safety, matching the crate's [`crate::payments::PaymentStore`]
307/// pattern. The wasm `core` consumer compiles against a no-op marker.
308#[async_trait::async_trait(?Send)]
309pub trait GitMarker: Send + Sync {
310 /// Stage `path` and commit it, returning the resulting [`GitMark`].
311 ///
312 /// `repo` is the absolute filesystem path to the (non-bare) pod repo;
313 /// `path` is the repo-relative path written; `agent_did` is recorded as
314 /// the commit author email; `message` is the commit subject. When there
315 /// is nothing to commit the implementation returns a mark referencing the
316 /// current HEAD without erroring.
317 async fn mark_write(
318 &self,
319 repo: &Path,
320 path: &str,
321 agent_did: &str,
322 message: &str,
323 ) -> Result<GitMark, ProvenanceError>;
324
325 /// Return the current HEAD commit SHA, or `None` for an unborn branch.
326 async fn head(&self, repo: &Path) -> Result<Option<String>, ProvenanceError>;
327}
328
329/// Expensive tier. Server-side (mempool + Bitcoin TX), behind feature `mrc20`.
330///
331/// Defined here; a real implementation lands in Phase 4 (`bitcoin_tx.rs`).
332#[async_trait::async_trait(?Send)]
333pub trait BlockAnchorer: Send + Sync {
334 /// Anchor `state_hash` under `ticker` on `network`, returning the produced
335 /// [`BlockTrailAnchor`]. Implemented by
336 /// `solid-pod-rs-server::mempool::MempoolBlockAnchorer` (builds + broadcasts
337 /// a taproot MRC20 anchoring tx via `bitcoin_tx.rs`).
338 async fn anchor(
339 &self,
340 ticker: &str,
341 state_hash: &str,
342 network: &str,
343 ) -> Result<BlockTrailAnchor, ProvenanceError>;
344
345 /// Verify a previously-produced anchor against the chain / fixtures
346 /// (re-derives the taproot address from the portable proof, then confirms a
347 /// UTXO sits at it).
348 async fn verify(&self, anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError>;
349}
350
351// ---------------------------------------------------------------------------
352// Composition policy (§2.3, ADR-059 D1/D5)
353// ---------------------------------------------------------------------------
354
355/// When a [`ProvenanceLog::record`] write should incur the expensive Bitcoin
356/// block-trail anchor on top of the always-on git-mark.
357///
358/// The cheap tier (git-mark) runs for *every* policy — these variants only
359/// govern the **opt-in** anchor. Pure, `Copy`, wasm-safe: it carries no I/O.
360///
361/// | Variant | Anchor behaviour |
362/// |--------------|--------------------------------------------------------|
363/// | [`Never`](AnchorPolicy::Never) | git-mark only — no on-chain cost. The default for ordinary writes. |
364/// | [`Always`](AnchorPolicy::Always) | anchor **every** write (commits the git SHA on-chain). Expensive; only for trails where every state must be externally timestamped. |
365/// | [`HighValue`](AnchorPolicy::HighValue) | anchor iff the resource is flagged anchor-worthy (its ACL carries a `ProvenanceAnchor` condition / the caller passes the high-value flag). Settlement receipts, elevation/ACSP decisions. |
366/// | [`Epoch`](AnchorPolicy::Epoch) | accumulate the git SHA into an [`EpochAccumulator`]; the batch root is anchored **once** on epoch close (one Bitcoin tx notarises many commits — ADR-059 D5). |
367///
368/// An anchor is attempted only when the policy says so **and** the
369/// [`ProvenanceLog`] was built with an anchorer ([`ProvenanceLog::anchorer`]
370/// is `Some`). With `anchorer: None` (the wasm / no-Bitcoin pod) every policy
371/// degrades to git-mark-only, silently.
372#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
373pub enum AnchorPolicy {
374 /// git-mark only; never anchor. The default for ordinary pod writes.
375 #[default]
376 Never,
377 /// Anchor every write — the git commit SHA is committed on-chain each time.
378 Always,
379 /// Anchor only when the resource is flagged high-value (ACL carries a
380 /// `ProvenanceAnchor` condition). Otherwise git-mark only.
381 HighValue,
382 /// Accumulate the commit into the current epoch; the epoch's Merkle root is
383 /// anchored once on close (amortised on-chain cost — ADR-059 D5).
384 Epoch,
385}
386
387impl AnchorPolicy {
388 /// Whether *this write* should be anchored inline (i.e. produce a
389 /// [`BlockTrailAnchor`] on the returned mark), given whether the resource
390 /// is flagged high-value.
391 ///
392 /// - `Never` / `Epoch` ⇒ never inline (`Epoch` defers to the accumulator).
393 /// - `Always` ⇒ always inline.
394 /// - `HighValue` ⇒ inline iff `high_value`.
395 #[must_use]
396 pub fn anchors_inline(self, high_value: bool) -> bool {
397 match self {
398 AnchorPolicy::Never | AnchorPolicy::Epoch => false,
399 AnchorPolicy::Always => true,
400 AnchorPolicy::HighValue => high_value,
401 }
402 }
403}
404
405// ---------------------------------------------------------------------------
406// Composition log (§2.2 `ProvenanceLog`, §2.3 composition rule)
407// ---------------------------------------------------------------------------
408
409/// The composition point for the two provenance tiers (master-plan §2.2/§2.3,
410/// ADR-059 D1).
411///
412/// A `ProvenanceLog` always holds the cheap-tier [`GitMarker`] and *optionally*
413/// the expensive-tier [`BlockAnchorer`]. [`record`](ProvenanceLog::record)
414/// implements the **cheap-always, expensive-opt-in** rule:
415///
416/// 1. **Always** `marker.mark_write()` → [`GitMark`] (every write becomes a
417/// commit; we capture the SHA).
418/// 2. **Conditionally** `anchorer.anchor()` when the [`AnchorPolicy`] says this
419/// write anchors inline AND an anchorer is present. The anchor's
420/// `state_hash` is set to the git commit SHA — so the Bitcoin UTXO commits
421/// to the git history, **binding the two tiers into one chain** (§2.3).
422///
423/// The returned [`ProvenanceMark`] carries the git-mark always and the anchor
424/// when one was produced. Persisting the PROV-O sidecar and emitting the
425/// `Updates-via` notification (step 3) is the server's job — kept out of this
426/// pure surface so it compiles for wasm.
427///
428/// ## wasm32 safety
429///
430/// `Arc<dyn GitMarker>` / `Arc<dyn BlockAnchorer>` are `?Send` trait objects;
431/// the type holds no runtime. On wasm the pod constructs it with a no-op marker
432/// and `anchorer: None`, so `record` is git-mark-only and never reaches any
433/// Bitcoin I/O.
434#[derive(Clone)]
435pub struct ProvenanceLog {
436 /// Cheap tier — always invoked. The native server injects
437 /// `solid-pod-rs-git`'s `ShellGitMarker`; wasm injects a no-op.
438 pub marker: Arc<dyn GitMarker>,
439 /// Expensive tier — optional. `None` in pods that do not pay for Bitcoin
440 /// anchoring (and always `None` on wasm).
441 pub anchorer: Option<Arc<dyn BlockAnchorer>>,
442}
443
444impl std::fmt::Debug for ProvenanceLog {
445 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
446 f.debug_struct("ProvenanceLog")
447 .field("marker", &"Arc<dyn GitMarker>")
448 .field("anchorer", &self.anchorer.as_ref().map(|_| "Arc<dyn BlockAnchorer>"))
449 .finish()
450 }
451}
452
453/// Descriptor of a single pod write passed to [`ProvenanceLog::record`].
454///
455/// Borrowed (no allocation on the hot path), mirroring
456/// [`crate::wac::conditions::RequestContext`]. Bundles the write identity
457/// (repo/path/agent/message), the expensive-tier [`AnchorPolicy`] + its
458/// `high_value` flag, and the trail coordinates (`ticker`/`network`) an anchor
459/// targets. The trail fields are ignored unless the policy actually anchors.
460#[derive(Debug, Clone, Copy)]
461pub struct WriteRecord<'a> {
462 /// Absolute filesystem path to the (non-bare) pod repo.
463 pub repo: &'a Path,
464 /// Repo-relative path of the resource written.
465 pub path: &'a str,
466 /// `did:nostr` of the writer (NIP-98 principal), or an anonymous marker.
467 pub agent_did: &'a str,
468 /// Commit subject (the LDP method + path).
469 pub message: &'a str,
470 /// Expensive-tier policy (see [`AnchorPolicy`]).
471 pub policy: AnchorPolicy,
472 /// Whether the resource is flagged high-value (ACL `ProvenanceAnchor`).
473 pub high_value: bool,
474 /// Trail ticker to anchor against (used only when anchoring).
475 pub ticker: &'a str,
476 /// Bitcoin network of the trail (used only when anchoring).
477 pub network: &'a str,
478 /// Unix seconds stamped onto the produced mark.
479 pub created: u64,
480}
481
482impl ProvenanceLog {
483 /// Construct a git-mark-only log (no Bitcoin tier). The common case for
484 /// ordinary pods and the only shape available on wasm.
485 #[must_use]
486 pub fn new(marker: Arc<dyn GitMarker>) -> Self {
487 Self { marker, anchorer: None }
488 }
489
490 /// Construct a log with both tiers wired.
491 #[must_use]
492 pub fn with_anchorer(marker: Arc<dyn GitMarker>, anchorer: Arc<dyn BlockAnchorer>) -> Self {
493 Self {
494 marker,
495 anchorer: Some(anchorer),
496 }
497 }
498
499 /// Record a pod resource write across both tiers (the composition rule).
500 ///
501 /// The write is described by a [`WriteRecord`]. Always commits (cheap tier).
502 /// Then, iff `policy.anchors_inline(high_value)` — both carried by the
503 /// [`WriteRecord`] — AND an anchorer is present, anchors the **git commit
504 /// SHA** under the record's `ticker`/`network`, attaching the
505 /// [`BlockTrailAnchor`] to the returned mark. The anchor's `state_hash` is
506 /// the commit SHA, binding git ↔ Bitcoin (master-plan §2.3).
507 ///
508 /// For [`AnchorPolicy::Epoch`] this method never anchors inline — the caller
509 /// feeds the returned `git.commit_sha` into an [`EpochAccumulator`] and
510 /// anchors the batch root on epoch close.
511 ///
512 /// Errors from the **cheap** tier propagate (the git-mark is the contract).
513 /// Errors from the **expensive** tier are returned too, so the caller can
514 /// decide its own best-effort policy — the server hook logs+swallows them
515 /// (a failed anchor must never fail the LDP write), exactly as it does for
516 /// the git-mark.
517 pub async fn record(&self, rec: WriteRecord<'_>) -> Result<ProvenanceMark, ProvenanceError> {
518 // 1. Cheap tier — ALWAYS. A failure here is a hard error: the git-mark
519 // is the always-on contract.
520 let git = self
521 .marker
522 .mark_write(rec.repo, rec.path, rec.agent_did, rec.message)
523 .await?;
524
525 // 2. Expensive tier — opt-in. Only when the policy anchors this write
526 // inline AND an anchorer is wired. The anchored state_hash IS the
527 // git commit SHA — the Bitcoin UTXO now commits to the git history
528 // (master-plan §2.3 "binds both primitives into one chain").
529 let anchor = if rec.policy.anchors_inline(rec.high_value) {
530 match &self.anchorer {
531 Some(a) => Some(a.anchor(rec.ticker, &git.commit_sha, rec.network).await?),
532 None => None,
533 }
534 } else {
535 None
536 };
537
538 Ok(ProvenanceMark {
539 resource: path_to_resource(rec.path),
540 git,
541 anchor,
542 agent_did: rec.agent_did.to_string(),
543 created: rec.created,
544 })
545 }
546}
547
/// Convert a repo-relative `path` into the pod-relative `resource` form that
/// a [`ProvenanceMark`] stores, i.e. with a leading slash.
///
/// A path that already begins with `/` is returned unchanged, which makes the
/// function idempotent. An empty path becomes `"/"`.
fn path_to_resource(path: &str) -> String {
    match path.strip_prefix('/') {
        // Already rooted: keep the input exactly as given.
        Some(_) => path.to_owned(),
        None => ["/", path].concat(),
    }
}
558
559// ---------------------------------------------------------------------------
560// Epoch Merkle-root anchoring (§2.3, ADR-059 D5) — pure, wasm-safe
561// ---------------------------------------------------------------------------
562
563/// Compute a binary SHA-256 Merkle root over `leaves` (each a 32-byte digest),
564/// duplicating the last node on an odd level (Bitcoin-style). Returns the
565/// all-zero digest for an empty input.
566///
567/// Pure and wasm-safe — uses only the always-compiled `sha2` dependency. Leaves
568/// are hashed *as given*; callers pass `sha256(commit_sha)` so the tree commits
569/// to the exact commit identifiers.
570fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] {
571 if leaves.is_empty() {
572 return [0u8; 32];
573 }
574 let mut level: Vec<[u8; 32]> = leaves.to_vec();
575 while level.len() > 1 {
576 let mut next = Vec::with_capacity(level.len().div_ceil(2));
577 let mut i = 0;
578 while i < level.len() {
579 let left = level[i];
580 // Duplicate the last node when the level is odd.
581 let right = if i + 1 < level.len() { level[i + 1] } else { left };
582 let mut h = Sha256::new();
583 h.update(left);
584 h.update(right);
585 next.push(h.finalize().into());
586 i += 2;
587 }
588 level = next;
589 }
590 level[0]
591}
592
593/// Hash one leaf value (a git commit SHA, as text) into the Merkle leaf digest.
594fn merkle_leaf(commit_sha: &str) -> [u8; 32] {
595 Sha256::digest(commit_sha.as_bytes()).into()
596}
597
598/// A Merkle inclusion proof: the sibling digests from leaf to root, each tagged
599/// with whether the sibling sits on the **right** of the running hash at that
600/// level. Verified with [`EpochAccumulator::verify_inclusion`].
601#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
602pub struct MerkleProof {
603 /// Hex of the leaf value's digest (`sha256(commit_sha)`).
604 pub leaf: String,
605 /// Sibling steps from the leaf upward: `(sibling_hex, sibling_is_right)`.
606 pub siblings: Vec<(String, bool)>,
607}
608
/// Collects git commit SHAs into an epoch and, on close, produces the single
/// Merkle root to anchor (ADR-059 D5 — *one Bitcoin tx notarises many
/// commits*).
///
/// Intended flow: a write whose [`AnchorPolicy`] is
/// [`Epoch`](AnchorPolicy::Epoch) calls [`push`](EpochAccumulator::push) with
/// the commit SHA its git-mark produced. Once the configured commit-count
/// threshold is reached ([`is_full`](EpochAccumulator::is_full)), the caller
/// [`close`s](EpochAccumulator::close) the epoch to get the root (hex) and
/// the batched SHAs, anchors that root **once** through a [`BlockAnchorer`],
/// and continues with a fresh epoch. With a per-commit
/// [`inclusion_proof`](EpochAccumulator::inclusion_proof) any commit can be
/// proven against the anchored root without re-anchoring.
///
/// Pure and wasm-safe: neither the accumulator nor the Merkle maths performs
/// I/O; the one anchor call is made by the caller through the (optional)
/// anchorer.
#[derive(Debug, Clone)]
pub struct EpochAccumulator {
    /// Commit SHAs gathered so far this epoch; insertion order is leaf order.
    commits: Vec<String>,
    /// Commit count at which the epoch counts as full. Operator policy
    /// (master-plan §5: "ACL writes epoch-only to bound cost").
    threshold: usize,
}
632
/// Outcome of closing an epoch: the Merkle root to anchor, together with the
/// batch of commit SHAs that the root commits to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClosedEpoch {
    /// Hex SHA-256 Merkle root over the epoch's commit-SHA leaves. This is the
    /// one value anchored on-chain for the whole batch.
    pub root: String,
    /// Commit SHAs notarised by `root`, in leaf order.
    pub commits: Vec<String>,
}
643
644impl EpochAccumulator {
645 /// New, empty epoch with a close `threshold` (clamped to ≥ 1).
646 #[must_use]
647 pub fn new(threshold: usize) -> Self {
648 Self {
649 commits: Vec::new(),
650 threshold: threshold.max(1),
651 }
652 }
653
654 /// Add a git commit SHA to the current epoch.
655 pub fn push(&mut self, commit_sha: impl Into<String>) {
656 self.commits.push(commit_sha.into());
657 }
658
659 /// Number of commits accumulated this epoch.
660 #[must_use]
661 pub fn len(&self) -> usize {
662 self.commits.len()
663 }
664
665 /// Whether the epoch holds no commits.
666 #[must_use]
667 pub fn is_empty(&self) -> bool {
668 self.commits.is_empty()
669 }
670
671 /// The configured close threshold.
672 #[must_use]
673 pub fn threshold(&self) -> usize {
674 self.threshold
675 }
676
677 /// Whether the epoch has reached its close threshold (time to anchor).
678 #[must_use]
679 pub fn is_full(&self) -> bool {
680 self.commits.len() >= self.threshold
681 }
682
683 /// The current Merkle root (hex) over the accumulated commits, without
684 /// draining. Returns `None` for an empty epoch (nothing to anchor).
685 #[must_use]
686 pub fn root(&self) -> Option<String> {
687 if self.commits.is_empty() {
688 return None;
689 }
690 let leaves: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
691 Some(hex::encode(merkle_root(&leaves)))
692 }
693
694 /// Seal the epoch: compute the root, return it with the batched commit SHAs,
695 /// and **drain** the accumulator so a fresh epoch begins. Returns `None`
696 /// (and drains nothing) for an empty epoch.
697 pub fn close(&mut self) -> Option<ClosedEpoch> {
698 if self.commits.is_empty() {
699 return None;
700 }
701 let commits = std::mem::take(&mut self.commits);
702 let leaves: Vec<[u8; 32]> = commits.iter().map(|c| merkle_leaf(c)).collect();
703 let root = hex::encode(merkle_root(&leaves));
704 Some(ClosedEpoch { root, commits })
705 }
706
707 /// Produce an inclusion proof for the commit at leaf `index` against the
708 /// *current* set of accumulated commits. `None` if `index` is out of range.
709 ///
710 /// The proof verifies against the root produced by [`root`](Self::root) /
711 /// [`close`](Self::close) over the same commit set — i.e. against the value
712 /// anchored on-chain.
713 #[must_use]
714 pub fn inclusion_proof(&self, index: usize) -> Option<MerkleProof> {
715 let n = self.commits.len();
716 if index >= n {
717 return None;
718 }
719 let mut level: Vec<[u8; 32]> = self.commits.iter().map(|c| merkle_leaf(c)).collect();
720 let leaf_hex = hex::encode(level[index]);
721 let mut idx = index;
722 let mut siblings: Vec<(String, bool)> = Vec::new();
723 while level.len() > 1 {
724 let sibling_idx = if idx % 2 == 0 { idx + 1 } else { idx - 1 };
725 // On an odd level the rightmost node is paired with itself.
726 let sib = if sibling_idx < level.len() {
727 level[sibling_idx]
728 } else {
729 level[idx]
730 };
731 let sibling_is_right = idx % 2 == 0;
732 siblings.push((hex::encode(sib), sibling_is_right));
733
734 // Build the next level.
735 let mut next = Vec::with_capacity(level.len().div_ceil(2));
736 let mut i = 0;
737 while i < level.len() {
738 let left = level[i];
739 let right = if i + 1 < level.len() { level[i + 1] } else { left };
740 let mut h = Sha256::new();
741 h.update(left);
742 h.update(right);
743 next.push(h.finalize().into());
744 i += 2;
745 }
746 level = next;
747 idx /= 2;
748 }
749 Some(MerkleProof {
750 leaf: leaf_hex,
751 siblings,
752 })
753 }
754
755 /// Verify a [`MerkleProof`] against an expected `root_hex` (the anchored
756 /// root). Recomputes the path and compares — no accumulator state needed,
757 /// so a verifier can check inclusion with only the proof + the on-chain
758 /// root.
759 #[must_use]
760 pub fn verify_inclusion(proof: &MerkleProof, root_hex: &str) -> bool {
761 let Ok(mut acc) = hex::decode(&proof.leaf) else {
762 return false;
763 };
764 if acc.len() != 32 {
765 return false;
766 }
767 for (sib_hex, sib_is_right) in &proof.siblings {
768 let Ok(sib) = hex::decode(sib_hex) else {
769 return false;
770 };
771 if sib.len() != 32 {
772 return false;
773 }
774 let mut h = Sha256::new();
775 if *sib_is_right {
776 h.update(&acc);
777 h.update(&sib);
778 } else {
779 h.update(&sib);
780 h.update(&acc);
781 }
782 acc = h.finalize().to_vec();
783 }
784 hex::encode(acc) == root_hex
785 }
786}
787
788// ---------------------------------------------------------------------------
789// PROV-O serialiser (§2.3 step 3, D7)
790// ---------------------------------------------------------------------------
791
/// Make `s` safe to embed between the double quotes of a Turtle string
/// literal (RDF 1.1 Turtle §2.5.3 / §6.4 string escapes).
///
/// Backslash, double quote, line feed, carriage return and tab are replaced
/// by their two-character backslash escapes; every other character is copied
/// through unchanged.
fn ttl_escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '"' => Some("\\\""),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match replacement {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
808
/// Format `secs` (seconds since the Unix epoch) as a UTC `xsd:dateTime`
/// lexical value such as `1970-01-01T00:00:00Z`.
///
/// Uses integer arithmetic only (Howard Hinnant's civil-from-days algorithm),
/// so the result is exact and deterministic and no date/time library is
/// pulled into this pure surface.
fn xsd_datetime(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    // Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;

    // Time of day.
    let second_of_day = secs % SECS_PER_DAY;
    let hh = second_of_day / 3600;
    let mm = second_of_day % 3600 / 60;
    let ss = second_of_day % 60;

    // Re-base the day count so that day 0 is 0000-03-01; starting the year in
    // March pushes the leap day to the end of the cycle. The quotient is at
    // most about 2.1e14, so the cast to i64 cannot wrap.
    let shifted = (secs / SECS_PER_DAY) as i64 + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month counted from March: 0 = March … 11 = February.
    let month_index = (5 * day_of_year + 2) / 153;
    let d = day_of_year - (153 * month_index + 2) / 5 + 1;
    let m = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the following civil year.
    let y = year_of_era + era * 400 + i64::from(m <= 2);

    format!("{y:04}-{m:02}-{d:02}T{hh:02}:{mm:02}:{ss:02}Z")
}
833
834/// Produce a minimal, correct PROV-O Turtle sidecar for a [`ProvenanceMark`].
835///
836/// The mark is modelled as a `prov:Activity` (the write) that
837/// `prov:generated` the resource entity, was performed by the agent
838/// (`prov:wasAssociatedWith`), and is identified by its git commit SHA. The
839/// resource entity records `prov:wasGeneratedBy` the activity. When a
840/// block-trail anchor is present it is emitted as an associated entity bearing
841/// the txid/state-hash so the sidecar carries both tiers.
842///
843/// Kept deliberately small: stable prefix block, one activity, one entity, one
844/// agent, optional anchor entity. Round-trip-safe with the unit tests below.
845pub fn prov_ttl(mark: &ProvenanceMark) -> String {
846 let sha = &mark.git.commit_sha;
847 let resource = ttl_escape(&mark.resource);
848 let agent = ttl_escape(&mark.agent_did);
849 let branch = ttl_escape(&mark.git.branch);
850 let repo = ttl_escape(&mark.git.repo);
851 let when = xsd_datetime(mark.created);
852
853 let mut ttl = String::new();
854 ttl.push_str("@prefix prov: <http://www.w3.org/ns/prov#> .\n");
855 ttl.push_str("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n");
856 ttl.push_str("@prefix git: <https://w3id.org/git#> .\n");
857 ttl.push_str("@prefix bt: <https://blocktrails.org/ns#> .\n\n");
858
859 // Activity: the write, identified by the commit it produced.
860 ttl.push_str(&format!("<urn:git:commit:{sha}> a prov:Activity ;\n"));
861 ttl.push_str(&format!(" prov:generated <{resource}> ;\n"));
862 ttl.push_str(&format!(" prov:wasAssociatedWith <{agent}> ;\n"));
863 ttl.push_str(&format!(" prov:endedAtTime \"{when}\"^^xsd:dateTime ;\n"));
864 ttl.push_str(&format!(" git:commit \"{sha}\" ;\n"));
865 ttl.push_str(&format!(" git:branch \"{branch}\" ;\n"));
866 ttl.push_str(&format!(" git:repo \"{repo}\" "));
867 if let Some(parent) = &mark.git.parent {
868 let parent = ttl_escape(parent);
869 ttl.push_str(&format!(";\n git:parent \"{parent}\" .\n"));
870 } else {
871 ttl.push_str(".\n");
872 }
873
874 // Entity: the generated resource.
875 ttl.push('\n');
876 ttl.push_str(&format!("<{resource}> a prov:Entity ;\n"));
877 ttl.push_str(&format!(
878 " prov:wasGeneratedBy <urn:git:commit:{sha}> ;\n"
879 ));
880 ttl.push_str(&format!(
881 " prov:wasAttributedTo <{agent}> .\n"
882 ));
883
884 // Agent.
885 ttl.push('\n');
886 ttl.push_str(&format!("<{agent}> a prov:Agent .\n"));
887
888 // Optional anchor entity (expensive tier).
889 if let Some(a) = &mark.anchor {
890 let txid = ttl_escape(&a.txid);
891 let ticker = ttl_escape(&a.ticker);
892 let state_hash = ttl_escape(&a.state_hash);
893 let network = ttl_escape(&a.network);
894 ttl.push('\n');
895 ttl.push_str(&format!("<urn:bt:tx:{txid}:{}> a prov:Entity ;\n", a.vout));
896 ttl.push_str(&format!(
897 " prov:wasDerivedFrom <urn:git:commit:{sha}> ;\n"
898 ));
899 ttl.push_str(&format!(" bt:ticker \"{ticker}\" ;\n"));
900 ttl.push_str(&format!(" bt:stateHash \"{state_hash}\" ;\n"));
901 ttl.push_str(&format!(" bt:network \"{network}\" ;\n"));
902 ttl.push_str(&format!(" bt:txid \"{txid}\" ;\n"));
903 ttl.push_str(&format!(" bt:vout \"{}\"^^xsd:integer ", a.vout));
904 if let Some(h) = a.blockheight {
905 ttl.push_str(&format!(";\n bt:blockheight \"{h}\"^^xsd:integer .\n"));
906 } else {
907 ttl.push_str(".\n");
908 }
909 }
910
911 ttl
912}
913
914// ---------------------------------------------------------------------------
915// Tests
916// ---------------------------------------------------------------------------
917
918#[cfg(test)]
919mod tests {
920 use super::*;
921
922 fn sample_git() -> GitMark {
923 GitMark {
924 commit_sha: "a1b2c3d4e5f60718293a4b5c6d7e8f9001122334".into(),
925 repo: "deadbeef".into(),
926 branch: "main".into(),
927 parent: Some("00112233445566778899aabbccddeeff00112233".into()),
928 }
929 }
930
931 fn sample_mark() -> ProvenanceMark {
932 ProvenanceMark {
933 resource: "/notes/hello.ttl".into(),
934 git: sample_git(),
935 anchor: None,
936 agent_did: "did:nostr:abcdef".into(),
937 created: 1_750_000_000,
938 }
939 }
940
#[test]
fn git_mark_round_trips() {
    // Serialise to JSON and back; the value must survive unchanged.
    let original = sample_git();
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: GitMark = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded);
}
948
949 // ── ADR-124: gitmark.json / blocktrails.json substrate envelopes ──────
950
#[test]
fn gitmark_envelope_has_exactly_five_keys() {
    // C7 + CI invariant #6: gitmark.json is EXACTLY {@id, genesis, nick,
    // package, repository}. No @context/@type/commit/parent.
    let mark = sample_git();
    let envelope = mark.to_gitmark_envelope(0, &mark.commit_sha, "gitmark", "./package.json");
    let value: serde_json::Value = serde_json::to_value(&envelope).unwrap();
    let fields = value.as_object().unwrap();

    let mut names: Vec<&str> = fields.keys().map(String::as_str).collect();
    names.sort_unstable();
    assert_eq!(
        names,
        ["@id", "genesis", "nick", "package", "repository"],
        "gitmark.json must be exactly the 5-key envelope (C7)"
    );

    // Explicitly rule out the four keys that are not part of the envelope.
    for forbidden in ["@context", "@type", "commit", "parent"] {
        assert!(
            !fields.contains_key(forbidden),
            "gitmark.json must NOT carry `{forbidden}` (not in ground truth)"
        );
    }
}
974
#[test]
fn gitmark_envelope_projection_values() {
    let mark = sample_git();

    // Genesis mark: `genesis` and `@id` are the same identifier.
    let own_id = format!("gitmark:{}:0", mark.commit_sha);
    let first = mark.to_gitmark_envelope(0, &mark.commit_sha, "pod", "./package.json");
    assert_eq!(first.id, own_id);
    assert_eq!(first.genesis, own_id);
    assert_eq!(first.repository, "./");

    // Later mark: a different genesis SHA and a non-zero vout.
    let genesis_sha = "00".repeat(20);
    let later = mark.to_gitmark_envelope(2, genesis_sha.as_str(), "pod", "./package.json");
    assert_eq!(later.id, format!("gitmark:{}:2", mark.commit_sha));
    assert_eq!(later.genesis, format!("gitmark:{}:0", genesis_sha));
    assert_ne!(later.id, later.genesis);
}
989
#[test]
fn gitmark_json_matches_carvalho_shape() {
    // Mirrors microfed/gitmark.json key set + ordering-agnostic shape.
    let mark = sample_git();
    let rendered = mark
        .to_gitmark_json(0, &mark.commit_sha, "gitmark", "./package.json")
        .unwrap();
    let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();

    assert_eq!(parsed["@id"], format!("gitmark:{}:0", mark.commit_sha));
    for (key, expected) in [
        ("nick", "gitmark"),
        ("package", "./package.json"),
        ("repository", "./"),
    ] {
        assert_eq!(parsed[key], expected);
    }
}
1001
#[test]
fn blocktrail_envelope_shape() {
    // C6 webcontracts reference shape: @type Blocktrail, profile gitmark,
    // states[] = commit SHAs, txo[] = UTXO chain.
    let mark = sample_git();
    let outpoint = format!("{}:0", "ab".repeat(32));
    let envelope = BlocktrailEnvelope::new_gitmark_profile(
        &mark.commit_sha,
        vec![mark.commit_sha.clone()],
        vec![BlocktrailTxo {
            outpoint: outpoint.clone(),
            blockheight: Some(840_000),
        }],
    );

    // In-memory shape.
    assert_eq!(envelope.type_, "Blocktrail");
    assert_eq!(envelope.profile, "gitmark");
    assert_eq!(envelope.id, format!("gitmark:{}:0", mark.commit_sha));
    assert_eq!(envelope.genesis, envelope.id);
    assert_eq!(envelope.states, vec![mark.commit_sha.clone()]);
    assert_eq!(envelope.txo.len(), 1);

    // JSON shape: @type / profile / states / txo present.
    let rendered = envelope.to_blocktrails_json().unwrap();
    let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert_eq!(parsed["@type"], "Blocktrail");
    assert_eq!(parsed["profile"], "gitmark");
    assert!(parsed["states"].is_array());
    assert!(parsed["txo"].is_array());
    assert_eq!(parsed["txo"][0]["outpoint"], outpoint);
}
1031
#[test]
fn blocktrail_envelope_round_trips() {
    // Two states and two outputs, one of them still unconfirmed.
    let states = vec!["aa".repeat(20), "bb".repeat(20)];
    let outputs = vec![
        BlocktrailTxo {
            outpoint: "t0:0".into(),
            blockheight: None,
        },
        BlocktrailTxo {
            outpoint: "t1:0".into(),
            blockheight: Some(1),
        },
    ];
    let original = BlocktrailEnvelope::new_gitmark_profile(&"cd".repeat(20), states, outputs);

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: BlocktrailEnvelope = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded);
}
1046
#[test]
fn provenance_mark_round_trips_without_anchor() {
    let original = sample_mark();
    let encoded = serde_json::to_string(&original).unwrap();
    // An absent anchor must not show up in the JSON at all
    // (skip_serializing_if).
    assert!(!encoded.contains("anchor"));
    let decoded: ProvenanceMark = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded);
}
1056
#[test]
fn provenance_mark_round_trips_with_anchor() {
    // A fully populated anchor, every optional field set.
    let anchor = BlockTrailAnchor {
        ticker: "PROV".into(),
        state_hash: "ff".repeat(32),
        txid: "ab".repeat(32),
        vout: 1,
        address: "tb1pexample".into(),
        network: "testnet4".into(),
        blockheight: Some(840_000),
        state_strings: vec!["{\"seq\":0}".into(), "{\"seq\":1}".into()],
        pubkey: Some(format!("02{}", "ab".repeat(32))),
    };
    let original = ProvenanceMark {
        anchor: Some(anchor),
        ..sample_mark()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: ProvenanceMark = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded);
}
1075
#[test]
fn block_trail_anchor_defaults_state_strings() {
    // Neither `state_strings` nor `blockheight` is present in this document;
    // deserialisation must fill in an empty vec and `None` respectively.
    let minimal = r#"{
        "ticker":"PROV","state_hash":"00","txid":"00","vout":0,
        "address":"tb1p","network":"testnet4"
    }"#;
    let anchor: BlockTrailAnchor = serde_json::from_str(minimal).unwrap();
    assert!(anchor.state_strings.is_empty());
    assert!(anchor.blockheight.is_none());
}
1087
#[test]
fn prov_ttl_contains_core_triples() {
    let ttl = prov_ttl(&sample_mark());
    let expected_fragments = [
        "@prefix prov: <http://www.w3.org/ns/prov#> .",
        "a prov:Activity",
        "prov:wasGeneratedBy",
        "prov:wasAssociatedWith <did:nostr:abcdef>",
        "a prov:Agent",
        // Commit sha appears as the activity id + git:commit literal.
        "<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334>",
        "git:commit \"a1b2c3d4e5f60718293a4b5c6d7e8f9001122334\"",
        "git:branch \"main\"",
        "git:parent \"00112233445566778899aabbccddeeff00112233\"",
        // The generated entity is the resource.
        "<urn:git:commit:a1b2c3d4e5f60718293a4b5c6d7e8f9001122334> a prov:Activity",
        "prov:generated </notes/hello.ttl>",
    ];
    for fragment in expected_fragments {
        assert!(ttl.contains(fragment), "missing fragment: {fragment}");
    }
}
1105
#[test]
fn prov_ttl_omits_parent_when_absent() {
    let mut mark = sample_mark();
    mark.git.parent = None;
    let rendered = prov_ttl(&mark);
    // Without a parent the activity block must end right after `git:repo`.
    assert!(rendered.contains("git:repo \"deadbeef\" .\n"));
    assert!(!rendered.contains("git:parent"));
}
1115
#[test]
fn prov_ttl_emits_anchor_block_when_present() {
    let mark = ProvenanceMark {
        anchor: Some(BlockTrailAnchor {
            ticker: "PROV".into(),
            state_hash: "deadbeef".into(),
            txid: "cafebabe".into(),
            vout: 2,
            address: "tb1pexample".into(),
            network: "testnet4".into(),
            blockheight: Some(840_000),
            state_strings: vec![],
            pubkey: None,
        }),
        ..sample_mark()
    };
    let rendered = prov_ttl(&mark);
    for fragment in [
        "<urn:bt:tx:cafebabe:2> a prov:Entity",
        "bt:ticker \"PROV\"",
        "bt:stateHash \"deadbeef\"",
        "bt:blockheight \"840000\"^^xsd:integer",
        "prov:wasDerivedFrom <urn:git:commit:",
    ] {
        assert!(rendered.contains(fragment), "missing fragment: {fragment}");
    }
}
1137
#[test]
fn prov_ttl_escapes_quotes_and_backslashes() {
    let mark = ProvenanceMark {
        agent_did: "did:nostr:\"weird\\did".into(),
        ..sample_mark()
    };
    let rendered = prov_ttl(&mark);
    // Wherever the agent DID is emitted, its quote and backslash must appear
    // in backslash-escaped form rather than raw.
    assert!(rendered.contains("did:nostr:\\\"weird\\\\did"));
}
1146
#[test]
fn xsd_datetime_known_epoch() {
    let cases = [
        // Unix epoch.
        (0, "1970-01-01T00:00:00Z"),
        // 1_750_000_000 == 2025-06-15T15:06:40Z (verified against `date -u -d @1750000000`).
        (1_750_000_000, "2025-06-15T15:06:40Z"),
    ];
    for (secs, expected) in cases {
        assert_eq!(xsd_datetime(secs), expected);
    }
}
1154
#[test]
fn provenance_error_display() {
    // Each variant renders with its own fixed prefix.
    let cases = [
        (ProvenanceError::Git("boom".into()), "git-mark: boom"),
        (
            ProvenanceError::InvalidPath("/x.acl".into()),
            "invalid provenance path: /x.acl",
        ),
    ];
    for (error, expected) in cases {
        assert_eq!(error.to_string(), expected);
    }
}
1166
1167 // -----------------------------------------------------------------------
1168 // Phase 5: composition (AnchorPolicy + ProvenanceLog) — pure, mocked tiers
1169 // -----------------------------------------------------------------------
1170
1171 use std::sync::atomic::{AtomicUsize, Ordering};
1172
1173 /// In-memory [`GitMarker`] — fabricates a deterministic SHA per call and
1174 /// counts invocations. No subprocess, so it compiles + runs on wasm too.
1175 #[derive(Default)]
1176 struct MockMarker {
1177 calls: AtomicUsize,
1178 }
1179 #[async_trait::async_trait(?Send)]
1180 impl GitMarker for MockMarker {
1181 async fn mark_write(
1182 &self,
1183 _repo: &Path,
1184 path: &str,
1185 _agent_did: &str,
1186 _message: &str,
1187 ) -> Result<GitMark, ProvenanceError> {
1188 let n = self.calls.fetch_add(1, Ordering::SeqCst);
1189 // 40-hex deterministic SHA derived from the call ordinal + path.
1190 let sha = hex::encode(Sha256::digest(format!("{n}:{path}").as_bytes()))[..40].to_string();
1191 Ok(GitMark {
1192 commit_sha: sha,
1193 repo: "mockpod".into(),
1194 branch: "main".into(),
1195 parent: None,
1196 })
1197 }
1198 async fn head(&self, _repo: &Path) -> Result<Option<String>, ProvenanceError> {
1199 Ok(None)
1200 }
1201 }
1202
1203 /// In-memory [`BlockAnchorer`] — records the `state_hash` it was asked to
1204 /// anchor (so a test can assert the git SHA was bound) and counts calls.
1205 #[derive(Default)]
1206 struct MockAnchorer {
1207 calls: AtomicUsize,
1208 last_state_hash: std::sync::Mutex<Option<String>>,
1209 }
1210 #[async_trait::async_trait(?Send)]
1211 impl BlockAnchorer for MockAnchorer {
1212 async fn anchor(
1213 &self,
1214 ticker: &str,
1215 state_hash: &str,
1216 network: &str,
1217 ) -> Result<BlockTrailAnchor, ProvenanceError> {
1218 self.calls.fetch_add(1, Ordering::SeqCst);
1219 *self.last_state_hash.lock().unwrap() = Some(state_hash.to_string());
1220 Ok(BlockTrailAnchor {
1221 ticker: ticker.into(),
1222 state_hash: state_hash.into(),
1223 txid: "ab".repeat(32),
1224 vout: 0,
1225 address: "tb1pmock".into(),
1226 network: network.into(),
1227 blockheight: None,
1228 state_strings: vec!["{\"seq\":0}".into()],
1229 pubkey: Some("02".to_string() + &"ab".repeat(32)),
1230 })
1231 }
1232 async fn verify(&self, _anchor: &BlockTrailAnchor) -> Result<bool, ProvenanceError> {
1233 Ok(true)
1234 }
1235 }
1236
/// Repository path handed to the mock tiers.
fn repo() -> &'static Path {
    let location: &'static str = "/tmp/mockpod";
    Path::new(location)
}
1240
1241 /// Build a [`WriteRecord`] for the mock tiers (PROV trail on testnet4).
1242 fn rec<'a>(
1243 path: &'a str,
1244 policy: AnchorPolicy,
1245 high_value: bool,
1246 created: u64,
1247 ) -> WriteRecord<'a> {
1248 WriteRecord {
1249 repo: repo(),
1250 path,
1251 agent_did: "did:nostr:a",
1252 message: "PUT",
1253 policy,
1254 high_value,
1255 ticker: "PROV",
1256 network: "testnet4",
1257 created,
1258 }
1259 }
1260
#[test]
fn anchor_policy_inline_matrix() {
    // (policy, high_value flag, expected `anchors_inline` answer)
    let matrix = [
        (AnchorPolicy::Never, true, false),
        (AnchorPolicy::Never, false, false),
        (AnchorPolicy::Always, false, true),
        (AnchorPolicy::Always, true, true),
        (AnchorPolicy::HighValue, true, true),
        (AnchorPolicy::HighValue, false, false),
        // Epoch never anchors inline — it defers to the accumulator.
        (AnchorPolicy::Epoch, true, false),
    ];
    for (policy, high_value, expected) in matrix {
        assert_eq!(policy.anchors_inline(high_value), expected);
    }
    assert_eq!(AnchorPolicy::default(), AnchorPolicy::Never);
}
1273
1274 #[tokio::test]
1275 async fn record_cheap_write_is_git_mark_only() {
1276 // Never policy ⇒ git-mark only, no anchor, anchorer untouched.
1277 let marker = Arc::new(MockMarker::default());
1278 let anchorer = Arc::new(MockAnchorer::default());
1279 let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1280 let mark = log
1281 .record(rec("notes/a.ttl", AnchorPolicy::Never, false, 1_750_000_000))
1282 .await
1283 .unwrap();
1284 assert!(mark.anchor.is_none(), "cheap write must carry no anchor");
1285 assert_eq!(mark.resource, "/notes/a.ttl");
1286 assert_eq!(marker.calls.load(Ordering::SeqCst), 1, "git-mark always runs");
1287 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0, "anchorer must NOT be called");
1288 }
1289
1290 #[tokio::test]
1291 async fn record_high_value_write_carries_git_mark_and_anchor() {
1292 // HighValue + high_value=true ⇒ BOTH tiers present, and the anchor's
1293 // state_hash IS the git commit SHA (the two tiers are bound).
1294 let marker = Arc::new(MockMarker::default());
1295 let anchorer = Arc::new(MockAnchorer::default());
1296 let log = ProvenanceLog::with_anchorer(marker.clone(), anchorer.clone());
1297 let mark = log
1298 .record(rec("receipts/r1.ttl", AnchorPolicy::HighValue, true, 1_750_000_000))
1299 .await
1300 .unwrap();
1301 let anchor = mark.anchor.expect("high-value write must carry an anchor");
1302 assert_eq!(
1303 anchor.state_hash, mark.git.commit_sha,
1304 "anchor must commit to the git SHA (binds the two tiers — §2.3)"
1305 );
1306 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1);
1307 assert_eq!(
1308 anchorer.last_state_hash.lock().unwrap().as_deref(),
1309 Some(mark.git.commit_sha.as_str())
1310 );
1311 }
1312
1313 #[tokio::test]
1314 async fn record_high_value_flag_false_is_git_only() {
1315 // HighValue policy but the resource is NOT flagged ⇒ git-mark only.
1316 let marker = Arc::new(MockMarker::default());
1317 let anchorer = Arc::new(MockAnchorer::default());
1318 let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1319 let mark = log
1320 .record(rec("notes/x.ttl", AnchorPolicy::HighValue, false, 1))
1321 .await
1322 .unwrap();
1323 assert!(mark.anchor.is_none());
1324 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1325 }
1326
1327 #[tokio::test]
1328 async fn record_always_anchors_every_write() {
1329 let marker = Arc::new(MockMarker::default());
1330 let anchorer = Arc::new(MockAnchorer::default());
1331 let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1332 for i in 0..3 {
1333 let m = log
1334 .record(rec(&format!("s/{i}.ttl"), AnchorPolicy::Always, false, 1))
1335 .await
1336 .unwrap();
1337 assert!(m.anchor.is_some());
1338 }
1339 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 3);
1340 }
1341
1342 #[tokio::test]
1343 async fn record_without_anchorer_degrades_to_git_only() {
1344 // No anchorer (the wasm / no-Bitcoin pod): even Always degrades to
1345 // git-mark only, silently.
1346 let marker = Arc::new(MockMarker::default());
1347 let log = ProvenanceLog::new(marker.clone());
1348 assert!(log.anchorer.is_none());
1349 let mark = log
1350 .record(rec("notes/a.ttl", AnchorPolicy::Always, true, 1))
1351 .await
1352 .unwrap();
1353 assert!(mark.anchor.is_none(), "no anchorer ⇒ no anchor regardless of policy");
1354 assert_eq!(marker.calls.load(Ordering::SeqCst), 1);
1355 }
1356
1357 #[tokio::test]
1358 async fn record_epoch_defers_anchoring_to_accumulator() {
1359 // Epoch policy: record() never anchors inline; the caller batches the
1360 // SHA and anchors the root once on epoch close.
1361 let marker = Arc::new(MockMarker::default());
1362 let anchorer = Arc::new(MockAnchorer::default());
1363 let log = ProvenanceLog::with_anchorer(marker, anchorer.clone());
1364
1365 let mut epoch = EpochAccumulator::new(3);
1366 let mut shas = Vec::new();
1367 for i in 0..3 {
1368 let m = log
1369 .record(rec(&format!("e/{i}.ttl"), AnchorPolicy::Epoch, true, 1))
1370 .await
1371 .unwrap();
1372 assert!(m.anchor.is_none(), "epoch writes never anchor inline");
1373 epoch.push(m.git.commit_sha.clone());
1374 shas.push(m.git.commit_sha);
1375 }
1376 // No per-write anchors happened.
1377 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 0);
1378
1379 // Epoch is full → close → ONE anchor for the whole batch root.
1380 assert!(epoch.is_full());
1381 let closed = epoch.close().expect("non-empty epoch closes");
1382 assert_eq!(closed.commits, shas);
1383 let anchor = anchorer.anchor("PROV", &closed.root, "testnet4").await.unwrap();
1384 assert_eq!(anchorer.calls.load(Ordering::SeqCst), 1, "ONE anchor notarises N commits");
1385 assert_eq!(anchor.state_hash, closed.root);
1386 // Epoch drained — a fresh epoch begins.
1387 assert!(epoch.is_empty());
1388 }
1389
1390 // ── Merkle tree: root determinism + inclusion proofs ──────────────────
1391
#[test]
fn merkle_root_empty_and_single() {
    // No leaves: the root is the all-zero hash.
    assert_eq!(merkle_root(&[]), [0u8; 32]);
    // A single leaf is its own root.
    let only = merkle_leaf("deadbeef");
    assert_eq!(merkle_root(&[only]), only);
}
1399
#[test]
fn merkle_root_is_deterministic_and_order_sensitive() {
    let first = merkle_leaf("aaa");
    let second = merkle_leaf("bbb");

    let forward = merkle_root(&[first, second]);
    let forward_again = merkle_root(&[first, second]);
    assert_eq!(forward, forward_again, "deterministic");

    let reversed = merkle_root(&[second, first]);
    assert_ne!(forward, reversed, "leaf order changes the root");
}
1410
#[test]
fn epoch_root_matches_close_root() {
    let mut accumulator = EpochAccumulator::new(10);
    for i in 0..5 {
        accumulator.push(format!("commit{i:040}"));
    }
    // Peeking at the root must agree with the root reported on close.
    let peeked = accumulator.root().unwrap();
    let closed = accumulator.close().unwrap();
    assert_eq!(peeked, closed.root, "root() peek == close() root");
}
1421
#[test]
fn epoch_inclusion_proof_verifies_for_every_leaf() {
    // N commits → one root → each commit's inclusion proof verifies.
    let n = 7; // odd, to exercise last-node duplication
    let mut accumulator = EpochAccumulator::new(n);
    for i in 0..n {
        accumulator.push(format!("c{i:039}")); // 40-char commit-like ids
    }
    let root = accumulator.root().unwrap();

    for i in 0..n {
        let proof = accumulator
            .inclusion_proof(i)
            .expect("proof for in-range leaf");
        assert!(
            EpochAccumulator::verify_inclusion(&proof, &root),
            "leaf {i} must verify against the anchored root"
        );
    }
    // Out-of-range index → no proof.
    assert!(accumulator.inclusion_proof(n).is_none());
}
1441
#[test]
fn epoch_inclusion_proof_rejects_wrong_root_and_tampered_leaf() {
    let mut accumulator = EpochAccumulator::new(4);
    for i in 0..4 {
        accumulator.push(format!("c{i:039}"));
    }
    let genuine_root = accumulator.root().unwrap();
    let mut proof = accumulator.inclusion_proof(1).unwrap();

    // Wrong root → reject.
    let bogus_root = "00".repeat(32);
    assert!(!EpochAccumulator::verify_inclusion(&proof, &bogus_root));

    // Tampered leaf → reject against the genuine root.
    proof.leaf = hex::encode(merkle_leaf("forged"));
    assert!(!EpochAccumulator::verify_inclusion(&proof, &genuine_root));
}
1456
#[test]
fn epoch_threshold_and_len_tracking() {
    let mut accumulator = EpochAccumulator::new(2);
    assert_eq!(accumulator.threshold(), 2);
    assert!(accumulator.is_empty());
    assert!(!accumulator.is_full());

    accumulator.push("a");
    assert_eq!(accumulator.len(), 1);
    assert!(!accumulator.is_full());

    accumulator.push("b");
    assert!(accumulator.is_full(), "reaching threshold ⇒ full");

    // Threshold clamps to ≥ 1.
    assert_eq!(EpochAccumulator::new(0).threshold(), 1);
}
1470
#[test]
fn empty_epoch_close_is_none() {
    // With nothing pushed, closing yields no epoch and there is no root.
    let mut untouched = EpochAccumulator::new(3);
    assert!(untouched.close().is_none());
    assert!(untouched.root().is_none());
}
1477
#[test]
fn merkle_proof_round_trips() {
    // One sibling on each side, to cover both values of the direction flag.
    let siblings = vec![("ab".repeat(32), true), ("cd".repeat(32), false)];
    let original = MerkleProof {
        leaf: hex::encode(merkle_leaf("x")),
        siblings,
    };
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: MerkleProof = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original, decoded);
}
1488
#[test]
fn anchor_policy_round_trips() {
    // Every variant must survive a JSON round trip unchanged.
    let every_policy = [
        AnchorPolicy::Never,
        AnchorPolicy::Always,
        AnchorPolicy::HighValue,
        AnchorPolicy::Epoch,
    ];
    for policy in every_policy {
        let encoded = serde_json::to_string(&policy).unwrap();
        let decoded: AnchorPolicy = serde_json::from_str(&encoded).unwrap();
        assert_eq!(policy, decoded);
    }
}
1497}