mkit_core/
history.rs

1//! Append-only commit-history Merkle Mountain Range (MMR).
2//!
3//! Issue #157. Light-client inclusion proofs for the commit chain: a
4//! verifier with the MMR root for a branch tip can check "commit X was
5//! leaf N on this branch" with `O(log n)` hash work, without
6//! downloading the parent chain or any pack.
7//!
8//! # Status
9//!
10//! - **Phase 1** (shipped, [`CommitHistory::open`]): `mem`-backed
11//!   in-memory MMR. Lost on process exit. Useful for tests and
12//!   short-lived computations where the proof is the only output.
13//! - **Phase 2** (this build, [`CommitHistory::open_at`]): on-disk
14//!   journaled MMR backed by
15//!   [`commonware_storage::merkle::mmr::full::Mmr`] pinned to
16//!   `=2026.5.0`. The on-disk layout is commonware's native two-store
17//!   shape — a fixed-item journal of node digests plus a metadata
18//!   sidecar for pruned pinned nodes — laid out under
19//!   `<mkit_dir>/history/<sanitized_branch>/`. See
20//!   `docs/SPEC-HISTORY-PROOF.md` §4.
21//! - **Phase 3** (planned, v0.2): `Commit.history_root` proto field,
22//!   new signing-bytes layout. Out of scope here.
23//!
24//! # Hashing
25//!
26//! The underlying primitive is `commonware-storage`'s journaled MMR
27//! parameterised over BLAKE3 (from `commonware-cryptography`). Node
28//! digests are 32-byte BLAKE3 values — same primitive mkit already
29//! uses elsewhere ([`crate::hash::Hash`]). The schedule is **not** the
30//! same as [`crate::hash::hash`]: commonware injects each node's
31//! position into the parent/leaf digest. Treat the digests as opaque
32//! beyond comparison.
33//!
34//! # Sync / async bridge
35//!
36//! commonware's journaled MMR is async over a `commonware-runtime`
37//! `Context`. The mkit-core public surface ([`CommitHistory::append`],
38//! [`CommitHistory::root`], [`CommitHistory::prove`]) is synchronous
39//! by design — it is called from the synchronous `refs::update_ref`
40//! path and from CLI helpers that have no async-runtime context.
41//!
42//! The bridge is the [`crate::protocol::async_shim::Executor`] trait
43//! hoisted in PR #167. [`CommitHistory`] is generic over an executor
44//! `X: Executor`; every sync method drives its async counterpart via
45//! `executor.block_on(...)`. The executor is held by [`Arc`] for the
46//! lifetime of the [`CommitHistory`] value so multiple `append` /
47//! `prove` calls share a single runtime.
48//!
49//! # Executor / Context ownership
50//!
51//! [`CommitHistory::open_at`] takes an [`Arc`]-shared executor from
52//! the caller. The commonware `Context` needed to drive the
53//! journaled MMR is bootstrapped *internally* via a one-shot
54//! [`commonware_runtime::tokio::Runner::start`] on a fresh OS thread
55//! (the standard workaround documented in transport-enc Phase 2):
56//! the runner returns a Context clone, the outer `Arc<Executor>` inside
57//! the Context keeps tokio's runtime alive, and the bootstrap thread
58//! joins immediately. Subsequent async ops are driven through the
59//! caller-supplied executor.
60//!
61//! This means production callers only need to construct an executor
62//! ([`crate::history::tokio_executor::TokioExecutor`] is provided when
63//! the `history-mmr` feature is on); mkit-core handles the
64//! commonware-side wiring. The trade-off: every [`CommitHistory`]
65//! owns one tokio runtime (via its executor) AND one commonware
66//! Context with its own inner tokio runtime. The two never need to
67//! interact — `tokio::fs` and `tokio::sync::Mutex` are runtime-agnostic
68//! and work whichever runtime is driving the poll.
69
70use std::path::{Path, PathBuf};
71use std::sync::Arc;
72
73use commonware_cryptography::{Blake3, Hasher as CHasher};
74use commonware_parallel::Sequential;
75use commonware_runtime::{Runner as _, Supervisor as _, buffer::paged::CacheRef};
76use commonware_storage::merkle::Bagging;
77use commonware_storage::merkle::mmr::{
78    Location as MmrLocation, Proof as MmrProof, StandardHasher,
79    full::{Config as JConfig, Mmr as JournaledMmr},
80    mem::Mmr as MemMmr,
81};
82use commonware_utils::{NZU16, NZU64, NZUsize};
83
84use crate::hash::{HASH_LEN, Hash};
85use crate::protocol::async_shim::Executor;
86use crate::refs::validate_ref_name;
87
88pub mod tokio_executor;
89
90/// Re-export of the bundled tokio-runtime executor.
91pub use tokio_executor::TokioExecutor;
92
93/// Peak-bagging policy for the commit-history MMR.
94///
95/// This is a **load-bearing cryptographic parameter**: the producer
96/// ([`CommitHistory::root`] / [`CommitHistory::prove`]) and the verifier
97/// ([`verify_inclusion`]) must fold MMR peaks into a root identically, or
98/// every inclusion proof silently fails to verify. Pinning it here — and
99/// consuming it only via [`history_hasher`] — makes that agreement
100/// un-desyncable across all call sites and both code paths.
101///
102/// `ForwardFold` is also a **back-compat** choice: it reproduces the root
103/// that the pre-2026.5 commonware default produced byte-for-byte, so MMR
104/// journals written by an older mkit build and roots already stored in the
105/// reflog stay valid across the commonware bump. Do **not** change this
106/// without an on-disk format-version bump and a migration.
107const HISTORY_BAGGING: Bagging = Bagging::ForwardFold;
108
109/// The canonical hasher for the commit-history MMR — the single source of
110/// truth for [`HISTORY_BAGGING`], so the producer and verifier sides can
111/// never drift on the bagging policy.
112fn history_hasher() -> StandardHasher<Blake3> {
113    StandardHasher::new(HISTORY_BAGGING)
114}
115
116// ---------------------------------------------------------------------
117// Public types
118// ---------------------------------------------------------------------
119
120/// 0-based index of a commit within its branch's MMR.
121///
122/// In commonware's vocabulary this is a `Location` — the leaf index in
123/// insertion order, not the MMR's internal node position. The first
124/// commit appended is `Position(0)`, the second is `Position(1)`, etc.
125/// Stable for the lifetime of the branch: positions never shift
126/// because the MMR is append-only.
127#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
128pub struct Position(pub u64);
129
130impl Position {
131    /// Raw `u64`.
132    #[must_use]
133    pub const fn as_u64(self) -> u64 {
134        self.0
135    }
136}
137
138/// Inclusion proof — re-export of commonware's MMR proof type bound to
139/// our BLAKE3 digest. Wire shape is normatively defined in
140/// `SPEC-HISTORY-PROOF.md` §2.
141pub type InclusionProof = MmrProof<<Blake3 as CHasher>::Digest>;
142
143/// Errors returned by [`CommitHistory`] and [`verify_inclusion`].
144#[derive(Debug, thiserror::Error)]
145pub enum HistoryError {
146    /// The underlying MMR rejected the operation (out-of-bounds proof,
147    /// invalid size, etc.). The wrapped string is commonware's own
148    /// `Display` impl — stable enough for logs, not parsed.
149    #[error("mmr error: {0}")]
150    Mmr(String),
151    /// The branch name failed [`crate::refs::validate_ref_name`].
152    #[error("invalid branch name for history journal: {0:?}")]
153    InvalidBranch(String),
154    /// On-disk journal could not be opened or recovered. commonware's
155    /// own `init` performs roll-forward recovery on a half-written
156    /// trailing leaf (see `commonware_storage::merkle::mmr::full`
157    /// docs); this variant surfaces failures it cannot recover from.
158    #[error("history journal is corrupt: {0}")]
159    Corrupted(String),
160    /// Failed to bootstrap the commonware tokio Context.
161    #[error("failed to bootstrap commonware runtime: {0}")]
162    RuntimeBootstrap(String),
163    /// Failed to set up the on-disk history directory.
164    #[error("history directory I/O: {0}")]
165    Io(#[from] std::io::Error),
166}
167
168// ---------------------------------------------------------------------
169// On-disk layout
170// ---------------------------------------------------------------------
171
172/// Subdirectory of `<mkit_dir>` that holds per-branch MMR journals.
173///
174/// commonware lays its journaled-MMR partitions out as a pair of
175/// suffix-discriminated directories below this root:
176///
177/// ```text
178/// <mkit_dir>/history/<sanitized_branch>__journal-blobs/     # node-digest journal blobs
179/// <mkit_dir>/history/<sanitized_branch>__journal-metadata/  # journal segment table
180/// <mkit_dir>/history/<sanitized_branch>__metadata/          # pruned-pinned-node sidecar
181/// ```
182///
183/// The `-blobs` / `-metadata` segment suffixes are appended by
184/// commonware-storage; mkit names the leading partition tokens
185/// (`<sanitized_branch>__journal` and `<sanitized_branch>__metadata`)
186/// and hands them to `journaled::Mmr::init` via [`JConfig`].
187///
188/// commonware partition names are restricted to `[A-Za-z0-9_-]+`, so
189/// branch names go through `sanitize_branch` — a `_xx`-hex encoding
190/// of every byte outside `[A-Za-z0-9-]`. Empty branches and invalid
191/// ref names are rejected up front by [`CommitHistory::open_at`].
192pub const HISTORY_DIR: &str = "history";
193
194/// Suffix appended to the branch's partition name for the node-digest
195/// journal.
196pub const JOURNAL_PARTITION_SUFFIX: &str = "__journal";
197
198/// Suffix appended to the branch's partition name for the
199/// pruned-pinned-node metadata sidecar.
200pub const METADATA_PARTITION_SUFFIX: &str = "__metadata";
201
202// ---------------------------------------------------------------------
203// CommitHistory
204// ---------------------------------------------------------------------
205
206/// Append-only Merkle history of commit hashes for one branch.
207///
208/// Two construction modes:
209///
210/// 1. [`CommitHistory::open`] — purely in-memory. Useful for tests
211///    and for callers that don't want any disk side-effects. Lost on
212///    drop.
213/// 2. [`CommitHistory::open_at`] — on-disk journaled MMR under
214///    `<mkit_dir>/history/<branch>/`. Survives process exit.
215///
216/// Each call to a sync method (`append`, `root`, `prove`) on the
217/// journaled flavour drives an async operation on the underlying
218/// `commonware-storage::journaled::Mmr` via the caller-supplied
219/// [`Executor`].
220pub struct CommitHistory<X: Executor = TokioExecutor> {
221    backend: Backend<X>,
222    hasher: StandardHasher<Blake3>,
223}
224
225/// Internal backend selector. The mem variant is the Phase-1 shape; the
226/// journaled variant is Phase-2.
227///
228/// `Journaled` is boxed because its inner state (commonware's
229/// `Journaled` plus a `Context` clone) is ~2.2 KiB and would otherwise
230/// pad every mem-flavour `CommitHistory` by the same amount.
231enum Backend<X: Executor> {
232    Mem {
233        mmr: MemMmr<<Blake3 as CHasher>::Digest>,
234    },
235    Journaled(Box<JournaledBackend<X>>),
236}
237
238struct JournaledBackend<X: Executor> {
239    // Order matters for Drop: `mmr` must drop before `_ctx` so
240    // any pending async-resource shutdowns can still poll on the
241    // surviving tokio runtime. In practice commonware's
242    // `Journaled` only flushes synchronously in `sync`, but the
243    // ordering is cheap insurance.
244    mmr: JournaledMmr<commonware_runtime::tokio::Context, <Blake3 as CHasher>::Digest, Sequential>,
245    executor: Arc<X>,
246    // Held to keep the bootstrap tokio runtime (inside the Context's
247    // executor `Arc`) alive for the whole CommitHistory lifetime.
248    _ctx: commonware_runtime::tokio::Context,
249    // Held so update_ref_with_history can take a fresh RepoLock for
250    // every append. None for the mem-only flavour.
251    mkit_dir: PathBuf,
252    branch: String,
253}
254
255impl<X: Executor> core::fmt::Debug for CommitHistory<X> {
256    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
257        match &self.backend {
258            Backend::Mem { mmr } => f
259                .debug_struct("CommitHistory::Mem")
260                .field("leaves", &u64::from(mmr.leaves()))
261                .field("size", &u64::from(mmr.size()))
262                .finish_non_exhaustive(),
263            Backend::Journaled(b) => f
264                .debug_struct("CommitHistory::Journaled")
265                .field("branch", &b.branch)
266                .field("leaves", &u64::from(b.mmr.leaves()))
267                .field("size", &u64::from(b.mmr.size()))
268                .finish_non_exhaustive(),
269        }
270    }
271}
272
273impl CommitHistory<TokioExecutor> {
274    /// Open a fresh empty in-memory history (Phase 1 shape).
275    ///
276    /// Lost on drop. Useful for unit tests and for callers that just
277    /// need a proof bundle without committing any state to disk.
278    #[must_use]
279    pub fn open() -> Self {
280        let hasher = history_hasher();
281        let mmr = MemMmr::new();
282        Self {
283            backend: Backend::Mem { mmr },
284            hasher,
285        }
286    }
287}
288
289impl<X: Executor + 'static> CommitHistory<X> {
290    /// Open a persisted history under `<mkit_dir>/history/<branch>/`.
291    ///
292    /// The on-disk layout is commonware-storage's native journaled MMR
293    /// shape — see [`HISTORY_DIR`] and SPEC-HISTORY-PROOF §4. If the
294    /// underlying journal does not yet exist, it is created empty
295    /// (subsequent [`CommitHistory::append`] calls populate it).
296    ///
297    /// Crash recovery is delegated to commonware: its `Journaled::init`
298    /// detects a half-written trailing leaf, rewinds the journal to
299    /// the last valid size, and re-derives the in-memory state. See
300    /// SPEC-HISTORY-PROOF §4.4 for the contract surfaced to mkit
301    /// callers.
302    ///
303    /// # Errors
304    ///
305    /// - [`HistoryError::InvalidBranch`] — `branch` failed
306    ///   [`validate_ref_name`].
307    /// - [`HistoryError::RuntimeBootstrap`] — could not start the
308    ///   commonware tokio Runner used to construct the underlying
309    ///   storage Context.
310    /// - [`HistoryError::Corrupted`] — commonware refused to recover
311    ///   the journal (a deeper-than-trailing-leaf corruption).
312    /// - [`HistoryError::Io`] — failed to create the history dir.
313    pub fn open_at(executor: Arc<X>, mkit_dir: &Path, branch: &str) -> Result<Self, HistoryError> {
314        if !validate_ref_name(branch) {
315            return Err(HistoryError::InvalidBranch(branch.to_string()));
316        }
317
318        let history_dir = mkit_dir.join(HISTORY_DIR);
319        std::fs::create_dir_all(&history_dir)?;
320
321        // Bootstrap a commonware tokio Context rooted at
322        // `<mkit_dir>/history`. The Context survives the bootstrap
323        // runner's drop because its inner `Arc<Executor>` (the
324        // commonware Executor, not ours) holds the tokio runtime alive.
325        let ctx = bootstrap_commonware_context(&history_dir)?;
326
327        let sanitized = sanitize_branch(branch);
328        let journal_partition = format!("{sanitized}{JOURNAL_PARTITION_SUFFIX}");
329        let metadata_partition = format!("{sanitized}{METADATA_PARTITION_SUFFIX}");
330        let cfg = JConfig {
331            journal_partition,
332            metadata_partition,
333            // 4096 items/blob → 128 KiB blobs (32 B digest × 4096). Big
334            // enough that small branches stay in one blob; small enough
335            // that pruning later doesn't carry stale data around.
336            items_per_blob: NZU64!(4096),
337            write_buffer: NZUsize!(4096),
338            strategy: Sequential,
339            page_cache: CacheRef::from_pooler(&ctx, NZU16!(4096), NZUsize!(8)),
340        };
341
342        let hasher = history_hasher();
343        let mmr = {
344            let hasher_inner = history_hasher();
345            // `child` returns an owned child Context, leaving `ctx` usable for
346            // the surviving CommitHistory (which keeps the bootstrap runtime
347            // alive via its inner executor Arc).
348            let ctx_for_init = ctx.child("mmr_init");
349            executor
350                .block_on(async move {
351                    JournaledMmr::<_, <Blake3 as CHasher>::Digest, Sequential>::init(
352                        ctx_for_init,
353                        &hasher_inner,
354                        cfg,
355                    )
356                    .await
357                })
358                .map_err(|e| HistoryError::Corrupted(e.to_string()))?
359        };
360
361        Ok(Self {
362            backend: Backend::Journaled(Box::new(JournaledBackend {
363                mmr,
364                executor,
365                _ctx: ctx,
366                mkit_dir: mkit_dir.to_path_buf(),
367                branch: branch.to_string(),
368            })),
369            hasher,
370        })
371    }
372
373    /// Borrow the `mkit_dir` this history was opened against. `None`
374    /// for the mem-only flavour. Used by
375    /// [`crate::refs::update_ref_with_history`] to take a `RepoLock`
376    /// around the ref-write + MMR-append critical section.
377    #[must_use]
378    pub fn mkit_dir(&self) -> Option<&Path> {
379        match &self.backend {
380            Backend::Mem { .. } => None,
381            Backend::Journaled(b) => Some(b.mkit_dir.as_path()),
382        }
383    }
384
385    /// Borrow the branch name this history was opened against. `None`
386    /// for the mem-only flavour.
387    #[must_use]
388    pub fn branch(&self) -> Option<&str> {
389        match &self.backend {
390            Backend::Mem { .. } => None,
391            Backend::Journaled(b) => Some(b.branch.as_str()),
392        }
393    }
394
395    /// Append a commit hash. Returns its leaf [`Position`].
396    ///
397    /// Positions are dense: the *n*-th append returns `Position(n)`.
398    /// For the journaled flavour, the underlying MMR is `sync`'d to
399    /// disk before returning — survives a `SIGKILL` immediately after.
400    pub fn append(&mut self, commit_hash: &Hash) -> Result<Position, HistoryError> {
401        let leaf = digest_from_hash(commit_hash);
402        match &mut self.backend {
403            Backend::Mem { mmr } => {
404                let leaf_loc = mmr.leaves();
405                let batch = mmr
406                    .new_batch()
407                    .add(&self.hasher, &leaf)
408                    .merkleize(mmr, &self.hasher);
409                mmr.apply_batch(&batch)
410                    .map_err(|e| HistoryError::Mmr(e.to_string()))?;
411                Ok(Position(u64::from(leaf_loc)))
412            }
413            Backend::Journaled(b) => {
414                let leaf_loc = b.mmr.leaves();
415                let batch = b.mmr.new_batch().add(&self.hasher, &leaf);
416                let batch = b.mmr.with_mem(|mem| batch.merkleize(mem, &self.hasher));
417                b.mmr
418                    .apply_batch(&batch)
419                    .map_err(|e| HistoryError::Mmr(e.to_string()))?;
420                // Flush to disk synchronously so a SIGKILL between
421                // append and the caller's next ref-write does not lose
422                // the leaf we just promised to persist.
423                let sync_fut = b.mmr.sync();
424                b.executor
425                    .block_on(sync_fut)
426                    .map_err(|e| HistoryError::Mmr(e.to_string()))?;
427                Ok(Position(u64::from(leaf_loc)))
428            }
429        }
430    }
431
432    /// Current MMR root digest. 32-byte BLAKE3 (mkit `Hash` shape).
433    ///
434    /// Defined for an empty history — commonware returns a
435    /// deterministic empty-MMR root (see SPEC-HISTORY-PROOF §2.3).
436    ///
437    /// # Panics
438    ///
439    /// Never panics in practice. Internally calls commonware's
440    /// `root(&hasher, 0)`, which only returns an error for a non-zero
441    /// inactive-peak count; mkit always requests `0` inactive peaks
442    /// (its proofs are self-contained over the full leaf set), so the
443    /// `Result` is always `Ok`.
444    #[must_use]
445    pub fn root(&self) -> Hash {
446        // `root` takes the hasher + an `inactive_peaks` count (0 for a
447        // self-contained MMR over the full leaf set) and returns an owned
448        // `Digest`. Both backends are sync here (Journaled reads its
449        // in-memory cache); 0 inactive peaks is always a valid request,
450        // so the `Result` is always `Ok` — see the `# Panics` note above.
451        let digest = match &self.backend {
452            Backend::Mem { mmr } => mmr.root(&self.hasher, 0),
453            Backend::Journaled(b) => b.mmr.root(&self.hasher, 0),
454        }
455        .expect("0 inactive peaks is always a valid root request");
456        let mut out = [0u8; HASH_LEN];
457        out.copy_from_slice(digest.as_ref());
458        out
459    }
460
461    /// Number of leaves (commits) appended so far.
462    #[must_use]
463    pub fn len(&self) -> u64 {
464        match &self.backend {
465            Backend::Mem { mmr } => u64::from(mmr.leaves()),
466            Backend::Journaled(b) => u64::from(b.mmr.leaves()),
467        }
468    }
469
470    /// `true` if no commits have been appended.
471    #[must_use]
472    pub fn is_empty(&self) -> bool {
473        self.len() == 0
474    }
475
476    /// Build an inclusion proof for the commit at `position`.
477    pub fn prove(&self, position: Position) -> Result<InclusionProof, HistoryError> {
478        let loc = MmrLocation::new(position.0);
479        match &self.backend {
480            Backend::Mem { mmr } => mmr
481                .proof(&self.hasher, loc, 0)
482                .map_err(|e| HistoryError::Mmr(e.to_string())),
483            Backend::Journaled(b) => {
484                let hasher = self.hasher.clone();
485                let proof_fut = b.mmr.proof(&hasher, loc, 0);
486                // Drive the async proof builder via the executor. The
487                // future borrows `mmr` immutably for the duration of
488                // `block_on`; the borrow ends when `block_on` returns.
489                b.executor
490                    .block_on(proof_fut)
491                    .map_err(|e| HistoryError::Mmr(e.to_string()))
492            }
493        }
494    }
495}
496
497impl Default for CommitHistory<TokioExecutor> {
498    fn default() -> Self {
499        Self::open()
500    }
501}
502
503/// Verify that `commit_hash` was appended at `position` to a history
504/// whose current root is `root`. Returns `true` on a passing proof,
505/// `false` on any tamper / wrong-position / wrong-root case.
506///
507/// Pure function: no allocation beyond what commonware's verifier does
508/// internally; safe to call from a light-client without any of the
509/// preceding chain bytes.
510#[must_use]
511pub fn verify_inclusion(
512    commit_hash: &Hash,
513    position: Position,
514    proof: &InclusionProof,
515    root: &Hash,
516) -> bool {
517    let leaf = digest_from_hash(commit_hash);
518    let root_digest = digest_from_hash(root);
519    let loc = MmrLocation::new(position.0);
520
521    // Same bagging policy as the producer — see [`HISTORY_BAGGING`].
522    let hasher = history_hasher();
523    proof.verify_element_inclusion(&hasher, leaf.as_ref(), loc, &root_digest)
524}
525
526// ---------------------------------------------------------------------
527// v0.1.x → v0.2.x rebuild shim
528// ---------------------------------------------------------------------
529
530/// Rebuild a branch's history MMR from its on-disk parent chain.
531///
532/// For repos created against an older mkit (v0.1.x, before this
533/// module persisted anything to disk) the `<mkit_dir>/history/<branch>/`
534/// directory is empty on first open, but the branch's commit chain is
535/// already on disk in the object store. This helper walks the
536/// first-parent chain from `tip` to root, then re-appends each commit
537/// (in oldest-first order) into the supplied empty [`CommitHistory`].
538///
539/// The walker is supplied by the caller as `parent_of`: given a
540/// commit hash, it returns `Ok(Some(parent_hash))` if there is a
541/// parent, `Ok(None)` for the root commit, and `Err(_)` for any
542/// lookup failure. This keeps `mkit-core::history` free of an
543/// `ObjectStore` dependency — the rebuild shim is a tool the caller
544/// (typically `mkit-cli`) drives.
545///
546/// # Errors
547///
548/// - Propagates any error from `parent_of`.
549/// - Propagates any [`HistoryError`] from `history.append`.
550pub fn rebuild_from_chain<X, F, E>(
551    history: &mut CommitHistory<X>,
552    tip: Hash,
553    mut parent_of: F,
554) -> Result<u64, RebuildError<E>>
555where
556    X: Executor + 'static,
557    F: FnMut(&Hash) -> Result<Option<Hash>, E>,
558    E: core::fmt::Display,
559{
560    let mut chain = Vec::new();
561    let mut cursor = Some(tip);
562    while let Some(h) = cursor {
563        chain.push(h);
564        cursor = parent_of(&h).map_err(RebuildError::Walker)?;
565    }
566    // Walker yielded tip..root; we need root..tip for the append order.
567    chain.reverse();
568    let count = chain.len() as u64;
569    for h in &chain {
570        history.append(h).map_err(RebuildError::History)?;
571    }
572    Ok(count)
573}
574
575/// Errors returned by [`rebuild_from_chain`].
576#[derive(Debug, thiserror::Error)]
577pub enum RebuildError<E: core::fmt::Display> {
578    /// The caller-supplied parent-walker failed. The wrapped `E`
579    /// carries the walker's error verbatim, so callers that pass a
580    /// structured error type get it back unchanged.
581    #[error("parent-chain walker failed: {0}")]
582    Walker(E),
583    /// The underlying [`CommitHistory::append`] failed.
584    #[error(transparent)]
585    History(HistoryError),
586}
587
588// ---------------------------------------------------------------------
589// Internals
590// ---------------------------------------------------------------------
591
592/// Convert an mkit `Hash` (`[u8; 32]`) into commonware's `Blake3::Digest`.
593fn digest_from_hash(h: &Hash) -> <Blake3 as CHasher>::Digest {
594    <<Blake3 as CHasher>::Digest as From<[u8; HASH_LEN]>>::from(*h)
595}
596
597/// Hex-escape a mkit branch name into a commonware-partition-safe
598/// token.
599///
600/// commonware partition names are restricted to `[A-Za-z0-9_-]+`. mkit
601/// ref names can contain `.` and `/` (and `_`, which IS allowed
602/// upstream but which we must escape too to keep this encoding
603/// injective). Every non-`[A-Za-z0-9-]` byte is encoded as `_xx`
604/// where `xx` is the lowercase-hex byte value; `_` itself encodes as
605/// `_5f`. This makes the encoding self-delimiting — every `_` in the
606/// output is the lead-in of a two-hex-digit escape, never a literal.
607///
608/// Examples:
609///   `main`        → `main`
610///   `feat/v1.0`   → `feat_2fv1_2e0`
611///   `feat_v1_0`   → `feat_5fv1_5f0`
612///
613/// The encoding is injective: different valid branch names always
614/// sanitize to different partition tokens, so the journaled MMRs of
615/// two branches in the same repo never share commonware partitions.
616fn sanitize_branch(name: &str) -> String {
617    let mut out = String::with_capacity(name.len());
618    for &b in name.as_bytes() {
619        let allowed = b.is_ascii_alphanumeric() || b == b'-';
620        if allowed {
621            out.push(b as char);
622        } else {
623            use core::fmt::Write as _;
624            let _ = write!(&mut out, "_{b:02x}");
625        }
626    }
627    out
628}
629
630/// Bootstrap a commonware tokio `Context` whose `storage_directory`
631/// is the supplied path.
632///
633/// The bootstrap is done on a fresh OS thread because tokio refuses to
634/// nest `runtime::Builder::build()` inside an already-active runtime
635/// (the `TokioExecutor`'s runtime is already alive in the caller's
636/// thread). The returned Context's inner `Arc<Executor>` keeps the
637/// bootstrap runtime alive after the bootstrap thread joins.
638///
639/// See `docs/SPEC-HISTORY-PROOF.md` §4.1 for the design rationale and
640/// the trade-off with the alternative "share the caller's tokio
641/// runtime" approach.
642fn bootstrap_commonware_context(
643    storage_directory: &Path,
644) -> Result<commonware_runtime::tokio::Context, HistoryError> {
645    let dir = storage_directory.to_path_buf();
646    std::thread::spawn(move || {
647        let cfg = commonware_runtime::tokio::Config::new().with_storage_directory(dir);
648        let runner = commonware_runtime::tokio::Runner::new(cfg);
649        // Return an owned, labelled child Context. `Context::clone` and
650        // `Metrics::with_label` were both removed in 2026.5.0;
651        // `Supervisor::child` returns an owned Context that clones the
652        // inner `executor: Arc<Executor>`, which keeps the tokio runtime
653        // alive after the bootstrap Runner is dropped at the end of
654        // `start`. The label is best-effort so any commonware metrics
655        // surfaced through this Context are easy to spot in a debugger.
656        runner
657            .start(|ctx| async move { commonware_runtime::Supervisor::child(&ctx, "mkit_history") })
658    })
659    .join()
660    .map_err(|_| HistoryError::RuntimeBootstrap("bootstrap thread panicked".to_string()))
661}
662
663// `BufferPooler`, `Clock`, `RStorage`, `Supervisor` are implicit bounds on
664// the `Context` we use — the imports above keep them in scope so trait
665// methods (`child`, `network_buffer_pool`, …) resolve. Re-export nothing.
666#[allow(dead_code)]
667fn _trait_imports_keep_alive() {
668    fn assert_send_sync<T: Send + Sync>() {}
669    assert_send_sync::<commonware_runtime::tokio::Context>();
670}
671
672// ---------------------------------------------------------------------
673// Tests
674// ---------------------------------------------------------------------
675
676#[cfg(test)]
677mod tests {
678    use super::*;
679    use tempfile::TempDir;
680
681    /// Deterministic distinct commit hash generator.
682    fn synth(i: u64) -> Hash {
683        crate::hash::hash(&i.to_be_bytes())
684    }
685
686    fn fresh_mkit_dir() -> (TempDir, PathBuf) {
687        let tmp = TempDir::new().unwrap();
688        let mkit_dir = tmp.path().join(".mkit");
689        std::fs::create_dir_all(&mkit_dir).unwrap();
690        (tmp, mkit_dir)
691    }
692
693    fn fresh_executor() -> Arc<TokioExecutor> {
694        Arc::new(TokioExecutor::new().expect("tokio runtime"))
695    }
696
697    // ---- Phase 1 mem-only API (unchanged from issue #157 Phase 1) --
698
699    #[test]
700    fn mem_empty_history_root_is_well_defined() {
701        let h1 = CommitHistory::open();
702        let h2 = CommitHistory::open();
703        assert_eq!(h1.root(), h2.root(), "empty root must be deterministic");
704        assert!(h1.is_empty());
705        assert_eq!(h1.len(), 0);
706    }
707
708    #[test]
709    fn mem_append_returns_dense_positions() {
710        let mut h = CommitHistory::open();
711        for i in 0..16u64 {
712            let pos = h.append(&synth(i)).unwrap();
713            assert_eq!(pos, Position(i), "positions must be dense and 0-based");
714        }
715        assert_eq!(h.len(), 16);
716    }
717
718    #[test]
719    fn mem_prove_and_verify_position_712_of_1000() {
720        let mut h = CommitHistory::open();
721        let commits: Vec<Hash> = (0..1000u64).map(synth).collect();
722        for c in &commits {
723            h.append(c).unwrap();
724        }
725        assert_eq!(h.len(), 1000);
726
727        let target = Position(712);
728        let proof = h.prove(target).unwrap();
729        let root = h.root();
730
731        assert!(
732            verify_inclusion(&commits[712], target, &proof, &root),
733            "honest proof must verify"
734        );
735    }
736
737    #[test]
738    fn mem_tampered_proof_fails_verification() {
739        let mut h = CommitHistory::open();
740        for i in 0..256u64 {
741            h.append(&synth(i)).unwrap();
742        }
743        let target = Position(42);
744        let mut proof = h.prove(target).unwrap();
745        let root = h.root();
746        let commit = synth(42);
747
748        assert!(verify_inclusion(&commit, target, &proof, &root));
749
750        assert!(
751            !proof.digests.is_empty(),
752            "non-trivial proof must carry at least one sibling"
753        );
754        let mut bytes: [u8; HASH_LEN] = [0u8; HASH_LEN];
755        bytes.copy_from_slice(proof.digests[0].as_ref());
756        bytes[0] ^= 0x01;
757        proof.digests[0] = <<Blake3 as CHasher>::Digest as From<[u8; HASH_LEN]>>::from(bytes);
758
759        assert!(
760            !verify_inclusion(&commit, target, &proof, &root),
761            "tampered proof must fail"
762        );
763    }
764
765    // ---- Phase 2 journaled API ------------------------------------
766
767    #[test]
768    fn open_at_rejects_invalid_branch() {
769        let (_tmp, mkit_dir) = fresh_mkit_dir();
770        let exec = fresh_executor();
771        let err = CommitHistory::open_at(exec, &mkit_dir, "../escape").expect_err("invalid branch");
772        assert!(matches!(err, HistoryError::InvalidBranch(_)));
773    }
774
775    #[test]
776    fn open_at_empty_root_matches_mem() {
777        let (_tmp, mkit_dir) = fresh_mkit_dir();
778        let exec = fresh_executor();
779        let h_disk = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
780        let h_mem = CommitHistory::open();
781        assert_eq!(
782            h_disk.root(),
783            h_mem.root(),
784            "empty journaled root must match empty mem root"
785        );
786        assert!(h_disk.is_empty());
787    }
788
789    #[test]
790    fn open_at_round_trip_100_commits() {
791        let (_tmp, mkit_dir) = fresh_mkit_dir();
792        let exec = fresh_executor();
793
794        let commits: Vec<Hash> = (0..100u64).map(synth).collect();
795        let (root_before, len_before) = {
796            let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
797            for c in &commits {
798                h.append(c).unwrap();
799            }
800            (h.root(), h.len())
801        };
802        // Re-open and verify the root survives.
803        let h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
804        assert_eq!(h.len(), len_before, "leaf count must survive reopen");
805        assert_eq!(h.root(), root_before, "root must survive reopen");
806    }
807
808    #[test]
809    fn open_at_prove_after_reopen() {
810        let (_tmp, mkit_dir) = fresh_mkit_dir();
811        let exec = fresh_executor();
812        let commits: Vec<Hash> = (0..64u64).map(synth).collect();
813        let root = {
814            let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
815            for c in &commits {
816                h.append(c).unwrap();
817            }
818            h.root()
819        };
820        let h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
821        let target = Position(17);
822        let proof = h.prove(target).unwrap();
823        assert!(verify_inclusion(&commits[17], target, &proof, &root));
824    }
825
826    #[test]
827    fn open_at_distinct_branches_have_distinct_roots() {
828        let (_tmp, mkit_dir) = fresh_mkit_dir();
829        let exec = fresh_executor();
830        let mut main = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
831        let mut dev = CommitHistory::open_at(exec.clone(), &mkit_dir, "dev").unwrap();
832        main.append(&synth(0)).unwrap();
833        dev.append(&synth(1)).unwrap();
834        assert_ne!(main.root(), dev.root());
835    }
836
837    #[test]
838    fn open_at_branch_with_slash_is_isolated() {
839        // Two branches that sanitize to different partition names
840        // must not share state.
841        let (_tmp, mkit_dir) = fresh_mkit_dir();
842        let exec = fresh_executor();
843        let mut a = CommitHistory::open_at(exec.clone(), &mkit_dir, "feat/v1").unwrap();
844        let mut b = CommitHistory::open_at(exec.clone(), &mkit_dir, "feat/v2").unwrap();
845        a.append(&synth(0)).unwrap();
846        assert_eq!(a.len(), 1);
847        assert_eq!(b.len(), 0, "sibling branch must not see appended leaf");
848        b.append(&synth(1)).unwrap();
849        assert_ne!(a.root(), b.root());
850    }
851
852    #[test]
853    fn open_at_root_matches_live_mem_root() {
854        // Executor swap test: a journaled MMR's root must equal the
855        // pure-mem MMR's root computed over the same leaf sequence.
856        // This proves the executor and the persistence layer are
857        // opaque to MMR semantics.
858        let (_tmp, mkit_dir) = fresh_mkit_dir();
859        let exec = fresh_executor();
860        let commits: Vec<Hash> = (0..50u64).map(synth).collect();
861
862        let mut journaled = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
863        let mut mem = CommitHistory::open();
864        for c in &commits {
865            journaled.append(c).unwrap();
866            mem.append(c).unwrap();
867        }
868        assert_eq!(
869            journaled.root(),
870            mem.root(),
871            "journaled and mem MMRs must produce the same root for the same leaf sequence"
872        );
873    }
874
875    // ---- Negative-path verification (journaled flow) -------------
876
877    /// Phase-1-equivalent: appending must change the root. Same
878    /// property as the deleted `root_changes_on_append` test, lifted
879    /// onto the journaled flavour.
880    #[test]
881    fn journaled_root_changes_on_append() {
882        let (_tmp, mkit_dir) = fresh_mkit_dir();
883        let exec = fresh_executor();
884        let mut h = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
885        let root_before = h.root();
886        h.append(&synth(0)).unwrap();
887        let root_after = h.root();
888        assert_ne!(
889            root_before, root_after,
890            "appending a leaf must change the journaled MMR root"
891        );
892    }
893
894    /// Phase-1-equivalent: an honest inclusion proof must not verify
895    /// against a different commit hash than the one that was appended.
896    #[test]
897    fn journaled_wrong_commit_fails_verification() {
898        let (_tmp, mkit_dir) = fresh_mkit_dir();
899        let exec = fresh_executor();
900        let mut h = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
901        let h_a = synth(0);
902        let h_other = synth(99);
903        h.append(&h_a).unwrap();
904        let proof = h.prove(Position(0)).unwrap();
905        let root = h.root();
906
907        assert!(
908            verify_inclusion(&h_a, Position(0), &proof, &root),
909            "honest proof must verify with the appended commit"
910        );
911        assert!(
912            !verify_inclusion(&h_other, Position(0), &proof, &root),
913            "swapping in a different commit hash must fail verification"
914        );
915    }
916
917    /// Phase-1-equivalent: an inclusion proof from one branch's MMR
918    /// must not verify against another branch's root.
919    #[test]
920    fn journaled_wrong_root_fails_verification() {
921        let (_tmp, mkit_dir) = fresh_mkit_dir();
922        let exec = fresh_executor();
923
924        let h_a = synth(0);
925        let mut a = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
926        a.append(&h_a).unwrap();
927        let proof = a.prove(Position(0)).unwrap();
928        let root_a = a.root();
929        assert!(
930            verify_inclusion(&h_a, Position(0), &proof, &root_a),
931            "sanity: honest proof verifies against its own root"
932        );
933
934        // Independent branch with different leaves — distinct root.
935        let mut b = CommitHistory::open_at(exec, &mkit_dir, "dev").unwrap();
936        b.append(&synth(1)).unwrap();
937        b.append(&synth(2)).unwrap();
938        let root_b = b.root();
939        assert_ne!(root_a, root_b, "distinct branches must have distinct roots");
940
941        assert!(
942            !verify_inclusion(&h_a, Position(0), &proof, &root_b),
943            "proof from one branch must not verify against another branch's root"
944        );
945    }
946
947    // ---- Crash recovery (commonware-native semantics) ----------
948
949    /// Simulate a torn write: truncate one journal blob mid-frame and
950    /// verify that commonware's `Journaled::init` either rolls forward
951    /// to the last valid size OR surfaces a recoverable error.
952    ///
953    /// SPEC-HISTORY-PROOF §4.4: mkit relies on commonware's native
954    /// recovery; mkit's own contract is only that reopening a
955    /// half-written journal does NOT panic and does NOT silently
956    /// expose stale data.
957    #[test]
958    fn truncated_journal_rolls_forward_or_surfaces_corruption() {
959        let (_tmp, mkit_dir) = fresh_mkit_dir();
960        let exec = fresh_executor();
961        let commits: Vec<Hash> = (0..32u64).map(synth).collect();
962
963        let len_before = {
964            let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
965            for c in &commits {
966                h.append(c).unwrap();
967            }
968            h.len()
969        };
970        assert_eq!(len_before, 32);
971
972        // Find the journal blob directory and chop a few bytes off
973        // the tail of the largest file. The sanitized partition name
974        // for branch "main" is "main" → "main__journal".
975        // commonware lays out the journal partition's blobs in a
976        // subdirectory named `<partition>-blobs`; the journal metadata
977        // sidecar sits at `<partition>-metadata`. The truncation
978        // target is whichever blob holds actual digest data.
979        let journal_root = mkit_dir.join(HISTORY_DIR).join("main__journal-blobs");
980        assert!(
981            journal_root.exists(),
982            "journal blob dir must exist after appends"
983        );
984        let mut largest: Option<(std::path::PathBuf, u64)> = None;
985        for entry in std::fs::read_dir(&journal_root).unwrap() {
986            let entry = entry.unwrap();
987            let meta = entry.metadata().unwrap();
988            if meta.is_file() {
989                let path = entry.path();
990                let len = meta.len();
991                if largest.as_ref().is_none_or(|(_, l)| len > *l) {
992                    largest = Some((path, len));
993                }
994            }
995        }
996        let (blob_path, blob_len) = largest.expect("at least one blob present after 32 appends");
997        assert!(
998            blob_len > 5,
999            "blob must be large enough to truncate (got {blob_len} bytes)"
1000        );
1001
1002        // Truncate 5 bytes off the end — fewer than a digest (32 B), so
1003        // commonware sees a torn frame at the tail.
1004        let truncated_len = blob_len - 5;
1005        let f = std::fs::OpenOptions::new()
1006            .write(true)
1007            .open(&blob_path)
1008            .unwrap();
1009        f.set_len(truncated_len).unwrap();
1010        drop(f);
1011
1012        // Reopen. mkit's contract: either succeeds (rolled forward to
1013        // last valid size, possibly fewer leaves than before) OR
1014        // surfaces a `Corrupted` error. Crucially: no panic, no
1015        // silent-stale-root.
1016        let reopened = CommitHistory::open_at(exec, &mkit_dir, "main");
1017        match reopened {
1018            Ok(h) => {
1019                assert!(
1020                    h.len() <= len_before,
1021                    "rolled-forward leaf count must not exceed the pre-truncation count"
1022                );
1023                // The root of the rolled-forward state, if non-empty,
1024                // must equal the root of a freshly-built mem MMR over
1025                // the surviving leaves — proving roll-forward is
1026                // semantically consistent.
1027                if !h.is_empty() {
1028                    let n = usize::try_from(h.len()).expect("leaf count fits in usize");
1029                    let mut reference = CommitHistory::open();
1030                    for c in &commits[..n] {
1031                        reference.append(c).unwrap();
1032                    }
1033                    assert_eq!(
1034                        h.root(),
1035                        reference.root(),
1036                        "rolled-forward root must equal a clean replay of the surviving leaves"
1037                    );
1038                }
1039            }
1040            Err(HistoryError::Corrupted(_)) => {
1041                // Acceptable: commonware refused to recover. Surfaced
1042                // to the caller via the documented error variant.
1043            }
1044            Err(other) => panic!("unexpected error on truncated journal: {other:?}"),
1045        }
1046    }
1047
1048    // ---- Rebuild shim --------------------------------------------
1049
1050    #[test]
1051    fn rebuild_from_chain_matches_live_appends() {
1052        let (_tmp, mkit_dir_a) = fresh_mkit_dir();
1053        let (_tmp_b, mkit_dir_b) = fresh_mkit_dir();
1054        let exec = fresh_executor();
1055
1056        // Build the "reference" history by live appends.
1057        let commits: Vec<Hash> = (0..10u64).map(synth).collect();
1058        let reference_root = {
1059            let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir_a, "main").unwrap();
1060            for c in &commits {
1061                h.append(c).unwrap();
1062            }
1063            h.root()
1064        };
1065
1066        // Simulate the v0.1.x situation: refs/heads/main has a tip
1067        // commit, but `<mkit_dir>/history/` is empty. The walker
1068        // returns each commit's parent until root.
1069        let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir_b, "main").unwrap();
1070        let tip = commits[commits.len() - 1];
1071        let count = rebuild_from_chain::<TokioExecutor, _, std::io::Error>(&mut h, tip, |hash| {
1072            // Find this hash in `commits`, return the prior one or
1073            // None if at index 0.
1074            let idx = commits
1075                .iter()
1076                .position(|c| c == hash)
1077                .expect("walker called with unknown hash");
1078            if idx == 0 {
1079                Ok(None)
1080            } else {
1081                Ok(Some(commits[idx - 1]))
1082            }
1083        })
1084        .unwrap();
1085        assert_eq!(count, commits.len() as u64);
1086        assert_eq!(
1087            h.root(),
1088            reference_root,
1089            "rebuilt root must match live-append root"
1090        );
1091    }
1092
1093    // ---- Sanitization --------------------------------------------
1094
1095    #[test]
1096    fn sanitize_branch_round_trip_invariants() {
1097        // Different ref-name byte sequences must encode to different
1098        // partition tokens.
1099        assert_ne!(sanitize_branch("feat/v1.0"), sanitize_branch("feat_v1_0"));
1100        // Plain alpha-numeric / dash names pass through unchanged.
1101        assert_eq!(sanitize_branch("main"), "main");
1102        assert_eq!(sanitize_branch("release-2026"), "release-2026");
1103        // `/` escapes to `_2f`.
1104        assert_eq!(sanitize_branch("a/b"), "a_2fb");
1105        // `.` escapes to `_2e`.
1106        assert_eq!(sanitize_branch("v1.0"), "v1_2e0");
1107        // `_` itself escapes to `_5f` to keep the encoding injective.
1108        assert_eq!(sanitize_branch("a_b"), "a_5fb");
1109    }
1110}
mkit_core/history.rs

mkit_core/
history.rs