mkit_core/history.rs
1//! Append-only commit-history Merkle Mountain Range (MMR).
2//!
3//! Issue #157. Light-client inclusion proofs for the commit chain: a
4//! verifier with the MMR root for a branch tip can check "commit X was
5//! leaf N on this branch" with `O(log n)` hash work, without
6//! downloading the parent chain or any pack.
7//!
8//! # Status
9//!
10//! - **Phase 1** (shipped, [`CommitHistory::open`]): `mem`-backed
11//! in-memory MMR. Lost on process exit. Useful for tests and
12//! short-lived computations where the proof is the only output.
13//! - **Phase 2** (this build, [`CommitHistory::open_at`]): on-disk
14//! journaled MMR backed by
15//! [`commonware_storage::merkle::mmr::full::Mmr`] pinned to
16//! `=2026.5.0`. The on-disk layout is commonware's native two-store
17//! shape — a fixed-item journal of node digests plus a metadata
18//! sidecar for pruned pinned nodes — laid out under
19//! `<mkit_dir>/history/<sanitized_branch>/`. See
20//! `docs/SPEC-HISTORY-PROOF.md` §4.
21//! - **Phase 3** (planned, v0.2): `Commit.history_root` proto field,
22//! new signing-bytes layout. Out of scope here.
23//!
24//! # Hashing
25//!
26//! The underlying primitive is `commonware-storage`'s journaled MMR
27//! parameterised over BLAKE3 (from `commonware-cryptography`). Node
28//! digests are 32-byte BLAKE3 values — same primitive mkit already
29//! uses elsewhere ([`crate::hash::Hash`]). The schedule is **not** the
30//! same as [`crate::hash::hash`]: commonware injects each node's
31//! position into the parent/leaf digest. Treat the digests as opaque
32//! beyond comparison.
33//!
34//! # Sync / async bridge
35//!
36//! commonware's journaled MMR is async over a `commonware-runtime`
37//! `Context`. The mkit-core public surface ([`CommitHistory::append`],
38//! [`CommitHistory::root`], [`CommitHistory::prove`]) is synchronous
39//! by design — it is called from the synchronous `refs::update_ref`
40//! path and from CLI helpers that have no async-runtime context.
41//!
42//! The bridge is the [`crate::protocol::async_shim::Executor`] trait
43//! hoisted in PR #167. [`CommitHistory`] is generic over an executor
44//! `X: Executor`; every sync method drives its async counterpart via
45//! `executor.block_on(...)`. The executor is held by [`Arc`] for the
46//! lifetime of the [`CommitHistory`] value so multiple `append` /
47//! `prove` calls share a single runtime.
48//!
49//! # Executor / Context ownership
50//!
51//! [`CommitHistory::open_at`] takes an [`Arc`]-shared executor from
52//! the caller. The commonware `Context` needed to drive the
53//! journaled MMR is bootstrapped *internally* via a one-shot
54//! [`commonware_runtime::tokio::Runner::start`] on a fresh OS thread
55//! (the standard workaround documented in transport-enc Phase 2):
56//! the runner returns a Context clone, the outer `Arc<Executor>` inside
57//! the Context keeps tokio's runtime alive, and the bootstrap thread
58//! joins immediately. Subsequent async ops are driven through the
59//! caller-supplied executor.
60//!
61//! This means production callers only need to construct an executor
62//! ([`crate::history::tokio_executor::TokioExecutor`] is provided when
63//! the `history-mmr` feature is on); mkit-core handles the
64//! commonware-side wiring. The trade-off: every [`CommitHistory`]
65//! owns one tokio runtime (via its executor) AND one commonware
66//! Context with its own inner tokio runtime. The two never need to
67//! interact — `tokio::fs` and `tokio::sync::Mutex` are runtime-agnostic
68//! and work whichever runtime is driving the poll.
69
70use std::path::{Path, PathBuf};
71use std::sync::Arc;
72
73use commonware_cryptography::{Blake3, Hasher as CHasher};
74use commonware_parallel::Sequential;
75use commonware_runtime::{Runner as _, Supervisor as _, buffer::paged::CacheRef};
76use commonware_storage::merkle::Bagging;
77use commonware_storage::merkle::mmr::{
78 Location as MmrLocation, Proof as MmrProof, StandardHasher,
79 full::{Config as JConfig, Mmr as JournaledMmr},
80 mem::Mmr as MemMmr,
81};
82use commonware_utils::{NZU16, NZU64, NZUsize};
83
84use crate::hash::{HASH_LEN, Hash};
85use crate::protocol::async_shim::Executor;
86use crate::refs::validate_ref_name;
87
88pub mod tokio_executor;
89
90/// Re-export of the bundled tokio-runtime executor.
91pub use tokio_executor::TokioExecutor;
92
93/// Peak-bagging policy for the commit-history MMR.
94///
95/// This is a **load-bearing cryptographic parameter**: the producer
96/// ([`CommitHistory::root`] / [`CommitHistory::prove`]) and the verifier
97/// ([`verify_inclusion`]) must fold MMR peaks into a root identically, or
98/// every inclusion proof silently fails to verify. Pinning it here — and
99/// consuming it only via [`history_hasher`] — makes that agreement
100/// un-desyncable across all call sites and both code paths.
101///
102/// `ForwardFold` is also a **back-compat** choice: it reproduces the root
103/// that the pre-2026.5 commonware default produced byte-for-byte, so MMR
104/// journals written by an older mkit build and roots already stored in the
105/// reflog stay valid across the commonware bump. Do **not** change this
106/// without an on-disk format-version bump and a migration.
107const HISTORY_BAGGING: Bagging = Bagging::ForwardFold;
108
109/// The canonical hasher for the commit-history MMR — the single source of
110/// truth for [`HISTORY_BAGGING`], so the producer and verifier sides can
111/// never drift on the bagging policy.
112fn history_hasher() -> StandardHasher<Blake3> {
113 StandardHasher::new(HISTORY_BAGGING)
114}
115
116// ---------------------------------------------------------------------
117// Public types
118// ---------------------------------------------------------------------
119
120/// 0-based index of a commit within its branch's MMR.
121///
122/// In commonware's vocabulary this is a `Location` — the leaf index in
123/// insertion order, not the MMR's internal node position. The first
124/// commit appended is `Position(0)`, the second is `Position(1)`, etc.
125/// Stable for the lifetime of the branch: positions never shift
126/// because the MMR is append-only.
127#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
128pub struct Position(pub u64);
129
130impl Position {
131 /// Raw `u64`.
132 #[must_use]
133 pub const fn as_u64(self) -> u64 {
134 self.0
135 }
136}
137
138/// Inclusion proof — re-export of commonware's MMR proof type bound to
139/// our BLAKE3 digest. Wire shape is normatively defined in
140/// `SPEC-HISTORY-PROOF.md` §2.
141pub type InclusionProof = MmrProof<<Blake3 as CHasher>::Digest>;
142
143/// Errors returned by [`CommitHistory`] and [`verify_inclusion`].
144#[derive(Debug, thiserror::Error)]
145pub enum HistoryError {
146 /// The underlying MMR rejected the operation (out-of-bounds proof,
147 /// invalid size, etc.). The wrapped string is commonware's own
148 /// `Display` impl — stable enough for logs, not parsed.
149 #[error("mmr error: {0}")]
150 Mmr(String),
151 /// The branch name failed [`crate::refs::validate_ref_name`].
152 #[error("invalid branch name for history journal: {0:?}")]
153 InvalidBranch(String),
154 /// On-disk journal could not be opened or recovered. commonware's
155 /// own `init` performs roll-forward recovery on a half-written
156 /// trailing leaf (see `commonware_storage::merkle::mmr::full`
157 /// docs); this variant surfaces failures it cannot recover from.
158 #[error("history journal is corrupt: {0}")]
159 Corrupted(String),
160 /// Failed to bootstrap the commonware tokio Context.
161 #[error("failed to bootstrap commonware runtime: {0}")]
162 RuntimeBootstrap(String),
163 /// Failed to set up the on-disk history directory.
164 #[error("history directory I/O: {0}")]
165 Io(#[from] std::io::Error),
166}
167
168// ---------------------------------------------------------------------
169// On-disk layout
170// ---------------------------------------------------------------------
171
172/// Subdirectory of `<mkit_dir>` that holds per-branch MMR journals.
173///
174/// commonware lays its journaled-MMR partitions out as a pair of
175/// suffix-discriminated directories below this root:
176///
177/// ```text
178/// <mkit_dir>/history/<sanitized_branch>__journal-blobs/ # node-digest journal blobs
179/// <mkit_dir>/history/<sanitized_branch>__journal-metadata/ # journal segment table
180/// <mkit_dir>/history/<sanitized_branch>__metadata/ # pruned-pinned-node sidecar
181/// ```
182///
183/// The `-blobs` / `-metadata` segment suffixes are appended by
184/// commonware-storage; mkit names the leading partition tokens
185/// (`<sanitized_branch>__journal` and `<sanitized_branch>__metadata`)
186/// and hands them to `journaled::Mmr::init` via [`JConfig`].
187///
188/// commonware partition names are restricted to `[A-Za-z0-9_-]+`, so
189/// branch names go through `sanitize_branch` — a `_xx`-hex encoding
190/// of every byte outside `[A-Za-z0-9-]`. Empty branches and invalid
191/// ref names are rejected up front by [`CommitHistory::open_at`].
192pub const HISTORY_DIR: &str = "history";
193
194/// Suffix appended to the branch's partition name for the node-digest
195/// journal.
196pub const JOURNAL_PARTITION_SUFFIX: &str = "__journal";
197
198/// Suffix appended to the branch's partition name for the
199/// pruned-pinned-node metadata sidecar.
200pub const METADATA_PARTITION_SUFFIX: &str = "__metadata";
201
202// ---------------------------------------------------------------------
203// CommitHistory
204// ---------------------------------------------------------------------
205
206/// Append-only Merkle history of commit hashes for one branch.
207///
208/// Two construction modes:
209///
210/// 1. [`CommitHistory::open`] — purely in-memory. Useful for tests
211/// and for callers that don't want any disk side-effects. Lost on
212/// drop.
213/// 2. [`CommitHistory::open_at`] — on-disk journaled MMR under
214/// `<mkit_dir>/history/<branch>/`. Survives process exit.
215///
216/// Each call to a sync method (`append`, `root`, `prove`) on the
217/// journaled flavour drives an async operation on the underlying
218/// `commonware-storage::journaled::Mmr` via the caller-supplied
219/// [`Executor`].
220pub struct CommitHistory<X: Executor = TokioExecutor> {
221 backend: Backend<X>,
222 hasher: StandardHasher<Blake3>,
223}
224
225/// Internal backend selector. The mem variant is the Phase-1 shape; the
226/// journaled variant is Phase-2.
227///
228/// `Journaled` is boxed because its inner state (commonware's
229/// `Journaled` plus a `Context` clone) is ~2.2 KiB and would otherwise
230/// pad every mem-flavour `CommitHistory` by the same amount.
231enum Backend<X: Executor> {
232 Mem {
233 mmr: MemMmr<<Blake3 as CHasher>::Digest>,
234 },
235 Journaled(Box<JournaledBackend<X>>),
236}
237
238struct JournaledBackend<X: Executor> {
239 // Order matters for Drop: `mmr` must drop before `_ctx` so
240 // any pending async-resource shutdowns can still poll on the
241 // surviving tokio runtime. In practice commonware's
242 // `Journaled` only flushes synchronously in `sync`, but the
243 // ordering is cheap insurance.
244 mmr: JournaledMmr<commonware_runtime::tokio::Context, <Blake3 as CHasher>::Digest, Sequential>,
245 executor: Arc<X>,
246 // Held to keep the bootstrap tokio runtime (inside the Context's
247 // executor `Arc`) alive for the whole CommitHistory lifetime.
248 _ctx: commonware_runtime::tokio::Context,
249 // Held so update_ref_with_history can take a fresh RepoLock for
250 // every append. None for the mem-only flavour.
251 mkit_dir: PathBuf,
252 branch: String,
253}
254
255impl<X: Executor> core::fmt::Debug for CommitHistory<X> {
256 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
257 match &self.backend {
258 Backend::Mem { mmr } => f
259 .debug_struct("CommitHistory::Mem")
260 .field("leaves", &u64::from(mmr.leaves()))
261 .field("size", &u64::from(mmr.size()))
262 .finish_non_exhaustive(),
263 Backend::Journaled(b) => f
264 .debug_struct("CommitHistory::Journaled")
265 .field("branch", &b.branch)
266 .field("leaves", &u64::from(b.mmr.leaves()))
267 .field("size", &u64::from(b.mmr.size()))
268 .finish_non_exhaustive(),
269 }
270 }
271}
272
273impl CommitHistory<TokioExecutor> {
274 /// Open a fresh empty in-memory history (Phase 1 shape).
275 ///
276 /// Lost on drop. Useful for unit tests and for callers that just
277 /// need a proof bundle without committing any state to disk.
278 #[must_use]
279 pub fn open() -> Self {
280 let hasher = history_hasher();
281 let mmr = MemMmr::new();
282 Self {
283 backend: Backend::Mem { mmr },
284 hasher,
285 }
286 }
287}
288
289impl<X: Executor + 'static> CommitHistory<X> {
290 /// Open a persisted history under `<mkit_dir>/history/<branch>/`.
291 ///
292 /// The on-disk layout is commonware-storage's native journaled MMR
293 /// shape — see [`HISTORY_DIR`] and SPEC-HISTORY-PROOF §4. If the
294 /// underlying journal does not yet exist, it is created empty
295 /// (subsequent [`CommitHistory::append`] calls populate it).
296 ///
297 /// Crash recovery is delegated to commonware: its `Journaled::init`
298 /// detects a half-written trailing leaf, rewinds the journal to
299 /// the last valid size, and re-derives the in-memory state. See
300 /// SPEC-HISTORY-PROOF §4.4 for the contract surfaced to mkit
301 /// callers.
302 ///
303 /// # Errors
304 ///
305 /// - [`HistoryError::InvalidBranch`] — `branch` failed
306 /// [`validate_ref_name`].
307 /// - [`HistoryError::RuntimeBootstrap`] — could not start the
308 /// commonware tokio Runner used to construct the underlying
309 /// storage Context.
310 /// - [`HistoryError::Corrupted`] — commonware refused to recover
311 /// the journal (a deeper-than-trailing-leaf corruption).
312 /// - [`HistoryError::Io`] — failed to create the history dir.
313 pub fn open_at(executor: Arc<X>, mkit_dir: &Path, branch: &str) -> Result<Self, HistoryError> {
314 if !validate_ref_name(branch) {
315 return Err(HistoryError::InvalidBranch(branch.to_string()));
316 }
317
318 let history_dir = mkit_dir.join(HISTORY_DIR);
319 std::fs::create_dir_all(&history_dir)?;
320
321 // Bootstrap a commonware tokio Context rooted at
322 // `<mkit_dir>/history`. The Context survives the bootstrap
323 // runner's drop because its inner `Arc<Executor>` (the
324 // commonware Executor, not ours) holds the tokio runtime alive.
325 let ctx = bootstrap_commonware_context(&history_dir)?;
326
327 let sanitized = sanitize_branch(branch);
328 let journal_partition = format!("{sanitized}{JOURNAL_PARTITION_SUFFIX}");
329 let metadata_partition = format!("{sanitized}{METADATA_PARTITION_SUFFIX}");
330 let cfg = JConfig {
331 journal_partition,
332 metadata_partition,
333 // 4096 items/blob → 128 KiB blobs (32 B digest × 4096). Big
334 // enough that small branches stay in one blob; small enough
335 // that pruning later doesn't carry stale data around.
336 items_per_blob: NZU64!(4096),
337 write_buffer: NZUsize!(4096),
338 strategy: Sequential,
339 page_cache: CacheRef::from_pooler(&ctx, NZU16!(4096), NZUsize!(8)),
340 };
341
342 let hasher = history_hasher();
343 let mmr = {
344 let hasher_inner = history_hasher();
345 // `child` returns an owned child Context, leaving `ctx` usable for
346 // the surviving CommitHistory (which keeps the bootstrap runtime
347 // alive via its inner executor Arc).
348 let ctx_for_init = ctx.child("mmr_init");
349 executor
350 .block_on(async move {
351 JournaledMmr::<_, <Blake3 as CHasher>::Digest, Sequential>::init(
352 ctx_for_init,
353 &hasher_inner,
354 cfg,
355 )
356 .await
357 })
358 .map_err(|e| HistoryError::Corrupted(e.to_string()))?
359 };
360
361 Ok(Self {
362 backend: Backend::Journaled(Box::new(JournaledBackend {
363 mmr,
364 executor,
365 _ctx: ctx,
366 mkit_dir: mkit_dir.to_path_buf(),
367 branch: branch.to_string(),
368 })),
369 hasher,
370 })
371 }
372
373 /// Borrow the `mkit_dir` this history was opened against. `None`
374 /// for the mem-only flavour. Used by
375 /// [`crate::refs::update_ref_with_history`] to take a `RepoLock`
376 /// around the ref-write + MMR-append critical section.
377 #[must_use]
378 pub fn mkit_dir(&self) -> Option<&Path> {
379 match &self.backend {
380 Backend::Mem { .. } => None,
381 Backend::Journaled(b) => Some(b.mkit_dir.as_path()),
382 }
383 }
384
385 /// Borrow the branch name this history was opened against. `None`
386 /// for the mem-only flavour.
387 #[must_use]
388 pub fn branch(&self) -> Option<&str> {
389 match &self.backend {
390 Backend::Mem { .. } => None,
391 Backend::Journaled(b) => Some(b.branch.as_str()),
392 }
393 }
394
395 /// Append a commit hash. Returns its leaf [`Position`].
396 ///
397 /// Positions are dense: the *n*-th append returns `Position(n)`.
398 /// For the journaled flavour, the underlying MMR is `sync`'d to
399 /// disk before returning — survives a `SIGKILL` immediately after.
400 pub fn append(&mut self, commit_hash: &Hash) -> Result<Position, HistoryError> {
401 let leaf = digest_from_hash(commit_hash);
402 match &mut self.backend {
403 Backend::Mem { mmr } => {
404 let leaf_loc = mmr.leaves();
405 let batch = mmr
406 .new_batch()
407 .add(&self.hasher, &leaf)
408 .merkleize(mmr, &self.hasher);
409 mmr.apply_batch(&batch)
410 .map_err(|e| HistoryError::Mmr(e.to_string()))?;
411 Ok(Position(u64::from(leaf_loc)))
412 }
413 Backend::Journaled(b) => {
414 let leaf_loc = b.mmr.leaves();
415 let batch = b.mmr.new_batch().add(&self.hasher, &leaf);
416 let batch = b.mmr.with_mem(|mem| batch.merkleize(mem, &self.hasher));
417 b.mmr
418 .apply_batch(&batch)
419 .map_err(|e| HistoryError::Mmr(e.to_string()))?;
420 // Flush to disk synchronously so a SIGKILL between
421 // append and the caller's next ref-write does not lose
422 // the leaf we just promised to persist.
423 let sync_fut = b.mmr.sync();
424 b.executor
425 .block_on(sync_fut)
426 .map_err(|e| HistoryError::Mmr(e.to_string()))?;
427 Ok(Position(u64::from(leaf_loc)))
428 }
429 }
430 }
431
432 /// Current MMR root digest. 32-byte BLAKE3 (mkit `Hash` shape).
433 ///
434 /// Defined for an empty history — commonware returns a
435 /// deterministic empty-MMR root (see SPEC-HISTORY-PROOF §2.3).
436 ///
437 /// # Panics
438 ///
439 /// Never panics in practice. Internally calls commonware's
440 /// `root(&hasher, 0)`, which only returns an error for a non-zero
441 /// inactive-peak count; mkit always requests `0` inactive peaks
442 /// (its proofs are self-contained over the full leaf set), so the
443 /// `Result` is always `Ok`.
444 #[must_use]
445 pub fn root(&self) -> Hash {
446 // `root` takes the hasher + an `inactive_peaks` count (0 for a
447 // self-contained MMR over the full leaf set) and returns an owned
448 // `Digest`. Both backends are sync here (Journaled reads its
449 // in-memory cache); 0 inactive peaks is always a valid request,
450 // so the `Result` is always `Ok` — see the `# Panics` note above.
451 let digest = match &self.backend {
452 Backend::Mem { mmr } => mmr.root(&self.hasher, 0),
453 Backend::Journaled(b) => b.mmr.root(&self.hasher, 0),
454 }
455 .expect("0 inactive peaks is always a valid root request");
456 let mut out = [0u8; HASH_LEN];
457 out.copy_from_slice(digest.as_ref());
458 out
459 }
460
461 /// Number of leaves (commits) appended so far.
462 #[must_use]
463 pub fn len(&self) -> u64 {
464 match &self.backend {
465 Backend::Mem { mmr } => u64::from(mmr.leaves()),
466 Backend::Journaled(b) => u64::from(b.mmr.leaves()),
467 }
468 }
469
470 /// `true` if no commits have been appended.
471 #[must_use]
472 pub fn is_empty(&self) -> bool {
473 self.len() == 0
474 }
475
476 /// Build an inclusion proof for the commit at `position`.
477 pub fn prove(&self, position: Position) -> Result<InclusionProof, HistoryError> {
478 let loc = MmrLocation::new(position.0);
479 match &self.backend {
480 Backend::Mem { mmr } => mmr
481 .proof(&self.hasher, loc, 0)
482 .map_err(|e| HistoryError::Mmr(e.to_string())),
483 Backend::Journaled(b) => {
484 let hasher = self.hasher.clone();
485 let proof_fut = b.mmr.proof(&hasher, loc, 0);
486 // Drive the async proof builder via the executor. The
487 // future borrows `mmr` immutably for the duration of
488 // `block_on`; the borrow ends when `block_on` returns.
489 b.executor
490 .block_on(proof_fut)
491 .map_err(|e| HistoryError::Mmr(e.to_string()))
492 }
493 }
494 }
495}
496
497impl Default for CommitHistory<TokioExecutor> {
498 fn default() -> Self {
499 Self::open()
500 }
501}
502
503/// Verify that `commit_hash` was appended at `position` to a history
504/// whose current root is `root`. Returns `true` on a passing proof,
505/// `false` on any tamper / wrong-position / wrong-root case.
506///
507/// Pure function: no allocation beyond what commonware's verifier does
508/// internally; safe to call from a light-client without any of the
509/// preceding chain bytes.
510#[must_use]
511pub fn verify_inclusion(
512 commit_hash: &Hash,
513 position: Position,
514 proof: &InclusionProof,
515 root: &Hash,
516) -> bool {
517 let leaf = digest_from_hash(commit_hash);
518 let root_digest = digest_from_hash(root);
519 let loc = MmrLocation::new(position.0);
520
521 // Same bagging policy as the producer — see [`HISTORY_BAGGING`].
522 let hasher = history_hasher();
523 proof.verify_element_inclusion(&hasher, leaf.as_ref(), loc, &root_digest)
524}
525
526// ---------------------------------------------------------------------
527// v0.1.x → v0.2.x rebuild shim
528// ---------------------------------------------------------------------
529
530/// Rebuild a branch's history MMR from its on-disk parent chain.
531///
532/// For repos created against an older mkit (v0.1.x, before this
533/// module persisted anything to disk) the `<mkit_dir>/history/<branch>/`
534/// directory is empty on first open, but the branch's commit chain is
535/// already on disk in the object store. This helper walks the
536/// first-parent chain from `tip` to root, then re-appends each commit
537/// (in oldest-first order) into the supplied empty [`CommitHistory`].
538///
539/// The walker is supplied by the caller as `parent_of`: given a
540/// commit hash, it returns `Ok(Some(parent_hash))` if there is a
541/// parent, `Ok(None)` for the root commit, and `Err(_)` for any
542/// lookup failure. This keeps `mkit-core::history` free of an
543/// `ObjectStore` dependency — the rebuild shim is a tool the caller
544/// (typically `mkit-cli`) drives.
545///
546/// # Errors
547///
548/// - Propagates any error from `parent_of`.
549/// - Propagates any [`HistoryError`] from `history.append`.
550pub fn rebuild_from_chain<X, F, E>(
551 history: &mut CommitHistory<X>,
552 tip: Hash,
553 mut parent_of: F,
554) -> Result<u64, RebuildError<E>>
555where
556 X: Executor + 'static,
557 F: FnMut(&Hash) -> Result<Option<Hash>, E>,
558 E: core::fmt::Display,
559{
560 let mut chain = Vec::new();
561 let mut cursor = Some(tip);
562 while let Some(h) = cursor {
563 chain.push(h);
564 cursor = parent_of(&h).map_err(RebuildError::Walker)?;
565 }
566 // Walker yielded tip..root; we need root..tip for the append order.
567 chain.reverse();
568 let count = chain.len() as u64;
569 for h in &chain {
570 history.append(h).map_err(RebuildError::History)?;
571 }
572 Ok(count)
573}
574
575/// Errors returned by [`rebuild_from_chain`].
576#[derive(Debug, thiserror::Error)]
577pub enum RebuildError<E: core::fmt::Display> {
578 /// The caller-supplied parent-walker failed. The wrapped `E`
579 /// carries the walker's error verbatim, so callers that pass a
580 /// structured error type get it back unchanged.
581 #[error("parent-chain walker failed: {0}")]
582 Walker(E),
583 /// The underlying [`CommitHistory::append`] failed.
584 #[error(transparent)]
585 History(HistoryError),
586}
587
588// ---------------------------------------------------------------------
589// Internals
590// ---------------------------------------------------------------------
591
592/// Convert an mkit `Hash` (`[u8; 32]`) into commonware's `Blake3::Digest`.
593fn digest_from_hash(h: &Hash) -> <Blake3 as CHasher>::Digest {
594 <<Blake3 as CHasher>::Digest as From<[u8; HASH_LEN]>>::from(*h)
595}
596
597/// Hex-escape a mkit branch name into a commonware-partition-safe
598/// token.
599///
600/// commonware partition names are restricted to `[A-Za-z0-9_-]+`. mkit
601/// ref names can contain `.` and `/` (and `_`, which IS allowed
602/// upstream but which we must escape too to keep this encoding
603/// injective). Every non-`[A-Za-z0-9-]` byte is encoded as `_xx`
604/// where `xx` is the lowercase-hex byte value; `_` itself encodes as
605/// `_5f`. This makes the encoding self-delimiting — every `_` in the
606/// output is the lead-in of a two-hex-digit escape, never a literal.
607///
608/// Examples:
609/// `main` → `main`
610/// `feat/v1.0` → `feat_2fv1_2e0`
611/// `feat_v1_0` → `feat_5fv1_5f0`
612///
613/// The encoding is injective: different valid branch names always
614/// sanitize to different partition tokens, so the journaled MMRs of
615/// two branches in the same repo never share commonware partitions.
616fn sanitize_branch(name: &str) -> String {
617 let mut out = String::with_capacity(name.len());
618 for &b in name.as_bytes() {
619 let allowed = b.is_ascii_alphanumeric() || b == b'-';
620 if allowed {
621 out.push(b as char);
622 } else {
623 use core::fmt::Write as _;
624 let _ = write!(&mut out, "_{b:02x}");
625 }
626 }
627 out
628}
629
630/// Bootstrap a commonware tokio `Context` whose `storage_directory`
631/// is the supplied path.
632///
633/// The bootstrap is done on a fresh OS thread because tokio refuses to
634/// nest `runtime::Builder::build()` inside an already-active runtime
635/// (the `TokioExecutor`'s runtime is already alive in the caller's
636/// thread). The returned Context's inner `Arc<Executor>` keeps the
637/// bootstrap runtime alive after the bootstrap thread joins.
638///
639/// See `docs/SPEC-HISTORY-PROOF.md` §4.1 for the design rationale and
640/// the trade-off with the alternative "share the caller's tokio
641/// runtime" approach.
642fn bootstrap_commonware_context(
643 storage_directory: &Path,
644) -> Result<commonware_runtime::tokio::Context, HistoryError> {
645 let dir = storage_directory.to_path_buf();
646 std::thread::spawn(move || {
647 let cfg = commonware_runtime::tokio::Config::new().with_storage_directory(dir);
648 let runner = commonware_runtime::tokio::Runner::new(cfg);
649 // Return an owned, labelled child Context. `Context::clone` and
650 // `Metrics::with_label` were both removed in 2026.5.0;
651 // `Supervisor::child` returns an owned Context that clones the
652 // inner `executor: Arc<Executor>`, which keeps the tokio runtime
653 // alive after the bootstrap Runner is dropped at the end of
654 // `start`. The label is best-effort so any commonware metrics
655 // surfaced through this Context are easy to spot in a debugger.
656 runner
657 .start(|ctx| async move { commonware_runtime::Supervisor::child(&ctx, "mkit_history") })
658 })
659 .join()
660 .map_err(|_| HistoryError::RuntimeBootstrap("bootstrap thread panicked".to_string()))
661}
662
663// `BufferPooler`, `Clock`, `RStorage`, `Supervisor` are implicit bounds on
664// the `Context` we use — the imports above keep them in scope so trait
665// methods (`child`, `network_buffer_pool`, …) resolve. Re-export nothing.
666#[allow(dead_code)]
667fn _trait_imports_keep_alive() {
668 fn assert_send_sync<T: Send + Sync>() {}
669 assert_send_sync::<commonware_runtime::tokio::Context>();
670}
671
672// ---------------------------------------------------------------------
673// Tests
674// ---------------------------------------------------------------------
675
676#[cfg(test)]
677mod tests {
678 use super::*;
679 use tempfile::TempDir;
680
681 /// Deterministic distinct commit hash generator.
682 fn synth(i: u64) -> Hash {
683 crate::hash::hash(&i.to_be_bytes())
684 }
685
686 fn fresh_mkit_dir() -> (TempDir, PathBuf) {
687 let tmp = TempDir::new().unwrap();
688 let mkit_dir = tmp.path().join(".mkit");
689 std::fs::create_dir_all(&mkit_dir).unwrap();
690 (tmp, mkit_dir)
691 }
692
693 fn fresh_executor() -> Arc<TokioExecutor> {
694 Arc::new(TokioExecutor::new().expect("tokio runtime"))
695 }
696
697 // ---- Phase 1 mem-only API (unchanged from issue #157 Phase 1) --
698
699 #[test]
700 fn mem_empty_history_root_is_well_defined() {
701 let h1 = CommitHistory::open();
702 let h2 = CommitHistory::open();
703 assert_eq!(h1.root(), h2.root(), "empty root must be deterministic");
704 assert!(h1.is_empty());
705 assert_eq!(h1.len(), 0);
706 }
707
708 #[test]
709 fn mem_append_returns_dense_positions() {
710 let mut h = CommitHistory::open();
711 for i in 0..16u64 {
712 let pos = h.append(&synth(i)).unwrap();
713 assert_eq!(pos, Position(i), "positions must be dense and 0-based");
714 }
715 assert_eq!(h.len(), 16);
716 }
717
718 #[test]
719 fn mem_prove_and_verify_position_712_of_1000() {
720 let mut h = CommitHistory::open();
721 let commits: Vec<Hash> = (0..1000u64).map(synth).collect();
722 for c in &commits {
723 h.append(c).unwrap();
724 }
725 assert_eq!(h.len(), 1000);
726
727 let target = Position(712);
728 let proof = h.prove(target).unwrap();
729 let root = h.root();
730
731 assert!(
732 verify_inclusion(&commits[712], target, &proof, &root),
733 "honest proof must verify"
734 );
735 }
736
737 #[test]
738 fn mem_tampered_proof_fails_verification() {
739 let mut h = CommitHistory::open();
740 for i in 0..256u64 {
741 h.append(&synth(i)).unwrap();
742 }
743 let target = Position(42);
744 let mut proof = h.prove(target).unwrap();
745 let root = h.root();
746 let commit = synth(42);
747
748 assert!(verify_inclusion(&commit, target, &proof, &root));
749
750 assert!(
751 !proof.digests.is_empty(),
752 "non-trivial proof must carry at least one sibling"
753 );
754 let mut bytes: [u8; HASH_LEN] = [0u8; HASH_LEN];
755 bytes.copy_from_slice(proof.digests[0].as_ref());
756 bytes[0] ^= 0x01;
757 proof.digests[0] = <<Blake3 as CHasher>::Digest as From<[u8; HASH_LEN]>>::from(bytes);
758
759 assert!(
760 !verify_inclusion(&commit, target, &proof, &root),
761 "tampered proof must fail"
762 );
763 }
764
765 // ---- Phase 2 journaled API ------------------------------------
766
767 #[test]
768 fn open_at_rejects_invalid_branch() {
769 let (_tmp, mkit_dir) = fresh_mkit_dir();
770 let exec = fresh_executor();
771 let err = CommitHistory::open_at(exec, &mkit_dir, "../escape").expect_err("invalid branch");
772 assert!(matches!(err, HistoryError::InvalidBranch(_)));
773 }
774
775 #[test]
776 fn open_at_empty_root_matches_mem() {
777 let (_tmp, mkit_dir) = fresh_mkit_dir();
778 let exec = fresh_executor();
779 let h_disk = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
780 let h_mem = CommitHistory::open();
781 assert_eq!(
782 h_disk.root(),
783 h_mem.root(),
784 "empty journaled root must match empty mem root"
785 );
786 assert!(h_disk.is_empty());
787 }
788
789 #[test]
790 fn open_at_round_trip_100_commits() {
791 let (_tmp, mkit_dir) = fresh_mkit_dir();
792 let exec = fresh_executor();
793
794 let commits: Vec<Hash> = (0..100u64).map(synth).collect();
795 let (root_before, len_before) = {
796 let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
797 for c in &commits {
798 h.append(c).unwrap();
799 }
800 (h.root(), h.len())
801 };
802 // Re-open and verify the root survives.
803 let h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
804 assert_eq!(h.len(), len_before, "leaf count must survive reopen");
805 assert_eq!(h.root(), root_before, "root must survive reopen");
806 }
807
808 #[test]
809 fn open_at_prove_after_reopen() {
810 let (_tmp, mkit_dir) = fresh_mkit_dir();
811 let exec = fresh_executor();
812 let commits: Vec<Hash> = (0..64u64).map(synth).collect();
813 let root = {
814 let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
815 for c in &commits {
816 h.append(c).unwrap();
817 }
818 h.root()
819 };
820 let h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
821 let target = Position(17);
822 let proof = h.prove(target).unwrap();
823 assert!(verify_inclusion(&commits[17], target, &proof, &root));
824 }
825
826 #[test]
827 fn open_at_distinct_branches_have_distinct_roots() {
828 let (_tmp, mkit_dir) = fresh_mkit_dir();
829 let exec = fresh_executor();
830 let mut main = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
831 let mut dev = CommitHistory::open_at(exec.clone(), &mkit_dir, "dev").unwrap();
832 main.append(&synth(0)).unwrap();
833 dev.append(&synth(1)).unwrap();
834 assert_ne!(main.root(), dev.root());
835 }
836
837 #[test]
838 fn open_at_branch_with_slash_is_isolated() {
839 // Two branches that sanitize to different partition names
840 // must not share state.
841 let (_tmp, mkit_dir) = fresh_mkit_dir();
842 let exec = fresh_executor();
843 let mut a = CommitHistory::open_at(exec.clone(), &mkit_dir, "feat/v1").unwrap();
844 let mut b = CommitHistory::open_at(exec.clone(), &mkit_dir, "feat/v2").unwrap();
845 a.append(&synth(0)).unwrap();
846 assert_eq!(a.len(), 1);
847 assert_eq!(b.len(), 0, "sibling branch must not see appended leaf");
848 b.append(&synth(1)).unwrap();
849 assert_ne!(a.root(), b.root());
850 }
851
852 #[test]
853 fn open_at_root_matches_live_mem_root() {
854 // Executor swap test: a journaled MMR's root must equal the
855 // pure-mem MMR's root computed over the same leaf sequence.
856 // This proves the executor and the persistence layer are
857 // opaque to MMR semantics.
858 let (_tmp, mkit_dir) = fresh_mkit_dir();
859 let exec = fresh_executor();
860 let commits: Vec<Hash> = (0..50u64).map(synth).collect();
861
862 let mut journaled = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
863 let mut mem = CommitHistory::open();
864 for c in &commits {
865 journaled.append(c).unwrap();
866 mem.append(c).unwrap();
867 }
868 assert_eq!(
869 journaled.root(),
870 mem.root(),
871 "journaled and mem MMRs must produce the same root for the same leaf sequence"
872 );
873 }
874
875 // ---- Negative-path verification (journaled flow) -------------
876
877 /// Phase-1-equivalent: appending must change the root. Same
878 /// property as the deleted `root_changes_on_append` test, lifted
879 /// onto the journaled flavour.
880 #[test]
881 fn journaled_root_changes_on_append() {
882 let (_tmp, mkit_dir) = fresh_mkit_dir();
883 let exec = fresh_executor();
884 let mut h = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
885 let root_before = h.root();
886 h.append(&synth(0)).unwrap();
887 let root_after = h.root();
888 assert_ne!(
889 root_before, root_after,
890 "appending a leaf must change the journaled MMR root"
891 );
892 }
893
894 /// Phase-1-equivalent: an honest inclusion proof must not verify
895 /// against a different commit hash than the one that was appended.
896 #[test]
897 fn journaled_wrong_commit_fails_verification() {
898 let (_tmp, mkit_dir) = fresh_mkit_dir();
899 let exec = fresh_executor();
900 let mut h = CommitHistory::open_at(exec, &mkit_dir, "main").unwrap();
901 let h_a = synth(0);
902 let h_other = synth(99);
903 h.append(&h_a).unwrap();
904 let proof = h.prove(Position(0)).unwrap();
905 let root = h.root();
906
907 assert!(
908 verify_inclusion(&h_a, Position(0), &proof, &root),
909 "honest proof must verify with the appended commit"
910 );
911 assert!(
912 !verify_inclusion(&h_other, Position(0), &proof, &root),
913 "swapping in a different commit hash must fail verification"
914 );
915 }
916
917 /// Phase-1-equivalent: an inclusion proof from one branch's MMR
918 /// must not verify against another branch's root.
919 #[test]
920 fn journaled_wrong_root_fails_verification() {
921 let (_tmp, mkit_dir) = fresh_mkit_dir();
922 let exec = fresh_executor();
923
924 let h_a = synth(0);
925 let mut a = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
926 a.append(&h_a).unwrap();
927 let proof = a.prove(Position(0)).unwrap();
928 let root_a = a.root();
929 assert!(
930 verify_inclusion(&h_a, Position(0), &proof, &root_a),
931 "sanity: honest proof verifies against its own root"
932 );
933
934 // Independent branch with different leaves — distinct root.
935 let mut b = CommitHistory::open_at(exec, &mkit_dir, "dev").unwrap();
936 b.append(&synth(1)).unwrap();
937 b.append(&synth(2)).unwrap();
938 let root_b = b.root();
939 assert_ne!(root_a, root_b, "distinct branches must have distinct roots");
940
941 assert!(
942 !verify_inclusion(&h_a, Position(0), &proof, &root_b),
943 "proof from one branch must not verify against another branch's root"
944 );
945 }
946
947 // ---- Crash recovery (commonware-native semantics) ----------
948
949 /// Simulate a torn write: truncate one journal blob mid-frame and
950 /// verify that commonware's `Journaled::init` either rolls forward
951 /// to the last valid size OR surfaces a recoverable error.
952 ///
953 /// SPEC-HISTORY-PROOF §4.4: mkit relies on commonware's native
954 /// recovery; mkit's own contract is only that reopening a
955 /// half-written journal does NOT panic and does NOT silently
956 /// expose stale data.
957 #[test]
958 fn truncated_journal_rolls_forward_or_surfaces_corruption() {
959 let (_tmp, mkit_dir) = fresh_mkit_dir();
960 let exec = fresh_executor();
961 let commits: Vec<Hash> = (0..32u64).map(synth).collect();
962
963 let len_before = {
964 let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir, "main").unwrap();
965 for c in &commits {
966 h.append(c).unwrap();
967 }
968 h.len()
969 };
970 assert_eq!(len_before, 32);
971
972 // Find the journal blob directory and chop a few bytes off
973 // the tail of the largest file. The sanitized partition name
974 // for branch "main" is "main" → "main__journal".
975 // commonware lays out the journal partition's blobs in a
976 // subdirectory named `<partition>-blobs`; the journal metadata
977 // sidecar sits at `<partition>-metadata`. The truncation
978 // target is whichever blob holds actual digest data.
979 let journal_root = mkit_dir.join(HISTORY_DIR).join("main__journal-blobs");
980 assert!(
981 journal_root.exists(),
982 "journal blob dir must exist after appends"
983 );
984 let mut largest: Option<(std::path::PathBuf, u64)> = None;
985 for entry in std::fs::read_dir(&journal_root).unwrap() {
986 let entry = entry.unwrap();
987 let meta = entry.metadata().unwrap();
988 if meta.is_file() {
989 let path = entry.path();
990 let len = meta.len();
991 if largest.as_ref().is_none_or(|(_, l)| len > *l) {
992 largest = Some((path, len));
993 }
994 }
995 }
996 let (blob_path, blob_len) = largest.expect("at least one blob present after 32 appends");
997 assert!(
998 blob_len > 5,
999 "blob must be large enough to truncate (got {blob_len} bytes)"
1000 );
1001
1002 // Truncate 5 bytes off the end — fewer than a digest (32 B), so
1003 // commonware sees a torn frame at the tail.
1004 let truncated_len = blob_len - 5;
1005 let f = std::fs::OpenOptions::new()
1006 .write(true)
1007 .open(&blob_path)
1008 .unwrap();
1009 f.set_len(truncated_len).unwrap();
1010 drop(f);
1011
1012 // Reopen. mkit's contract: either succeeds (rolled forward to
1013 // last valid size, possibly fewer leaves than before) OR
1014 // surfaces a `Corrupted` error. Crucially: no panic, no
1015 // silent-stale-root.
1016 let reopened = CommitHistory::open_at(exec, &mkit_dir, "main");
1017 match reopened {
1018 Ok(h) => {
1019 assert!(
1020 h.len() <= len_before,
1021 "rolled-forward leaf count must not exceed the pre-truncation count"
1022 );
1023 // The root of the rolled-forward state, if non-empty,
1024 // must equal the root of a freshly-built mem MMR over
1025 // the surviving leaves — proving roll-forward is
1026 // semantically consistent.
1027 if !h.is_empty() {
1028 let n = usize::try_from(h.len()).expect("leaf count fits in usize");
1029 let mut reference = CommitHistory::open();
1030 for c in &commits[..n] {
1031 reference.append(c).unwrap();
1032 }
1033 assert_eq!(
1034 h.root(),
1035 reference.root(),
1036 "rolled-forward root must equal a clean replay of the surviving leaves"
1037 );
1038 }
1039 }
1040 Err(HistoryError::Corrupted(_)) => {
1041 // Acceptable: commonware refused to recover. Surfaced
1042 // to the caller via the documented error variant.
1043 }
1044 Err(other) => panic!("unexpected error on truncated journal: {other:?}"),
1045 }
1046 }
1047
1048 // ---- Rebuild shim --------------------------------------------
1049
1050 #[test]
1051 fn rebuild_from_chain_matches_live_appends() {
1052 let (_tmp, mkit_dir_a) = fresh_mkit_dir();
1053 let (_tmp_b, mkit_dir_b) = fresh_mkit_dir();
1054 let exec = fresh_executor();
1055
1056 // Build the "reference" history by live appends.
1057 let commits: Vec<Hash> = (0..10u64).map(synth).collect();
1058 let reference_root = {
1059 let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir_a, "main").unwrap();
1060 for c in &commits {
1061 h.append(c).unwrap();
1062 }
1063 h.root()
1064 };
1065
1066 // Simulate the v0.1.x situation: refs/heads/main has a tip
1067 // commit, but `<mkit_dir>/history/` is empty. The walker
1068 // returns each commit's parent until root.
1069 let mut h = CommitHistory::open_at(exec.clone(), &mkit_dir_b, "main").unwrap();
1070 let tip = commits[commits.len() - 1];
1071 let count = rebuild_from_chain::<TokioExecutor, _, std::io::Error>(&mut h, tip, |hash| {
1072 // Find this hash in `commits`, return the prior one or
1073 // None if at index 0.
1074 let idx = commits
1075 .iter()
1076 .position(|c| c == hash)
1077 .expect("walker called with unknown hash");
1078 if idx == 0 {
1079 Ok(None)
1080 } else {
1081 Ok(Some(commits[idx - 1]))
1082 }
1083 })
1084 .unwrap();
1085 assert_eq!(count, commits.len() as u64);
1086 assert_eq!(
1087 h.root(),
1088 reference_root,
1089 "rebuilt root must match live-append root"
1090 );
1091 }
1092
1093 // ---- Sanitization --------------------------------------------
1094
1095 #[test]
1096 fn sanitize_branch_round_trip_invariants() {
1097 // Different ref-name byte sequences must encode to different
1098 // partition tokens.
1099 assert_ne!(sanitize_branch("feat/v1.0"), sanitize_branch("feat_v1_0"));
1100 // Plain alpha-numeric / dash names pass through unchanged.
1101 assert_eq!(sanitize_branch("main"), "main");
1102 assert_eq!(sanitize_branch("release-2026"), "release-2026");
1103 // `/` escapes to `_2f`.
1104 assert_eq!(sanitize_branch("a/b"), "a_2fb");
1105 // `.` escapes to `_2e`.
1106 assert_eq!(sanitize_branch("v1.0"), "v1_2e0");
1107 // `_` itself escapes to `_5f` to keep the encoding injective.
1108 assert_eq!(sanitize_branch("a_b"), "a_5fb");
1109 }
1110}