ant_node/replication/subtree.rs
1//! Gossip-triggered contiguous-subtree storage proof (ADR-0002).
2//!
3//! Pure, network-free core of the audit redesign. Given a peer's signed
4//! [`StorageCommitment`] and an auditor-chosen random nonce, both sides
5//! deterministically select **one contiguous subtree** of the committed
6//! Merkle tree; the responder expands that subtree to its leaves plus the
7//! sibling cut-hashes on the path to the root; the auditor rebuilds the root
8//! and spot-checks a few leaves against real chunk bytes.
9//!
10//! Three independent checks (ADR-0002 "Verification, three independent
11//! checks"); this module owns the first two — the third (response deadline)
12//! is enforced by the caller:
13//!
14//! 1. **Structure** — [`verify_subtree_proof`] re-derives the selected branch
15//! from `(nonce, key_count)`, rebuilds the root from the returned leaves and
16//! cut-hashes, and requires it to equal the pinned root.
17//! 2. **Real bytes** — [`select_spotcheck_indices`] picks a few leaves within
18//! the subtree; the caller fetches their bytes and checks both the plain
19//! content hash and the nonce freshness hash. Faking a fraction `x` of
20//! leaves survives only `(1 - x)^k`.
21//!
22//! ## Tree geometry (must match [`super::commitment::MerkleTree`])
23//!
24//! Leaves are sorted by key and fill positions `0..N`. The tree is
25//! left-packed: when a level has an odd number of nodes the last node is
26//! paired with itself (`node_hash(x, x)`). There are no explicit padding
27//! leaves; "padding" is the empty right side of a subtree slot that extends
28//! past `N`. Depth `D = ceil(log2(N))`. A node identified by `(depth, slot)`
29//! (depth measured from the root, slot in `0..2^depth`) covers the contiguous
30//! leaf range `[slot * span, (slot + 1) * span)` where `span = 2^(D - depth)`,
31//! intersected with `0..N`.
32
33use super::commitment::{leaf_hash, node_hash, StorageCommitment, MAX_COMMITMENT_KEY_COUNT};
34use super::protocol::compute_audit_digest;
35use crate::ant_protocol::XorName;
36use serde::{Deserialize, Serialize};
37
38/// Below this key count the whole tree is challenged; `sqrt` rounding is
39/// meaningless for tiny trees and a full proof is cheap.
40pub const SMALL_TREE_FULL_AUDIT_FLOOR: u32 = 4;
41
42/// One leaf of the selected subtree, as returned by the responder.
43#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
44pub struct SubtreeLeaf {
45 /// The committed key (chunk address) at this leaf position.
46 pub key: XorName,
47 /// `BLAKE3(record_bytes)` — the plain content hash. This is also the
48 /// chunk's network address, so it is public; possessing it does NOT prove
49 /// possession of the bytes (that is what `nonced_hash` is for).
50 pub bytes_hash: [u8; 32],
51 /// `compute_audit_digest(nonce, peer_id, key, record_bytes)` — the
52 /// freshness hash. Only a holder of the actual bytes can produce it for a
53 /// fresh nonce, so a spot-check on it proves real possession.
54 pub nonced_hash: [u8; 32],
55}
56
57/// A responder's single-contiguous-subtree proof (ADR-0002 "The proof").
58#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
59pub struct SubtreeProof {
60 /// Every leaf of the selected subtree, in ascending leaf-index order.
61 pub leaves: Vec<SubtreeLeaf>,
62 /// One sibling cut-hash per level on the path from the root down to the
63 /// selected subtree root, ordered root-first. Each is the plain hash of
64 /// the unselected sibling node at that level.
65 pub sibling_cut_hashes: Vec<[u8; 32]>,
66}
67
68/// The deterministically-selected contiguous subtree, derived from
69/// `(nonce, key_count)` and agreed by both sides.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct SubtreePath {
72 /// Depth of the subtree root below the tree root (0 = whole tree).
73 pub depth: u32,
74 /// Slot index of the subtree root within its level, in `0..2^depth`.
75 pub slot: u32,
76 /// First real leaf index covered (inclusive).
77 pub leaf_start: u32,
78 /// One past the last real leaf index covered (exclusive). Always
79 /// `leaf_end > leaf_start`, so the selection never covers zero real
80 /// leaves — this is the ADR's dead-block fix.
81 pub leaf_end: u32,
82}
83
84impl SubtreePath {
85 /// Number of real (non-padding) leaves in the selected subtree.
86 #[must_use]
87 pub fn real_leaf_count(&self) -> u32 {
88 self.leaf_end - self.leaf_start
89 }
90}
91
92/// Tree depth `D = ceil(log2(key_count))`, matching `MerkleTree` / `verify_path`.
93///
94/// `key_count == 1` → depth 0 (the single leaf is the root). Returns `None`
95/// for an out-of-protocol `key_count` so callers reject it before any work.
96#[must_use]
97fn tree_depth(key_count: u32) -> Option<u32> {
98 if key_count == 0 || key_count > MAX_COMMITMENT_KEY_COUNT {
99 return None;
100 }
101 // checked_next_power_of_two cannot fail under the cap above, but the
102 // explicit check keeps behaviour identical across debug/release.
103 let rounded = key_count.checked_next_power_of_two()?;
104 Some(rounded.trailing_zeros())
105}
106
107/// Count real leaves under the node at `(depth, slot)` for a tree of `key_count`
108/// leaves. Pure function of geometry — identical on auditor and responder.
109///
110/// `span = 2^(total_depth - depth)`; the node covers `[slot*span, (slot+1)*span)`
111/// clamped to `0..key_count`.
112#[must_use]
113fn real_leaves_under(depth: u32, slot: u64, key_count: u32, total_depth: u32) -> u32 {
114 let levels_below = total_depth - depth;
115 // span fits in u64: total_depth <= 20 for key_count <= 1e6.
116 let span = 1u64 << levels_below;
117 let start = slot.saturating_mul(span).min(u64::from(key_count));
118 let end = slot
119 .saturating_add(1)
120 .saturating_mul(span)
121 .min(u64::from(key_count));
122 // end >= start always; difference fits in u32 (<= key_count).
123 u32::try_from(end - start).unwrap_or(0)
124}
125
126/// `ceil(sqrt(key_count))` — the real-leaf floor a selected subtree must meet.
127#[must_use]
128fn sqrt_floor(key_count: u32) -> u32 {
129 // Exact integer ceil(sqrt(n)), float-free and MSRV-safe (no u64::isqrt).
130 // Newton's method converges to floor(sqrt(n)); then round up unless n is a
131 // perfect square. Always at least 1.
132 let n = u64::from(key_count);
133 if n <= 1 {
134 return 1;
135 }
136 let mut x = n;
137 let mut y = x.div_ceil(2);
138 while y < x {
139 x = y;
140 y = (x + n / x) / 2;
141 }
142 // x == floor(sqrt(n)) here.
143 let ceil = if x.saturating_mul(x) == n { x } else { x + 1 };
144 u32::try_from(ceil.max(1)).unwrap_or(u32::MAX)
145}
146
147/// Read bit `index` of the nonce (bit 0 = MSB of byte 0), `index` 0-based.
148///
149/// `1 → left child, 0 → right child` (ADR). With a 256-bit nonce and a tree
150/// depth ≤ 20 we never run out of bits.
151#[must_use]
152fn nonce_bit(nonce: &[u8; 32], index: u32) -> bool {
153 let byte = (index / 8) as usize;
154 let bit = 7 - (index % 8);
155 // byte < 32 because index < 256 for any reachable depth; guard anyway.
156 nonce.get(byte).is_some_and(|b| (b >> bit) & 1 == 1)
157}
158
159/// Deterministically select one contiguous subtree from `(nonce, key_count)`.
160///
161/// Walks the nonce bits from the root, descending into the child the bit picks,
162/// and **stops at the smallest branch whose real-leaf count is still ≥
163/// `ceil(sqrt(key_count))`**. Because an all-padding child has zero real leaves
164/// (< the floor), the walk never descends into one — so the selection always
165/// covers ≥ `sqrt` real leaves and is never empty (ADR dead-block fix).
166///
167/// For `key_count <= SMALL_TREE_FULL_AUDIT_FLOOR` the whole tree is selected.
168///
169/// Returns `None` only for an out-of-protocol `key_count` (caller rejects).
170#[must_use]
171pub fn select_subtree_path(nonce: &[u8; 32], key_count: u32) -> Option<SubtreePath> {
172 let total_depth = tree_depth(key_count)?;
173
174 // Tiny trees: challenge everything.
175 if key_count <= SMALL_TREE_FULL_AUDIT_FLOOR {
176 return Some(SubtreePath {
177 depth: 0,
178 slot: 0,
179 leaf_start: 0,
180 leaf_end: key_count,
181 });
182 }
183
184 let floor = sqrt_floor(key_count);
185 let mut depth = 0u32;
186 let mut slot = 0u64; // slot within the current level
187
188 // Descend while the chosen child still meets the floor.
189 while depth < total_depth {
190 let go_left = nonce_bit(nonce, depth);
191 // 1 = left child (bit set), 0 = right child. Right child is the odd slot.
192 let child_slot = slot * 2 + u64::from(!go_left);
193 let child_real = real_leaves_under(depth + 1, child_slot, key_count, total_depth);
194 if child_real < floor {
195 break; // descending would drop below the floor → stay here
196 }
197 depth += 1;
198 slot = child_slot;
199 }
200
201 let span = 1u64 << (total_depth - depth);
202 let leaf_start =
203 u32::try_from(slot.saturating_mul(span).min(u64::from(key_count))).unwrap_or(key_count);
204 let leaf_end = u32::try_from(
205 slot.saturating_add(1)
206 .saturating_mul(span)
207 .min(u64::from(key_count)),
208 )
209 .unwrap_or(key_count);
210
211 Some(SubtreePath {
212 depth,
213 slot: u32::try_from(slot).unwrap_or(u32::MAX),
214 leaf_start,
215 leaf_end,
216 })
217}
218
219/// Pick `k` distinct nonce-derived leaf positions within the selected subtree.
220///
221/// Returned as indices into `path.real_leaf_count()` (0-based within the
222/// subtree). DETERMINISTIC from the nonce.
223///
224/// **NOT used for the live round-2 sample.** Deriving the byte-challenge sample
225/// from the round-1 nonce is unsound: the structural root check binds only
226/// `(key, bytes_hash)` (both public), not `nonced_hash`, so a responder that
227/// knows the nonce at proof-build time could fabricate `nonced_hash` on every
228/// un-sampled leaf and fetch only the predictable sample. The auditor therefore
229/// chooses the round-2 sample with fresh CSPRNG randomness *after* receiving the
230/// proof (`storage_commitment_audit::random_spotcheck_leaves`). This
231/// deterministic helper is retained only for tests/observers that need a
232/// reproducible selection.
233#[must_use]
234pub fn select_spotcheck_indices(nonce: &[u8; 32], path: &SubtreePath, k: u32) -> Vec<u32> {
235 let n = path.real_leaf_count();
236 if n == 0 {
237 return Vec::new();
238 }
239 if n <= k {
240 return (0..n).collect();
241 }
242 // Derive a stream of indices by hashing (nonce || counter) and reducing
243 // mod n; skip collisions. Bounded: k is small (clamped to the 3..=5 band)
244 // and n > k.
245 let mut out: Vec<u32> = Vec::with_capacity(k as usize);
246 let mut counter: u32 = 0;
247 while u32::try_from(out.len()).unwrap_or(u32::MAX) < k {
248 let mut h = blake3::Hasher::new();
249 h.update(b"autonomi.ant.replication.audit_spotcheck.v1");
250 h.update(nonce);
251 h.update(&counter.to_le_bytes());
252 let digest = *h.finalize().as_bytes();
253 let mut word = [0u8; 4];
254 word.copy_from_slice(&digest[..4]);
255 let idx = u32::from_le_bytes(word) % n;
256 if !out.contains(&idx) {
257 out.push(idx);
258 }
259 counter = counter.wrapping_add(1);
260 // Bound the hash stream (vanishingly unlikely to bite with n > k).
261 if counter > k.saturating_mul(64) {
262 break;
263 }
264 }
265 // Top up deterministically if the bounded hash stream collided too often:
266 // take the lowest indices not yet selected. Still nonce-independent only in
267 // this (astronomically rare) tail, and identical on every observer — the
268 // caller is guaranteed exactly min(k, n) distinct indices, so the byte
269 // sample is never silently smaller than requested.
270 let mut candidate: u32 = 0;
271 while u32::try_from(out.len()).unwrap_or(u32::MAX) < k && candidate < n {
272 if !out.contains(&candidate) {
273 out.push(candidate);
274 }
275 candidate += 1;
276 }
277 out
278}
279
280/// Verdict from [`verify_subtree_proof`]'s structural check.
281#[derive(Debug, Clone, PartialEq, Eq)]
282pub enum StructureVerdict {
283 /// Proof is well-formed and its root matches the pinned commitment.
284 Valid,
285 /// Proof is malformed or its root does not match. Carries a static reason
286 /// for logging; all variants are confirmed failures, not benign.
287 Invalid(&'static str),
288}
289
290/// Structural verification (ADR-0002 check 1): the returned subtree genuinely
291/// belongs to the committed tree.
292///
293/// Re-derives the selected branch from `(nonce, commitment.key_count)`,
294/// rebuilds the root from `proof.leaves` and `proof.sibling_cut_hashes`, and
295/// requires it to equal `commitment.root`. Also checks leaf count and
296/// ascending-key order (the committed tree sorts leaves by key).
297///
298/// This does NOT verify possession of bytes — that is the caller's spot-check
299/// using [`select_spotcheck_indices`]. It only proves the structure.
300#[must_use]
301pub fn verify_subtree_proof(
302 proof: &SubtreeProof,
303 nonce: &[u8; 32],
304 commitment: &StorageCommitment,
305) -> StructureVerdict {
306 let Some(path) = select_subtree_path(nonce, commitment.key_count) else {
307 return StructureVerdict::Invalid("out-of-protocol key_count");
308 };
309
310 // Leaf count must equal the agreed subtree's real-leaf count exactly.
311 let expected_leaves = path.real_leaf_count() as usize;
312 if proof.leaves.len() != expected_leaves {
313 return StructureVerdict::Invalid("wrong leaf count");
314 }
315 // Sibling cut-hashes: one per level on the path to the subtree root.
316 if proof.sibling_cut_hashes.len() != path.depth as usize {
317 return StructureVerdict::Invalid("wrong cut-hash count");
318 }
319
320 // Leaves must be strictly ascending by key (matches MerkleTree sort), which
321 // also rejects duplicates.
322 for w in proof.leaves.windows(2) {
323 if let [a, b] = w {
324 if a.key >= b.key {
325 return StructureVerdict::Invalid("leaves not strictly ascending");
326 }
327 }
328 }
329
330 // Out-of-protocol key_count cannot happen here (select_subtree_path already
331 // returned Some), but recompute total_depth defensively for the climb maths.
332 let Some(total_depth) = tree_depth(commitment.key_count) else {
333 return StructureVerdict::Invalid("out-of-protocol key_count");
334 };
335
336 // Phase A — reconstruct the selected subtree's root NODE exactly as the
337 // committed tree's level-by-level build produces it. The subtree root sits
338 // at `(level_from_leaves, slot)`, covering a left-packed block of leaves;
339 // folding that block up `level_from_leaves` levels with the same
340 // self-pair-the-last-node rule as `MerkleTree::build_next_level` yields the
341 // identical node (including the `node_hash(x, x)` self-pair when the block
342 // is the tree's odd tail at some level). `fold_to_root` stopped at a single
343 // hash and so skipped the self-pair when a truncated block reached length 1
344 // before climbing all the way to the subtree-root level — the geometry bug.
345 let leaf_hashes: Vec<[u8; 32]> = proof
346 .leaves
347 .iter()
348 .map(|l| leaf_hash(&l.key, &l.bytes_hash))
349 .collect();
350 let levels_to_subtree_root = total_depth - path.depth;
351 let mut cur = fold_levels(leaf_hashes, levels_to_subtree_root);
352
353 // Phase B — climb from the subtree root to the tree root using one sibling
354 // cut-hash per level, exactly like `verify_path`: the climb's left/right
355 // choice is the real node-index parity, NOT a nonce bit, and the self-pair
356 // of an odd level's last node falls out naturally when the builder supplied
357 // the chosen node itself as its own sibling. The cut-hashes are root-first,
358 // so we consume them in reverse (lowest climb step uses the last cut-hash).
359 //
360 // We recompute the node index of the subtree root the same way the builder
361 // walked the nonce bits, then halve it as we climb — mirroring `verify_path`.
362 let mut node_index = u64::from(path.slot);
363 for level_above in (0..path.depth).rev() {
364 let Some(sibling) = proof.sibling_cut_hashes.get(level_above as usize) else {
365 return StructureVerdict::Invalid("missing cut-hash");
366 };
367 cur = if node_index % 2 == 0 {
368 node_hash(&cur, sibling)
369 } else {
370 node_hash(sibling, &cur)
371 };
372 node_index /= 2;
373 }
374
375 if cur == commitment.root {
376 StructureVerdict::Valid
377 } else {
378 StructureVerdict::Invalid("root mismatch")
379 }
380}
381
382/// Fold a contiguous, left-aligned block of node hashes up exactly `levels`
383/// levels, applying the same left-packed self-pair rule as
384/// `MerkleTree::build_next_level` (`node_hash(x, x)` for an unpaired last node).
385///
386/// This is the generalisation of a single-leaf inclusion fold to a *range* of
387/// leaves: a subtree root at `(levels, slot)` covers a block whose left edge is
388/// pair-aligned at every sub-level, so the only odd run that can occur is the
389/// tree's genuine odd tail — exactly when `build_next_level` self-pairs. Folding
390/// the block `levels` times therefore reproduces the committed node bit-for-bit,
391/// including the self-pair that `fold_to_root` used to skip by stopping at a
392/// single hash too early.
393///
394/// `levels == 0` returns the block's single element unchanged (the subtree IS
395/// the tree, e.g. the small-tree full-audit case after its own folds, or a
396/// single-leaf tree). An empty input is impossible here (callers guarantee ≥ 1
397/// leaf via the dead-block fix); returns a zero hash defensively.
398#[must_use]
399fn fold_levels(mut level: Vec<[u8; 32]>, levels: u32) -> [u8; 32] {
400 if level.is_empty() {
401 return [0u8; 32];
402 }
403 // Fold up `levels` times using the SAME builder as `MerkleTree::build`
404 // (§10 — `build_next_level`), so the self-pair of an odd tail matches the
405 // committed tree exactly. Within a selected, left-aligned block the only
406 // odd run that can occur is the tree's genuine odd tail.
407 for _ in 0..levels {
408 level = crate::replication::commitment::build_next_level(&level);
409 }
410 // After `levels` folds of a `2^levels`-span left-aligned block, exactly one
411 // node remains; defensively fall back if the block was shorter.
412 level.first().copied().unwrap_or([0u8; 32])
413}
414
415/// Build the per-leaf nonced freshness hash for a subtree leaf (responder
416/// side), reusing the existing audit digest.
417#[must_use]
418pub fn nonced_leaf_hash(
419 nonce: &[u8; 32],
420 challenged_peer_id: &[u8; 32],
421 key: &XorName,
422 record_bytes: &[u8],
423) -> [u8; 32] {
424 compute_audit_digest(nonce, challenged_peer_id, key, record_bytes)
425}
426
427/// Why a responder could not build a subtree proof for a challenge.
428#[derive(Debug, Clone, PartialEq, Eq)]
429pub enum BuildProofError {
430 /// The challenge's `key_count` (from the pinned commitment) is out of
431 /// protocol range. Should never happen for a commitment we built.
432 BadKeyCount,
433 /// A selected leaf's key could not be resolved from the tree (internal
434 /// inconsistency; should never happen).
435 MissingKey {
436 /// The leaf index that could not be resolved.
437 leaf_index: u32,
438 },
439 /// The responder no longer holds the bytes for a selected, committed key.
440 /// This is real storage loss / deliberate non-response — the caller turns
441 /// it into a confirmed audit failure, NOT a benign rejection.
442 MissingBytes {
443 /// The committed key whose bytes are gone.
444 key: XorName,
445 },
446}
447
448/// Build the single-contiguous-subtree proof for `(nonce, tree)` (responder).
449///
450/// `bytes_for(&key)` returns the chunk bytes the responder holds for a key, or
451/// `None` if it cannot read them. Walks the same nonce-selected path the
452/// auditor will re-derive, reads the unselected sibling cut-hashes directly
453/// from the committed tree (so they are provably consistent with the gossiped
454/// root), and builds each selected leaf's plain and nonced hashes from the real
455/// bytes.
456///
457/// # Errors
458///
459/// See [`BuildProofError`]. `MissingBytes` is the one the caller penalises;
460/// the others indicate an internal inconsistency.
461pub fn build_subtree_proof(
462 tree: &super::commitment::MerkleTree,
463 nonce: &[u8; 32],
464 challenged_peer_id: &[u8; 32],
465 bytes_for: impl Fn(&XorName) -> Option<Vec<u8>>,
466) -> Result<SubtreeProof, BuildProofError> {
467 let plan = subtree_plan(tree, nonce)?;
468 let mut leaves = Vec::with_capacity(plan.leaf_keys.len());
469 for key in &plan.leaf_keys {
470 let bytes = bytes_for(key).ok_or(BuildProofError::MissingBytes { key: *key })?;
471 leaves.push(subtree_leaf(nonce, challenged_peer_id, key, &bytes));
472 }
473 Ok(SubtreeProof {
474 leaves,
475 sibling_cut_hashes: plan.sibling_cut_hashes,
476 })
477}
478
479/// The pure (no-bytes) geometry of a subtree proof.
480///
481/// Holds the ordered keys whose bytes the responder must hash and the sibling
482/// cut-hashes read from the tree. Splitting this out lets an async responder
483/// read chunk bytes per leaf without forcing the tree-walking maths to be async.
484#[derive(Debug, Clone)]
485pub struct SubtreePlan {
486 /// The selected leaves' keys, in ascending leaf-index order.
487 pub leaf_keys: Vec<XorName>,
488 /// One sibling cut-hash per level on the path to the subtree root,
489 /// root-first.
490 pub sibling_cut_hashes: Vec<[u8; 32]>,
491}
492
493/// Compute the [`SubtreePlan`] for `(nonce, tree)` — selection geometry only,
494/// no chunk bytes touched.
495///
496/// # Errors
497///
498/// [`BuildProofError::BadKeyCount`] for an out-of-protocol tree;
499/// [`BuildProofError::MissingKey`] if a selected leaf index is not in the tree
500/// (internal inconsistency).
501pub fn subtree_plan(
502 tree: &super::commitment::MerkleTree,
503 nonce: &[u8; 32],
504) -> Result<SubtreePlan, BuildProofError> {
505 let key_count = tree.key_count();
506 let path = select_subtree_path(nonce, key_count).ok_or(BuildProofError::BadKeyCount)?;
507
508 let mut leaf_keys = Vec::with_capacity(path.real_leaf_count() as usize);
509 for idx in path.leaf_start..path.leaf_end {
510 let key = tree
511 .key_at(idx as usize)
512 .ok_or(BuildProofError::MissingKey { leaf_index: idx })?;
513 leaf_keys.push(key);
514 }
515
516 // Sibling cut-hashes, root-first. At descent step `d` (0-based from the
517 // root), the chosen child is on the side the nonce bit picks; the sibling
518 // is the other child at level `total_depth - (d + 1)` (counting up from
519 // leaves). On an odd-length level the missing sibling self-pairs, i.e. the
520 // sibling hash is the chosen node itself.
521 let total_depth = u32::try_from(tree.levels_count().saturating_sub(1)).unwrap_or(0);
522 let mut sibling_cut_hashes = Vec::with_capacity(path.depth as usize);
523 let mut slot = 0u64;
524 for d in 0..path.depth {
525 let go_left = nonce_bit(nonce, d);
526 let child = slot * 2 + u64::from(!go_left);
527 let sibling = child ^ 1;
528 let level_from_leaves = (total_depth - (d + 1)) as usize;
529 let chosen_hash = tree.node_at(level_from_leaves, child);
530 let sib_hash = tree
531 .node_at(level_from_leaves, sibling)
532 .or(chosen_hash)
533 .ok_or(BuildProofError::BadKeyCount)?;
534 sibling_cut_hashes.push(sib_hash);
535 slot = child;
536 }
537
538 Ok(SubtreePlan {
539 leaf_keys,
540 sibling_cut_hashes,
541 })
542}
543
544/// Build one subtree leaf from its key and the chunk bytes the responder holds.
545#[must_use]
546pub fn subtree_leaf(
547 nonce: &[u8; 32],
548 challenged_peer_id: &[u8; 32],
549 key: &XorName,
550 bytes: &[u8],
551) -> SubtreeLeaf {
552 SubtreeLeaf {
553 key: *key,
554 bytes_hash: *blake3::hash(bytes).as_bytes(),
555 nonced_hash: nonced_leaf_hash(nonce, challenged_peer_id, key, bytes),
556 }
557}
558
559#[cfg(test)]
560#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
561mod tests {
562 use super::*;
563 use crate::replication::commitment::MerkleTree;
564
565 fn xn_u32(i: u32) -> XorName {
566 let mut k = [0u8; 32];
567 k[..4].copy_from_slice(&i.to_be_bytes()); // big-endian so numeric order == sort order
568 k
569 }
570 fn nonce_of(seed: u8) -> [u8; 32] {
571 [seed; 32]
572 }
573
574 // ---- sqrt_floor -------------------------------------------------------
575
576 #[test]
577 fn sqrt_floor_is_exact_ceil() {
578 assert_eq!(sqrt_floor(1), 1);
579 assert_eq!(sqrt_floor(4), 2);
580 assert_eq!(sqrt_floor(5), 3); // ceil(sqrt(5)) = 3
581 assert_eq!(sqrt_floor(9), 3);
582 assert_eq!(sqrt_floor(10), 4);
583 assert_eq!(sqrt_floor(100), 10);
584 assert_eq!(sqrt_floor(101), 11);
585 assert_eq!(sqrt_floor(1_000_000), 1000);
586 }
587
588 // ---- real_leaves_under ------------------------------------------------
589
590 #[test]
591 fn real_leaves_under_root_is_all() {
592 let d = tree_depth(100).unwrap();
593 assert_eq!(real_leaves_under(0, 0, 100, d), 100);
594 }
595
596 #[test]
597 fn real_leaves_under_padding_slot_is_zero() {
598 // key_count = 5, total_depth = 3 (next_pow2(5)=8). Leaf slots 5,6,7
599 // at the bottom are padding. The right half at depth 1 (slot 1) covers
600 // leaves [4,8) → only leaf 4 is real.
601 let d = tree_depth(5).unwrap();
602 assert_eq!(d, 3);
603 assert_eq!(real_leaves_under(1, 0, 5, d), 4); // [0,4)
604 assert_eq!(real_leaves_under(1, 1, 5, d), 1); // [4,8) ∩ [0,5) = {4}
605 assert_eq!(real_leaves_under(3, 7, 5, d), 0); // pure padding leaf
606 assert_eq!(real_leaves_under(2, 3, 5, d), 0); // [6,8) pure padding
607 }
608
609 // ---- select_subtree_path: dead-block regression -----------------------
610
611 #[test]
612 fn selection_never_empty_across_many_sizes_and_nonces() {
613 for n in [
614 5u32, 6, 7, 9, 13, 17, 33, 65, 100, 129, 333, 1000, 1024, 1025,
615 ] {
616 let floor = sqrt_floor(n);
617 for seed in 0u8..=255 {
618 let path = select_subtree_path(&nonce_of(seed), n).unwrap();
619 assert!(
620 path.real_leaf_count() >= floor.min(n),
621 "n={n} seed={seed}: real={} < floor={floor}",
622 path.real_leaf_count()
623 );
624 assert!(
625 path.real_leaf_count() >= 1,
626 "n={n} seed={seed}: empty selection"
627 );
628 assert!(path.leaf_end <= n);
629 assert!(path.leaf_start < path.leaf_end);
630 }
631 }
632 }
633
634 #[test]
635 fn small_trees_select_whole_tree() {
636 for n in 1..=SMALL_TREE_FULL_AUDIT_FLOOR {
637 let path = select_subtree_path(&nonce_of(7), n).unwrap();
638 assert_eq!(path.depth, 0);
639 assert_eq!(path.leaf_start, 0);
640 assert_eq!(path.leaf_end, n);
641 }
642 }
643
644 #[test]
645 fn selection_is_deterministic() {
646 let n = 500;
647 let a = select_subtree_path(&nonce_of(42), n).unwrap();
648 let b = select_subtree_path(&nonce_of(42), n).unwrap();
649 assert_eq!(a, b);
650 }
651
652 #[test]
653 fn different_nonces_cover_different_branches_over_time() {
654 // Not every nonce differs, but the set of selected ranges must be > 1.
655 let n = 1024;
656 let mut starts = std::collections::HashSet::new();
657 for seed in 0u8..=255 {
658 let p = select_subtree_path(&nonce_of(seed), n).unwrap();
659 starts.insert(p.leaf_start);
660 }
661 assert!(
662 starts.len() > 4,
663 "nonce should spread selection: {}",
664 starts.len()
665 );
666 }
667
668 /// Deterministic per-trial nonce (no RNG): hash a counter.
669 fn nonce_for_trial(i: u32) -> [u8; 32] {
670 let mut h = blake3::Hasher::new();
671 h.update(b"detection-sim-trial");
672 h.update(&i.to_le_bytes());
673 *h.finalize().as_bytes()
674 }
675
676 /// Catch rate over `trials` audits: fraction whose nonce-selected subtree
677 /// overlaps at least one deleted leaf index.
678 fn catch_rate(n: u32, deleted: &std::collections::HashSet<u32>, trials: u32) -> f64 {
679 let mut caught = 0u32;
680 for t in 0..trials {
681 let path = select_subtree_path(&nonce_for_trial(t), n).unwrap();
682 if (path.leaf_start..path.leaf_end).any(|i| deleted.contains(&i)) {
683 caught += 1;
684 }
685 }
686 f64::from(caught) / f64::from(trials)
687 }
688
689 #[test]
690 fn detection_uniform_fast_clustered_floor() {
691 // ADR-0002 Validation: uniform deletions are caught fast; clustered
692 // (contiguous-block) deletions are caught at roughly the deleted
693 // fraction per audit (a floor), much slower. This encodes the core
694 // security claim that the audit RATE (not per-audit cleverness) is the
695 // lever against a clustered deleter.
696 let n = 1024u32; // sqrt = 32
697 let del_count = n / 10; // delete 10% ≈ 102
698
699 // Uniform: spread deletions evenly across the keyspace.
700 let uniform: std::collections::HashSet<u32> =
701 (0..del_count).map(|i| (i * n / del_count) % n).collect();
702 let uniform_rate = catch_rate(n, &uniform, 256);
703
704 // Clustered: one contiguous block of the same size.
705 let clustered: std::collections::HashSet<u32> = (0..del_count).collect();
706 let clustered_rate = catch_rate(n, &clustered, 256);
707
708 // Uniform should be caught on essentially every audit (spread across the
709 // whole tree; any selected subtree overlaps some deletion).
710 assert!(
711 uniform_rate > 0.95,
712 "uniform deletions should be caught almost every audit, got {uniform_rate}"
713 );
714 // Clustered (one contiguous f-block) is a floor NEAR the deleted
715 // fraction f=0.1 — the quantitative ADR claim. The exact rate depends on
716 // selection geometry (a block of ~102 leaves is hit when the selected
717 // ~sqrt(N) subtree overlaps it), but it must sit in a tight band around
718 // f, well below the uniform rate. We bound it to [0.04, 0.30].
719 assert!(
720 (0.04..=0.30).contains(&clustered_rate),
721 "clustered catch-rate should be near f=0.1, got {clustered_rate}"
722 );
723 assert!(
724 uniform_rate > clustered_rate * 2.0,
725 "uniform ({uniform_rate}) must be far easier to catch than clustered ({clustered_rate})"
726 );
727 }
728
729 #[test]
730 fn subtree_size_near_sqrt_for_balanced_tree() {
731 // For a power-of-two tree the selection should land near sqrt(N).
732 let n = 1024; // sqrt = 32, floor = 32
733 let path = select_subtree_path(&nonce_of(3), n).unwrap();
734 // It stops as soon as a child would drop below floor; the subtree size
735 // is between floor and 2*floor for a balanced tree.
736 assert!(path.real_leaf_count() >= 32);
737 assert!(
738 path.real_leaf_count() <= 64,
739 "got {}",
740 path.real_leaf_count()
741 );
742 }
743
744 // ---- end-to-end proof build + verify ----------------------------------
745
746 /// Deterministic chunk bytes for a key (test fixture). The tree is built
747 /// from `BLAKE3` of exactly these bytes, so the proof and the committed
748 /// root agree — mirroring how a real responder hashes the chunk it holds.
749 fn chunk_bytes(key: &XorName) -> Vec<u8> {
750 // Distinct, non-trivial bytes derived from the key.
751 let mut v = key.to_vec();
752 v.extend_from_slice(b"chunk-body");
753 v
754 }
755
756 /// Build tree entries `(key, BLAKE3(chunk_bytes(key)))` for `n` keys.
757 fn entries_for(n: u32) -> Vec<(XorName, [u8; 32])> {
758 (0..n)
759 .map(|i| {
760 let key = xn_u32(i);
761 let bytes_hash = *blake3::hash(&chunk_bytes(&key)).as_bytes();
762 (key, bytes_hash)
763 })
764 .collect()
765 }
766
767 /// Reference responder: build a real subtree proof via the production
768 /// [`build_subtree_proof`] from a `MerkleTree` over `entries`. Leaves are
769 /// hashed from `chunk_bytes(key)` — the same bytes whose hash built the
770 /// tree — so an honest proof verifies. This makes the tests exercise the
771 /// exact builder the responder runs.
772 fn build_proof(
773 entries: &[(XorName, [u8; 32])],
774 nonce: &[u8; 32],
775 peer_id: &[u8; 32],
776 ) -> (SubtreeProof, StorageCommitment) {
777 let tree = MerkleTree::build(entries.to_vec()).unwrap();
778 let key_count = tree.key_count();
779 let proof = build_subtree_proof(&tree, nonce, peer_id, |k| Some(chunk_bytes(k))).unwrap();
780 let commitment = fake_commitment(tree.root(), key_count, *peer_id);
781 (proof, commitment)
782 }
783
784 fn fake_commitment(root: [u8; 32], key_count: u32, peer: [u8; 32]) -> StorageCommitment {
785 StorageCommitment {
786 root,
787 key_count,
788 sender_peer_id: peer,
789 sender_public_key: vec![0u8; 1952],
790 signature: vec![0u8; 3293],
791 }
792 }
793
794 #[test]
795 fn honest_proof_verifies_at_many_sizes() {
796 let peer = [0xABu8; 32];
797 for n in [5u32, 8, 13, 17, 64, 100, 256, 1000] {
798 let entries = entries_for(n);
799 for seed in [1u8, 2, 7, 42, 200] {
800 let nonce = nonce_of(seed);
801 let (proof, commitment) = build_proof(&entries, &nonce, &peer);
802 assert_eq!(
803 verify_subtree_proof(&proof, &nonce, &commitment),
804 StructureVerdict::Valid,
805 "n={n} seed={seed}"
806 );
807 }
808 }
809 }
810
811 #[test]
812 fn honest_proof_verifies_for_every_size_and_nonce() {
813 // Regression for the left-packed self-pairing geometry bug: the proof
814 // reconstruction must match the committed root for EVERY key count
815 // (not just powers of two / cherry-picked sizes) and every nonce. An
816 // earlier perfect-tree model false-failed honest nodes for ~70% of
817 // sizes; this guards against any reintroduction.
818 let peer = [7u8; 32];
819 for n in 5u32..=600 {
820 let entries = entries_for(n);
821 for seed in 0u8..32 {
822 let nonce = nonce_of(seed.wrapping_mul(17).wrapping_add(3));
823 let (proof, commitment) = build_proof(&entries, &nonce, &peer);
824 assert_eq!(
825 verify_subtree_proof(&proof, &nonce, &commitment),
826 StructureVerdict::Valid,
827 "honest proof must verify at n={n} seed={seed}"
828 );
829 }
830 }
831 }
832
833 #[test]
834 fn tampered_leaf_breaks_root() {
835 let peer = [9u8; 32];
836 let entries = entries_for(100);
837 let nonce = nonce_of(5);
838 let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
839 proof.leaves[0].bytes_hash[0] ^= 0x01;
840 assert!(matches!(
841 verify_subtree_proof(&proof, &nonce, &commitment),
842 StructureVerdict::Invalid(_)
843 ));
844 }
845
846 #[test]
847 fn tampered_cut_hash_breaks_root() {
848 let peer = [9u8; 32];
849 let entries = entries_for(256);
850 let nonce = nonce_of(11);
851 let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
852 if let Some(c) = proof.sibling_cut_hashes.first_mut() {
853 c[0] ^= 0x01;
854 }
855 assert!(matches!(
856 verify_subtree_proof(&proof, &nonce, &commitment),
857 StructureVerdict::Invalid(_)
858 ));
859 }
860
861 #[test]
862 fn wrong_leaf_count_rejected() {
863 let peer = [9u8; 32];
864 let entries = entries_for(100);
865 let nonce = nonce_of(5);
866 let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
867 proof.leaves.pop();
868 assert_eq!(
869 verify_subtree_proof(&proof, &nonce, &commitment),
870 StructureVerdict::Invalid("wrong leaf count")
871 );
872 }
873
874 #[test]
875 fn non_ascending_leaves_rejected() {
876 let peer = [9u8; 32];
877 let entries = entries_for(100);
878 let nonce = nonce_of(5);
879 let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
880 if proof.leaves.len() >= 2 {
881 proof.leaves.swap(0, 1);
882 }
883 assert!(matches!(
884 verify_subtree_proof(&proof, &nonce, &commitment),
885 StructureVerdict::Invalid(_)
886 ));
887 }
888
889 // ---- spot-check selection ---------------------------------------------
890
891 #[test]
892 fn spotcheck_indices_in_range_and_distinct() {
893 let n = 1024;
894 let nonce = nonce_of(3);
895 let path = select_subtree_path(&nonce, n).unwrap();
896 let k = 8;
897 let idxs = select_spotcheck_indices(&nonce, &path, k);
898 assert_eq!(
899 u32::try_from(idxs.len()).unwrap(),
900 k.min(path.real_leaf_count())
901 );
902 let mut seen = std::collections::HashSet::new();
903 for i in &idxs {
904 assert!(*i < path.real_leaf_count());
905 assert!(seen.insert(*i), "duplicate spot-check index {i}");
906 }
907 }
908
909 #[test]
910 fn build_proof_reports_missing_bytes() {
911 // A responder that no longer holds a selected, committed key's bytes
912 // must surface MissingBytes (the caller turns this into a confirmed
913 // failure, not a benign rejection).
914 let entries = entries_for(100);
915 let tree = MerkleTree::build(entries).unwrap();
916 let nonce = nonce_of(5);
917 let path = select_subtree_path(&nonce, tree.key_count()).unwrap();
918 let victim = tree.key_at(path.leaf_start as usize).unwrap();
919 let err = build_subtree_proof(&tree, &nonce, &[1u8; 32], |k| {
920 if *k == victim {
921 None
922 } else {
923 Some(chunk_bytes(k))
924 }
925 })
926 .unwrap_err();
927 assert_eq!(err, BuildProofError::MissingBytes { key: victim });
928 }
929
930 #[test]
931 fn spotcheck_returns_all_when_subtree_small() {
932 // Construct a path with few real leaves.
933 let path = SubtreePath {
934 depth: 0,
935 slot: 0,
936 leaf_start: 0,
937 leaf_end: 3,
938 };
939 let idxs = select_spotcheck_indices(&nonce_of(1), &path, 8);
940 assert_eq!(idxs, vec![0, 1, 2]);
941 }
942
943 #[test]
944 fn spotcheck_always_yields_exactly_min_k_n_distinct_indices() {
945 // The byte sample must NEVER be silently smaller than requested: a
946 // short sample weakens round 2 without anyone noticing. Exercise many
947 // nonces, subtree sizes, and k values, and require exactly min(k, n)
948 // distinct in-range indices every time — plus determinism (auditor and
949 // responder must derive the same set).
950 for size in [1u32, 2, 3, 7, 8, 64, 1000] {
951 let path = SubtreePath {
952 depth: 0,
953 slot: 0,
954 leaf_start: 0,
955 leaf_end: size,
956 };
957 for k in [1u32, 3, 5, 8] {
958 for seed in 0..32u8 {
959 let nonce = nonce_of(seed);
960 let idxs = select_spotcheck_indices(&nonce, &path, k);
961 let expected = k.min(path.real_leaf_count()) as usize;
962 assert_eq!(
963 idxs.len(),
964 expected,
965 "size={size} k={k} seed={seed}: must yield exactly min(k, n)"
966 );
967 let mut seen = std::collections::HashSet::new();
968 for i in &idxs {
969 assert!(*i < path.real_leaf_count(), "index out of range");
970 assert!(seen.insert(*i), "duplicate index {i}");
971 }
972 assert_eq!(
973 idxs,
974 select_spotcheck_indices(&nonce, &path, k),
975 "selection must be deterministic"
976 );
977 }
978 }
979 }
980 }
981
982 #[test]
983 fn fabricated_nonced_hash_caught_by_spotcheck_probability() {
984 // Simulate the realness check: a responder fabricates a fraction x of
985 // nonced hashes. The auditor spot-checks k leaves; probability all k
986 // land on honest leaves is (1-x)^k. Here we just assert the auditor
987 // *would* catch a fabricated leaf when it samples that position.
988 let peer = [1u8; 32];
989 let entries = entries_for(400);
990 let nonce = nonce_of(9);
991 let (mut proof, _commitment) = build_proof(&entries, &nonce, &peer);
992 // Fabricate the nonced hash on the first subtree leaf (wrong bytes).
993 proof.leaves[0].nonced_hash[0] ^= 0xFF;
994 // The realness check the caller runs: recompute from the real chunk
995 // bytes (the same fixture the honest tree was built from).
996 let leaf = &proof.leaves[0];
997 let real_bytes = chunk_bytes(&leaf.key);
998 let expected = nonced_leaf_hash(&nonce, &peer, &leaf.key, &real_bytes);
999 assert_ne!(
1000 leaf.nonced_hash, expected,
1001 "fabricated nonced hash must differ from real"
1002 );
1003 }
1004
1005 // ---- branch-substitution attack ---------------------------------------
1006
1007 #[test]
1008 fn responder_cannot_substitute_a_different_branch() {
1009 // ADR-0002 "Subtree selection": the random value alone fixes WHICH
1010 // branch is selected, so "the audited node cannot choose a convenient
1011 // branch to present." This is the load-bearing anti-substitution claim
1012 // and no existing test exercises it — the tamper tests only mangle a
1013 // hash within the *correct* branch.
1014 //
1015 // Attack: the responder builds a fully valid, internally-consistent
1016 // subtree proof for a DIFFERENT nonce (which the selection maps to a
1017 // different branch of the same committed tree), then presents it as the
1018 // answer to the auditor's nonce. Every leaf hash and every cut-hash is
1019 // genuine, the leaves are strictly ascending, and we deliberately pick
1020 // a decoy whose branch has the SAME leaf count and SAME depth as the
1021 // honest branch — so the cheap "wrong leaf count" / "wrong cut-hash
1022 // count" gates do NOT fire. The ONLY thing that can reject it is the
1023 // structural root re-derivation, which climbs using the auditor's
1024 // nonce-derived slot parity and position. It must reject.
1025 let peer = [0x5Au8; 32];
1026 let n = 1024u32; // balanced tree; sqrt floor = 32
1027 let entries = entries_for(n);
1028
1029 let audit_nonce = nonce_of(7);
1030 let audit_path = select_subtree_path(&audit_nonce, n).unwrap();
1031
1032 // Find a decoy nonce whose selected branch is a DIFFERENT slot but the
1033 // SAME depth (hence same real-leaf count for this balanced tree). This
1034 // forces rejection via the root check rather than a count mismatch.
1035 let mut decoy: Option<([u8; 32], SubtreePath)> = None;
1036 for seed in 0u8..=255 {
1037 let cand_nonce = nonce_of(seed);
1038 let cand = select_subtree_path(&cand_nonce, n).unwrap();
1039 if cand.depth == audit_path.depth
1040 && cand.slot != audit_path.slot
1041 && cand.real_leaf_count() == audit_path.real_leaf_count()
1042 {
1043 decoy = Some((cand_nonce, cand));
1044 break;
1045 }
1046 }
1047 let (decoy_nonce, decoy_path) =
1048 decoy.expect("a same-depth, different-slot decoy branch must exist for n=1024");
1049
1050 // Sanity: the decoy really is a different, equally-shaped branch.
1051 assert_ne!(decoy_path.slot, audit_path.slot);
1052 assert_eq!(decoy_path.depth, audit_path.depth);
1053 assert_eq!(decoy_path.real_leaf_count(), audit_path.real_leaf_count());
1054
1055 // The responder builds a genuine proof for the DECOY branch. Note the
1056 // nonced hashes are built with the decoy nonce too — but that does not
1057 // matter: the structural check below never inspects nonced hashes, and
1058 // the attack must already die on structure.
1059 let tree = MerkleTree::build(entries).unwrap();
1060 let decoy_proof =
1061 build_subtree_proof(&tree, &decoy_nonce, &peer, |k| Some(chunk_bytes(k))).unwrap();
1062
1063 // Pin the auditor's commitment to the genuine root of the same tree.
1064 let commitment = fake_commitment(tree.root(), n, peer);
1065
1066 // The honest answer to the SAME commitment + decoy nonce verifies, so
1067 // the proof itself is well-formed — it is only "wrong" relative to the
1068 // auditor's nonce.
1069 assert_eq!(
1070 verify_subtree_proof(&decoy_proof, &decoy_nonce, &commitment),
1071 StructureVerdict::Valid,
1072 "the decoy proof must be a genuinely valid proof for its own nonce"
1073 );
1074
1075 // The attack: present the decoy-branch proof against the AUDIT nonce.
1076 // The count gates cannot fire (same depth + leaf count by construction),
1077 // so this is the root re-derivation rejecting a substituted branch.
1078 let verdict = verify_subtree_proof(&decoy_proof, &audit_nonce, &commitment);
1079 assert_eq!(
1080 verdict,
1081 StructureVerdict::Invalid("root mismatch"),
1082 "substituting a different valid branch must be rejected by the root check, got {verdict:?}"
1083 );
1084 }
1085}