ant_node/replication/
subtree.rs

1//! Gossip-triggered contiguous-subtree storage proof (ADR-0002).
2//!
3//! Pure, network-free core of the audit redesign. Given a peer's signed
4//! [`StorageCommitment`] and an auditor-chosen random nonce, both sides
5//! deterministically select **one contiguous subtree** of the committed
6//! Merkle tree; the responder expands that subtree to its leaves plus the
7//! sibling cut-hashes on the path to the root; the auditor rebuilds the root
8//! and spot-checks a few leaves against real chunk bytes.
9//!
10//! Three independent checks (ADR-0002 "Verification, three independent
11//! checks"); this module owns the first two — the third (response deadline)
12//! is enforced by the caller:
13//!
14//! 1. **Structure** — [`verify_subtree_proof`] re-derives the selected branch
15//!    from `(nonce, key_count)`, rebuilds the root from the returned leaves and
16//!    cut-hashes, and requires it to equal the pinned root.
17//! 2. **Real bytes** — [`select_spotcheck_indices`] picks a few leaves within
18//!    the subtree; the caller fetches their bytes and checks both the plain
19//!    content hash and the nonce freshness hash. Faking a fraction `x` of
20//!    leaves survives only `(1 - x)^k`.
21//!
22//! ## Tree geometry (must match [`super::commitment::MerkleTree`])
23//!
24//! Leaves are sorted by key and fill positions `0..N`. The tree is
25//! left-packed: when a level has an odd number of nodes the last node is
26//! paired with itself (`node_hash(x, x)`). There are no explicit padding
27//! leaves; "padding" is the empty right side of a subtree slot that extends
28//! past `N`. Depth `D = ceil(log2(N))`. A node identified by `(depth, slot)`
29//! (depth measured from the root, slot in `0..2^depth`) covers the contiguous
30//! leaf range `[slot * span, (slot + 1) * span)` where `span = 2^(D - depth)`,
31//! intersected with `0..N`.
32
33use super::commitment::{leaf_hash, node_hash, StorageCommitment, MAX_COMMITMENT_KEY_COUNT};
34use super::protocol::compute_audit_digest;
35use crate::ant_protocol::XorName;
36use serde::{Deserialize, Serialize};
37
38/// Below this key count the whole tree is challenged; `sqrt` rounding is
39/// meaningless for tiny trees and a full proof is cheap.
40pub const SMALL_TREE_FULL_AUDIT_FLOOR: u32 = 4;
41
42/// One leaf of the selected subtree, as returned by the responder.
43#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
44pub struct SubtreeLeaf {
45    /// The committed key (chunk address) at this leaf position.
46    pub key: XorName,
47    /// `BLAKE3(record_bytes)` — the plain content hash. This is also the
48    /// chunk's network address, so it is public; possessing it does NOT prove
49    /// possession of the bytes (that is what `nonced_hash` is for).
50    pub bytes_hash: [u8; 32],
51    /// `compute_audit_digest(nonce, peer_id, key, record_bytes)` — the
52    /// freshness hash. Only a holder of the actual bytes can produce it for a
53    /// fresh nonce, so a spot-check on it proves real possession.
54    pub nonced_hash: [u8; 32],
55}
56
57/// A responder's single-contiguous-subtree proof (ADR-0002 "The proof").
58#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
59pub struct SubtreeProof {
60    /// Every leaf of the selected subtree, in ascending leaf-index order.
61    pub leaves: Vec<SubtreeLeaf>,
62    /// One sibling cut-hash per level on the path from the root down to the
63    /// selected subtree root, ordered root-first. Each is the plain hash of
64    /// the unselected sibling node at that level.
65    pub sibling_cut_hashes: Vec<[u8; 32]>,
66}
67
68/// The deterministically-selected contiguous subtree, derived from
69/// `(nonce, key_count)` and agreed by both sides.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub struct SubtreePath {
72    /// Depth of the subtree root below the tree root (0 = whole tree).
73    pub depth: u32,
74    /// Slot index of the subtree root within its level, in `0..2^depth`.
75    pub slot: u32,
76    /// First real leaf index covered (inclusive).
77    pub leaf_start: u32,
78    /// One past the last real leaf index covered (exclusive). Always
79    /// `leaf_end > leaf_start`, so the selection never covers zero real
80    /// leaves — this is the ADR's dead-block fix.
81    pub leaf_end: u32,
82}
83
84impl SubtreePath {
85    /// Number of real (non-padding) leaves in the selected subtree.
86    #[must_use]
87    pub fn real_leaf_count(&self) -> u32 {
88        self.leaf_end - self.leaf_start
89    }
90}
91
92/// Tree depth `D = ceil(log2(key_count))`, matching `MerkleTree` / `verify_path`.
93///
94/// `key_count == 1` → depth 0 (the single leaf is the root). Returns `None`
95/// for an out-of-protocol `key_count` so callers reject it before any work.
96#[must_use]
97fn tree_depth(key_count: u32) -> Option<u32> {
98    if key_count == 0 || key_count > MAX_COMMITMENT_KEY_COUNT {
99        return None;
100    }
101    // checked_next_power_of_two cannot fail under the cap above, but the
102    // explicit check keeps behaviour identical across debug/release.
103    let rounded = key_count.checked_next_power_of_two()?;
104    Some(rounded.trailing_zeros())
105}
106
107/// Count real leaves under the node at `(depth, slot)` for a tree of `key_count`
108/// leaves. Pure function of geometry — identical on auditor and responder.
109///
110/// `span = 2^(total_depth - depth)`; the node covers `[slot*span, (slot+1)*span)`
111/// clamped to `0..key_count`.
112#[must_use]
113fn real_leaves_under(depth: u32, slot: u64, key_count: u32, total_depth: u32) -> u32 {
114    let levels_below = total_depth - depth;
115    // span fits in u64: total_depth <= 20 for key_count <= 1e6.
116    let span = 1u64 << levels_below;
117    let start = slot.saturating_mul(span).min(u64::from(key_count));
118    let end = slot
119        .saturating_add(1)
120        .saturating_mul(span)
121        .min(u64::from(key_count));
122    // end >= start always; difference fits in u32 (<= key_count).
123    u32::try_from(end - start).unwrap_or(0)
124}
125
126/// `ceil(sqrt(key_count))` — the real-leaf floor a selected subtree must meet.
127#[must_use]
128fn sqrt_floor(key_count: u32) -> u32 {
129    // Exact integer ceil(sqrt(n)), float-free and MSRV-safe (no u64::isqrt).
130    // Newton's method converges to floor(sqrt(n)); then round up unless n is a
131    // perfect square. Always at least 1.
132    let n = u64::from(key_count);
133    if n <= 1 {
134        return 1;
135    }
136    let mut x = n;
137    let mut y = x.div_ceil(2);
138    while y < x {
139        x = y;
140        y = (x + n / x) / 2;
141    }
142    // x == floor(sqrt(n)) here.
143    let ceil = if x.saturating_mul(x) == n { x } else { x + 1 };
144    u32::try_from(ceil.max(1)).unwrap_or(u32::MAX)
145}
146
147/// Read bit `index` of the nonce (bit 0 = MSB of byte 0), `index` 0-based.
148///
149/// `1 → left child, 0 → right child` (ADR). With a 256-bit nonce and a tree
150/// depth ≤ 20 we never run out of bits.
151#[must_use]
152fn nonce_bit(nonce: &[u8; 32], index: u32) -> bool {
153    let byte = (index / 8) as usize;
154    let bit = 7 - (index % 8);
155    // byte < 32 because index < 256 for any reachable depth; guard anyway.
156    nonce.get(byte).is_some_and(|b| (b >> bit) & 1 == 1)
157}
158
159/// Deterministically select one contiguous subtree from `(nonce, key_count)`.
160///
161/// Walks the nonce bits from the root, descending into the child the bit picks,
162/// and **stops at the smallest branch whose real-leaf count is still ≥
163/// `ceil(sqrt(key_count))`**. Because an all-padding child has zero real leaves
164/// (< the floor), the walk never descends into one — so the selection always
165/// covers ≥ `sqrt` real leaves and is never empty (ADR dead-block fix).
166///
167/// For `key_count <= SMALL_TREE_FULL_AUDIT_FLOOR` the whole tree is selected.
168///
169/// Returns `None` only for an out-of-protocol `key_count` (caller rejects).
170#[must_use]
171pub fn select_subtree_path(nonce: &[u8; 32], key_count: u32) -> Option<SubtreePath> {
172    let total_depth = tree_depth(key_count)?;
173
174    // Tiny trees: challenge everything.
175    if key_count <= SMALL_TREE_FULL_AUDIT_FLOOR {
176        return Some(SubtreePath {
177            depth: 0,
178            slot: 0,
179            leaf_start: 0,
180            leaf_end: key_count,
181        });
182    }
183
184    let floor = sqrt_floor(key_count);
185    let mut depth = 0u32;
186    let mut slot = 0u64; // slot within the current level
187
188    // Descend while the chosen child still meets the floor.
189    while depth < total_depth {
190        let go_left = nonce_bit(nonce, depth);
191        // 1 = left child (bit set), 0 = right child. Right child is the odd slot.
192        let child_slot = slot * 2 + u64::from(!go_left);
193        let child_real = real_leaves_under(depth + 1, child_slot, key_count, total_depth);
194        if child_real < floor {
195            break; // descending would drop below the floor → stay here
196        }
197        depth += 1;
198        slot = child_slot;
199    }
200
201    let span = 1u64 << (total_depth - depth);
202    let leaf_start =
203        u32::try_from(slot.saturating_mul(span).min(u64::from(key_count))).unwrap_or(key_count);
204    let leaf_end = u32::try_from(
205        slot.saturating_add(1)
206            .saturating_mul(span)
207            .min(u64::from(key_count)),
208    )
209    .unwrap_or(key_count);
210
211    Some(SubtreePath {
212        depth,
213        slot: u32::try_from(slot).unwrap_or(u32::MAX),
214        leaf_start,
215        leaf_end,
216    })
217}
218
219/// Pick `k` distinct nonce-derived leaf positions within the selected subtree.
220///
221/// Returned as indices into `path.real_leaf_count()` (0-based within the
222/// subtree). DETERMINISTIC from the nonce.
223///
224/// **NOT used for the live round-2 sample.** Deriving the byte-challenge sample
225/// from the round-1 nonce is unsound: the structural root check binds only
226/// `(key, bytes_hash)` (both public), not `nonced_hash`, so a responder that
227/// knows the nonce at proof-build time could fabricate `nonced_hash` on every
228/// un-sampled leaf and fetch only the predictable sample. The auditor therefore
229/// chooses the round-2 sample with fresh CSPRNG randomness *after* receiving the
230/// proof (`storage_commitment_audit::random_spotcheck_leaves`). This
231/// deterministic helper is retained only for tests/observers that need a
232/// reproducible selection.
233#[must_use]
234pub fn select_spotcheck_indices(nonce: &[u8; 32], path: &SubtreePath, k: u32) -> Vec<u32> {
235    let n = path.real_leaf_count();
236    if n == 0 {
237        return Vec::new();
238    }
239    if n <= k {
240        return (0..n).collect();
241    }
242    // Derive a stream of indices by hashing (nonce || counter) and reducing
243    // mod n; skip collisions. Bounded: k is small (clamped to the 3..=5 band)
244    // and n > k.
245    let mut out: Vec<u32> = Vec::with_capacity(k as usize);
246    let mut counter: u32 = 0;
247    while u32::try_from(out.len()).unwrap_or(u32::MAX) < k {
248        let mut h = blake3::Hasher::new();
249        h.update(b"autonomi.ant.replication.audit_spotcheck.v1");
250        h.update(nonce);
251        h.update(&counter.to_le_bytes());
252        let digest = *h.finalize().as_bytes();
253        let mut word = [0u8; 4];
254        word.copy_from_slice(&digest[..4]);
255        let idx = u32::from_le_bytes(word) % n;
256        if !out.contains(&idx) {
257            out.push(idx);
258        }
259        counter = counter.wrapping_add(1);
260        // Bound the hash stream (vanishingly unlikely to bite with n > k).
261        if counter > k.saturating_mul(64) {
262            break;
263        }
264    }
265    // Top up deterministically if the bounded hash stream collided too often:
266    // take the lowest indices not yet selected. Still nonce-independent only in
267    // this (astronomically rare) tail, and identical on every observer — the
268    // caller is guaranteed exactly min(k, n) distinct indices, so the byte
269    // sample is never silently smaller than requested.
270    let mut candidate: u32 = 0;
271    while u32::try_from(out.len()).unwrap_or(u32::MAX) < k && candidate < n {
272        if !out.contains(&candidate) {
273            out.push(candidate);
274        }
275        candidate += 1;
276    }
277    out
278}
279
280/// Verdict from [`verify_subtree_proof`]'s structural check.
281#[derive(Debug, Clone, PartialEq, Eq)]
282pub enum StructureVerdict {
283    /// Proof is well-formed and its root matches the pinned commitment.
284    Valid,
285    /// Proof is malformed or its root does not match. Carries a static reason
286    /// for logging; all variants are confirmed failures, not benign.
287    Invalid(&'static str),
288}
289
290/// Structural verification (ADR-0002 check 1): the returned subtree genuinely
291/// belongs to the committed tree.
292///
293/// Re-derives the selected branch from `(nonce, commitment.key_count)`,
294/// rebuilds the root from `proof.leaves` and `proof.sibling_cut_hashes`, and
295/// requires it to equal `commitment.root`. Also checks leaf count and
296/// ascending-key order (the committed tree sorts leaves by key).
297///
298/// This does NOT verify possession of bytes — that is the caller's spot-check
299/// using [`select_spotcheck_indices`]. It only proves the structure.
300#[must_use]
301pub fn verify_subtree_proof(
302    proof: &SubtreeProof,
303    nonce: &[u8; 32],
304    commitment: &StorageCommitment,
305) -> StructureVerdict {
306    let Some(path) = select_subtree_path(nonce, commitment.key_count) else {
307        return StructureVerdict::Invalid("out-of-protocol key_count");
308    };
309
310    // Leaf count must equal the agreed subtree's real-leaf count exactly.
311    let expected_leaves = path.real_leaf_count() as usize;
312    if proof.leaves.len() != expected_leaves {
313        return StructureVerdict::Invalid("wrong leaf count");
314    }
315    // Sibling cut-hashes: one per level on the path to the subtree root.
316    if proof.sibling_cut_hashes.len() != path.depth as usize {
317        return StructureVerdict::Invalid("wrong cut-hash count");
318    }
319
320    // Leaves must be strictly ascending by key (matches MerkleTree sort), which
321    // also rejects duplicates.
322    for w in proof.leaves.windows(2) {
323        if let [a, b] = w {
324            if a.key >= b.key {
325                return StructureVerdict::Invalid("leaves not strictly ascending");
326            }
327        }
328    }
329
330    // Out-of-protocol key_count cannot happen here (select_subtree_path already
331    // returned Some), but recompute total_depth defensively for the climb maths.
332    let Some(total_depth) = tree_depth(commitment.key_count) else {
333        return StructureVerdict::Invalid("out-of-protocol key_count");
334    };
335
336    // Phase A — reconstruct the selected subtree's root NODE exactly as the
337    // committed tree's level-by-level build produces it. The subtree root sits
338    // at `(level_from_leaves, slot)`, covering a left-packed block of leaves;
339    // folding that block up `level_from_leaves` levels with the same
340    // self-pair-the-last-node rule as `MerkleTree::build_next_level` yields the
341    // identical node (including the `node_hash(x, x)` self-pair when the block
342    // is the tree's odd tail at some level). `fold_to_root` stopped at a single
343    // hash and so skipped the self-pair when a truncated block reached length 1
344    // before climbing all the way to the subtree-root level — the geometry bug.
345    let leaf_hashes: Vec<[u8; 32]> = proof
346        .leaves
347        .iter()
348        .map(|l| leaf_hash(&l.key, &l.bytes_hash))
349        .collect();
350    let levels_to_subtree_root = total_depth - path.depth;
351    let mut cur = fold_levels(leaf_hashes, levels_to_subtree_root);
352
353    // Phase B — climb from the subtree root to the tree root using one sibling
354    // cut-hash per level, exactly like `verify_path`: the climb's left/right
355    // choice is the real node-index parity, NOT a nonce bit, and the self-pair
356    // of an odd level's last node falls out naturally when the builder supplied
357    // the chosen node itself as its own sibling. The cut-hashes are root-first,
358    // so we consume them in reverse (lowest climb step uses the last cut-hash).
359    //
360    // We recompute the node index of the subtree root the same way the builder
361    // walked the nonce bits, then halve it as we climb — mirroring `verify_path`.
362    let mut node_index = u64::from(path.slot);
363    for level_above in (0..path.depth).rev() {
364        let Some(sibling) = proof.sibling_cut_hashes.get(level_above as usize) else {
365            return StructureVerdict::Invalid("missing cut-hash");
366        };
367        cur = if node_index % 2 == 0 {
368            node_hash(&cur, sibling)
369        } else {
370            node_hash(sibling, &cur)
371        };
372        node_index /= 2;
373    }
374
375    if cur == commitment.root {
376        StructureVerdict::Valid
377    } else {
378        StructureVerdict::Invalid("root mismatch")
379    }
380}
381
382/// Fold a contiguous, left-aligned block of node hashes up exactly `levels`
383/// levels, applying the same left-packed self-pair rule as
384/// `MerkleTree::build_next_level` (`node_hash(x, x)` for an unpaired last node).
385///
386/// This is the generalisation of a single-leaf inclusion fold to a *range* of
387/// leaves: a subtree root at `(levels, slot)` covers a block whose left edge is
388/// pair-aligned at every sub-level, so the only odd run that can occur is the
389/// tree's genuine odd tail — exactly when `build_next_level` self-pairs. Folding
390/// the block `levels` times therefore reproduces the committed node bit-for-bit,
391/// including the self-pair that `fold_to_root` used to skip by stopping at a
392/// single hash too early.
393///
394/// `levels == 0` returns the block's single element unchanged (the subtree IS
395/// the tree, e.g. the small-tree full-audit case after its own folds, or a
396/// single-leaf tree). An empty input is impossible here (callers guarantee ≥ 1
397/// leaf via the dead-block fix); returns a zero hash defensively.
398#[must_use]
399fn fold_levels(mut level: Vec<[u8; 32]>, levels: u32) -> [u8; 32] {
400    if level.is_empty() {
401        return [0u8; 32];
402    }
403    // Fold up `levels` times using the SAME builder as `MerkleTree::build`
404    // (§10 — `build_next_level`), so the self-pair of an odd tail matches the
405    // committed tree exactly. Within a selected, left-aligned block the only
406    // odd run that can occur is the tree's genuine odd tail.
407    for _ in 0..levels {
408        level = crate::replication::commitment::build_next_level(&level);
409    }
410    // After `levels` folds of a `2^levels`-span left-aligned block, exactly one
411    // node remains; defensively fall back if the block was shorter.
412    level.first().copied().unwrap_or([0u8; 32])
413}
414
415/// Build the per-leaf nonced freshness hash for a subtree leaf (responder
416/// side), reusing the existing audit digest.
417#[must_use]
418pub fn nonced_leaf_hash(
419    nonce: &[u8; 32],
420    challenged_peer_id: &[u8; 32],
421    key: &XorName,
422    record_bytes: &[u8],
423) -> [u8; 32] {
424    compute_audit_digest(nonce, challenged_peer_id, key, record_bytes)
425}
426
427/// Why a responder could not build a subtree proof for a challenge.
428#[derive(Debug, Clone, PartialEq, Eq)]
429pub enum BuildProofError {
430    /// The challenge's `key_count` (from the pinned commitment) is out of
431    /// protocol range. Should never happen for a commitment we built.
432    BadKeyCount,
433    /// A selected leaf's key could not be resolved from the tree (internal
434    /// inconsistency; should never happen).
435    MissingKey {
436        /// The leaf index that could not be resolved.
437        leaf_index: u32,
438    },
439    /// The responder no longer holds the bytes for a selected, committed key.
440    /// This is real storage loss / deliberate non-response — the caller turns
441    /// it into a confirmed audit failure, NOT a benign rejection.
442    MissingBytes {
443        /// The committed key whose bytes are gone.
444        key: XorName,
445    },
446}
447
448/// Build the single-contiguous-subtree proof for `(nonce, tree)` (responder).
449///
450/// `bytes_for(&key)` returns the chunk bytes the responder holds for a key, or
451/// `None` if it cannot read them. Walks the same nonce-selected path the
452/// auditor will re-derive, reads the unselected sibling cut-hashes directly
453/// from the committed tree (so they are provably consistent with the gossiped
454/// root), and builds each selected leaf's plain and nonced hashes from the real
455/// bytes.
456///
457/// # Errors
458///
459/// See [`BuildProofError`]. `MissingBytes` is the one the caller penalises;
460/// the others indicate an internal inconsistency.
461pub fn build_subtree_proof(
462    tree: &super::commitment::MerkleTree,
463    nonce: &[u8; 32],
464    challenged_peer_id: &[u8; 32],
465    bytes_for: impl Fn(&XorName) -> Option<Vec<u8>>,
466) -> Result<SubtreeProof, BuildProofError> {
467    let plan = subtree_plan(tree, nonce)?;
468    let mut leaves = Vec::with_capacity(plan.leaf_keys.len());
469    for key in &plan.leaf_keys {
470        let bytes = bytes_for(key).ok_or(BuildProofError::MissingBytes { key: *key })?;
471        leaves.push(subtree_leaf(nonce, challenged_peer_id, key, &bytes));
472    }
473    Ok(SubtreeProof {
474        leaves,
475        sibling_cut_hashes: plan.sibling_cut_hashes,
476    })
477}
478
479/// The pure (no-bytes) geometry of a subtree proof.
480///
481/// Holds the ordered keys whose bytes the responder must hash and the sibling
482/// cut-hashes read from the tree. Splitting this out lets an async responder
483/// read chunk bytes per leaf without forcing the tree-walking maths to be async.
484#[derive(Debug, Clone)]
485pub struct SubtreePlan {
486    /// The selected leaves' keys, in ascending leaf-index order.
487    pub leaf_keys: Vec<XorName>,
488    /// One sibling cut-hash per level on the path to the subtree root,
489    /// root-first.
490    pub sibling_cut_hashes: Vec<[u8; 32]>,
491}
492
493/// Compute the [`SubtreePlan`] for `(nonce, tree)` — selection geometry only,
494/// no chunk bytes touched.
495///
496/// # Errors
497///
498/// [`BuildProofError::BadKeyCount`] for an out-of-protocol tree;
499/// [`BuildProofError::MissingKey`] if a selected leaf index is not in the tree
500/// (internal inconsistency).
501pub fn subtree_plan(
502    tree: &super::commitment::MerkleTree,
503    nonce: &[u8; 32],
504) -> Result<SubtreePlan, BuildProofError> {
505    let key_count = tree.key_count();
506    let path = select_subtree_path(nonce, key_count).ok_or(BuildProofError::BadKeyCount)?;
507
508    let mut leaf_keys = Vec::with_capacity(path.real_leaf_count() as usize);
509    for idx in path.leaf_start..path.leaf_end {
510        let key = tree
511            .key_at(idx as usize)
512            .ok_or(BuildProofError::MissingKey { leaf_index: idx })?;
513        leaf_keys.push(key);
514    }
515
516    // Sibling cut-hashes, root-first. At descent step `d` (0-based from the
517    // root), the chosen child is on the side the nonce bit picks; the sibling
518    // is the other child at level `total_depth - (d + 1)` (counting up from
519    // leaves). On an odd-length level the missing sibling self-pairs, i.e. the
520    // sibling hash is the chosen node itself.
521    let total_depth = u32::try_from(tree.levels_count().saturating_sub(1)).unwrap_or(0);
522    let mut sibling_cut_hashes = Vec::with_capacity(path.depth as usize);
523    let mut slot = 0u64;
524    for d in 0..path.depth {
525        let go_left = nonce_bit(nonce, d);
526        let child = slot * 2 + u64::from(!go_left);
527        let sibling = child ^ 1;
528        let level_from_leaves = (total_depth - (d + 1)) as usize;
529        let chosen_hash = tree.node_at(level_from_leaves, child);
530        let sib_hash = tree
531            .node_at(level_from_leaves, sibling)
532            .or(chosen_hash)
533            .ok_or(BuildProofError::BadKeyCount)?;
534        sibling_cut_hashes.push(sib_hash);
535        slot = child;
536    }
537
538    Ok(SubtreePlan {
539        leaf_keys,
540        sibling_cut_hashes,
541    })
542}
543
544/// Build one subtree leaf from its key and the chunk bytes the responder holds.
545#[must_use]
546pub fn subtree_leaf(
547    nonce: &[u8; 32],
548    challenged_peer_id: &[u8; 32],
549    key: &XorName,
550    bytes: &[u8],
551) -> SubtreeLeaf {
552    SubtreeLeaf {
553        key: *key,
554        bytes_hash: *blake3::hash(bytes).as_bytes(),
555        nonced_hash: nonced_leaf_hash(nonce, challenged_peer_id, key, bytes),
556    }
557}
558
559#[cfg(test)]
560#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
561mod tests {
562    use super::*;
563    use crate::replication::commitment::MerkleTree;
564
565    fn xn_u32(i: u32) -> XorName {
566        let mut k = [0u8; 32];
567        k[..4].copy_from_slice(&i.to_be_bytes()); // big-endian so numeric order == sort order
568        k
569    }
570    fn nonce_of(seed: u8) -> [u8; 32] {
571        [seed; 32]
572    }
573
574    // ---- sqrt_floor -------------------------------------------------------
575
576    #[test]
577    fn sqrt_floor_is_exact_ceil() {
578        assert_eq!(sqrt_floor(1), 1);
579        assert_eq!(sqrt_floor(4), 2);
580        assert_eq!(sqrt_floor(5), 3); // ceil(sqrt(5)) = 3
581        assert_eq!(sqrt_floor(9), 3);
582        assert_eq!(sqrt_floor(10), 4);
583        assert_eq!(sqrt_floor(100), 10);
584        assert_eq!(sqrt_floor(101), 11);
585        assert_eq!(sqrt_floor(1_000_000), 1000);
586    }
587
588    // ---- real_leaves_under ------------------------------------------------
589
590    #[test]
591    fn real_leaves_under_root_is_all() {
592        let d = tree_depth(100).unwrap();
593        assert_eq!(real_leaves_under(0, 0, 100, d), 100);
594    }
595
596    #[test]
597    fn real_leaves_under_padding_slot_is_zero() {
598        // key_count = 5, total_depth = 3 (next_pow2(5)=8). Leaf slots 5,6,7
599        // at the bottom are padding. The right half at depth 1 (slot 1) covers
600        // leaves [4,8) → only leaf 4 is real.
601        let d = tree_depth(5).unwrap();
602        assert_eq!(d, 3);
603        assert_eq!(real_leaves_under(1, 0, 5, d), 4); // [0,4)
604        assert_eq!(real_leaves_under(1, 1, 5, d), 1); // [4,8) ∩ [0,5) = {4}
605        assert_eq!(real_leaves_under(3, 7, 5, d), 0); // pure padding leaf
606        assert_eq!(real_leaves_under(2, 3, 5, d), 0); // [6,8) pure padding
607    }
608
609    // ---- select_subtree_path: dead-block regression -----------------------
610
611    #[test]
612    fn selection_never_empty_across_many_sizes_and_nonces() {
613        for n in [
614            5u32, 6, 7, 9, 13, 17, 33, 65, 100, 129, 333, 1000, 1024, 1025,
615        ] {
616            let floor = sqrt_floor(n);
617            for seed in 0u8..=255 {
618                let path = select_subtree_path(&nonce_of(seed), n).unwrap();
619                assert!(
620                    path.real_leaf_count() >= floor.min(n),
621                    "n={n} seed={seed}: real={} < floor={floor}",
622                    path.real_leaf_count()
623                );
624                assert!(
625                    path.real_leaf_count() >= 1,
626                    "n={n} seed={seed}: empty selection"
627                );
628                assert!(path.leaf_end <= n);
629                assert!(path.leaf_start < path.leaf_end);
630            }
631        }
632    }
633
634    #[test]
635    fn small_trees_select_whole_tree() {
636        for n in 1..=SMALL_TREE_FULL_AUDIT_FLOOR {
637            let path = select_subtree_path(&nonce_of(7), n).unwrap();
638            assert_eq!(path.depth, 0);
639            assert_eq!(path.leaf_start, 0);
640            assert_eq!(path.leaf_end, n);
641        }
642    }
643
644    #[test]
645    fn selection_is_deterministic() {
646        let n = 500;
647        let a = select_subtree_path(&nonce_of(42), n).unwrap();
648        let b = select_subtree_path(&nonce_of(42), n).unwrap();
649        assert_eq!(a, b);
650    }
651
652    #[test]
653    fn different_nonces_cover_different_branches_over_time() {
654        // Not every nonce differs, but the set of selected ranges must be > 1.
655        let n = 1024;
656        let mut starts = std::collections::HashSet::new();
657        for seed in 0u8..=255 {
658            let p = select_subtree_path(&nonce_of(seed), n).unwrap();
659            starts.insert(p.leaf_start);
660        }
661        assert!(
662            starts.len() > 4,
663            "nonce should spread selection: {}",
664            starts.len()
665        );
666    }
667
668    /// Deterministic per-trial nonce (no RNG): hash a counter.
669    fn nonce_for_trial(i: u32) -> [u8; 32] {
670        let mut h = blake3::Hasher::new();
671        h.update(b"detection-sim-trial");
672        h.update(&i.to_le_bytes());
673        *h.finalize().as_bytes()
674    }
675
676    /// Catch rate over `trials` audits: fraction whose nonce-selected subtree
677    /// overlaps at least one deleted leaf index.
678    fn catch_rate(n: u32, deleted: &std::collections::HashSet<u32>, trials: u32) -> f64 {
679        let mut caught = 0u32;
680        for t in 0..trials {
681            let path = select_subtree_path(&nonce_for_trial(t), n).unwrap();
682            if (path.leaf_start..path.leaf_end).any(|i| deleted.contains(&i)) {
683                caught += 1;
684            }
685        }
686        f64::from(caught) / f64::from(trials)
687    }
688
689    #[test]
690    fn detection_uniform_fast_clustered_floor() {
691        // ADR-0002 Validation: uniform deletions are caught fast; clustered
692        // (contiguous-block) deletions are caught at roughly the deleted
693        // fraction per audit (a floor), much slower. This encodes the core
694        // security claim that the audit RATE (not per-audit cleverness) is the
695        // lever against a clustered deleter.
696        let n = 1024u32; // sqrt = 32
697        let del_count = n / 10; // delete 10% ≈ 102
698
699        // Uniform: spread deletions evenly across the keyspace.
700        let uniform: std::collections::HashSet<u32> =
701            (0..del_count).map(|i| (i * n / del_count) % n).collect();
702        let uniform_rate = catch_rate(n, &uniform, 256);
703
704        // Clustered: one contiguous block of the same size.
705        let clustered: std::collections::HashSet<u32> = (0..del_count).collect();
706        let clustered_rate = catch_rate(n, &clustered, 256);
707
708        // Uniform should be caught on essentially every audit (spread across the
709        // whole tree; any selected subtree overlaps some deletion).
710        assert!(
711            uniform_rate > 0.95,
712            "uniform deletions should be caught almost every audit, got {uniform_rate}"
713        );
714        // Clustered (one contiguous f-block) is a floor NEAR the deleted
715        // fraction f=0.1 — the quantitative ADR claim. The exact rate depends on
716        // selection geometry (a block of ~102 leaves is hit when the selected
717        // ~sqrt(N) subtree overlaps it), but it must sit in a tight band around
718        // f, well below the uniform rate. We bound it to [0.04, 0.30].
719        assert!(
720            (0.04..=0.30).contains(&clustered_rate),
721            "clustered catch-rate should be near f=0.1, got {clustered_rate}"
722        );
723        assert!(
724            uniform_rate > clustered_rate * 2.0,
725            "uniform ({uniform_rate}) must be far easier to catch than clustered ({clustered_rate})"
726        );
727    }
728
729    #[test]
730    fn subtree_size_near_sqrt_for_balanced_tree() {
731        // For a power-of-two tree the selection should land near sqrt(N).
732        let n = 1024; // sqrt = 32, floor = 32
733        let path = select_subtree_path(&nonce_of(3), n).unwrap();
734        // It stops as soon as a child would drop below floor; the subtree size
735        // is between floor and 2*floor for a balanced tree.
736        assert!(path.real_leaf_count() >= 32);
737        assert!(
738            path.real_leaf_count() <= 64,
739            "got {}",
740            path.real_leaf_count()
741        );
742    }
743
744    // ---- end-to-end proof build + verify ----------------------------------
745
746    /// Deterministic chunk bytes for a key (test fixture). The tree is built
747    /// from `BLAKE3` of exactly these bytes, so the proof and the committed
748    /// root agree — mirroring how a real responder hashes the chunk it holds.
749    fn chunk_bytes(key: &XorName) -> Vec<u8> {
750        // Distinct, non-trivial bytes derived from the key.
751        let mut v = key.to_vec();
752        v.extend_from_slice(b"chunk-body");
753        v
754    }
755
756    /// Build tree entries `(key, BLAKE3(chunk_bytes(key)))` for `n` keys.
757    fn entries_for(n: u32) -> Vec<(XorName, [u8; 32])> {
758        (0..n)
759            .map(|i| {
760                let key = xn_u32(i);
761                let bytes_hash = *blake3::hash(&chunk_bytes(&key)).as_bytes();
762                (key, bytes_hash)
763            })
764            .collect()
765    }
766
767    /// Reference responder: build a real subtree proof via the production
768    /// [`build_subtree_proof`] from a `MerkleTree` over `entries`. Leaves are
769    /// hashed from `chunk_bytes(key)` — the same bytes whose hash built the
770    /// tree — so an honest proof verifies. This makes the tests exercise the
771    /// exact builder the responder runs.
772    fn build_proof(
773        entries: &[(XorName, [u8; 32])],
774        nonce: &[u8; 32],
775        peer_id: &[u8; 32],
776    ) -> (SubtreeProof, StorageCommitment) {
777        let tree = MerkleTree::build(entries.to_vec()).unwrap();
778        let key_count = tree.key_count();
779        let proof = build_subtree_proof(&tree, nonce, peer_id, |k| Some(chunk_bytes(k))).unwrap();
780        let commitment = fake_commitment(tree.root(), key_count, *peer_id);
781        (proof, commitment)
782    }
783
784    fn fake_commitment(root: [u8; 32], key_count: u32, peer: [u8; 32]) -> StorageCommitment {
785        StorageCommitment {
786            root,
787            key_count,
788            sender_peer_id: peer,
789            sender_public_key: vec![0u8; 1952],
790            signature: vec![0u8; 3293],
791        }
792    }
793
794    #[test]
795    fn honest_proof_verifies_at_many_sizes() {
796        let peer = [0xABu8; 32];
797        for n in [5u32, 8, 13, 17, 64, 100, 256, 1000] {
798            let entries = entries_for(n);
799            for seed in [1u8, 2, 7, 42, 200] {
800                let nonce = nonce_of(seed);
801                let (proof, commitment) = build_proof(&entries, &nonce, &peer);
802                assert_eq!(
803                    verify_subtree_proof(&proof, &nonce, &commitment),
804                    StructureVerdict::Valid,
805                    "n={n} seed={seed}"
806                );
807            }
808        }
809    }
810
811    #[test]
812    fn honest_proof_verifies_for_every_size_and_nonce() {
813        // Regression for the left-packed self-pairing geometry bug: the proof
814        // reconstruction must match the committed root for EVERY key count
815        // (not just powers of two / cherry-picked sizes) and every nonce. An
816        // earlier perfect-tree model false-failed honest nodes for ~70% of
817        // sizes; this guards against any reintroduction.
818        let peer = [7u8; 32];
819        for n in 5u32..=600 {
820            let entries = entries_for(n);
821            for seed in 0u8..32 {
822                let nonce = nonce_of(seed.wrapping_mul(17).wrapping_add(3));
823                let (proof, commitment) = build_proof(&entries, &nonce, &peer);
824                assert_eq!(
825                    verify_subtree_proof(&proof, &nonce, &commitment),
826                    StructureVerdict::Valid,
827                    "honest proof must verify at n={n} seed={seed}"
828                );
829            }
830        }
831    }
832
833    #[test]
834    fn tampered_leaf_breaks_root() {
835        let peer = [9u8; 32];
836        let entries = entries_for(100);
837        let nonce = nonce_of(5);
838        let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
839        proof.leaves[0].bytes_hash[0] ^= 0x01;
840        assert!(matches!(
841            verify_subtree_proof(&proof, &nonce, &commitment),
842            StructureVerdict::Invalid(_)
843        ));
844    }
845
846    #[test]
847    fn tampered_cut_hash_breaks_root() {
848        let peer = [9u8; 32];
849        let entries = entries_for(256);
850        let nonce = nonce_of(11);
851        let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
852        if let Some(c) = proof.sibling_cut_hashes.first_mut() {
853            c[0] ^= 0x01;
854        }
855        assert!(matches!(
856            verify_subtree_proof(&proof, &nonce, &commitment),
857            StructureVerdict::Invalid(_)
858        ));
859    }
860
861    #[test]
862    fn wrong_leaf_count_rejected() {
863        let peer = [9u8; 32];
864        let entries = entries_for(100);
865        let nonce = nonce_of(5);
866        let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
867        proof.leaves.pop();
868        assert_eq!(
869            verify_subtree_proof(&proof, &nonce, &commitment),
870            StructureVerdict::Invalid("wrong leaf count")
871        );
872    }
873
874    #[test]
875    fn non_ascending_leaves_rejected() {
876        let peer = [9u8; 32];
877        let entries = entries_for(100);
878        let nonce = nonce_of(5);
879        let (mut proof, commitment) = build_proof(&entries, &nonce, &peer);
880        if proof.leaves.len() >= 2 {
881            proof.leaves.swap(0, 1);
882        }
883        assert!(matches!(
884            verify_subtree_proof(&proof, &nonce, &commitment),
885            StructureVerdict::Invalid(_)
886        ));
887    }
888
889    // ---- spot-check selection ---------------------------------------------
890
891    #[test]
892    fn spotcheck_indices_in_range_and_distinct() {
893        let n = 1024;
894        let nonce = nonce_of(3);
895        let path = select_subtree_path(&nonce, n).unwrap();
896        let k = 8;
897        let idxs = select_spotcheck_indices(&nonce, &path, k);
898        assert_eq!(
899            u32::try_from(idxs.len()).unwrap(),
900            k.min(path.real_leaf_count())
901        );
902        let mut seen = std::collections::HashSet::new();
903        for i in &idxs {
904            assert!(*i < path.real_leaf_count());
905            assert!(seen.insert(*i), "duplicate spot-check index {i}");
906        }
907    }
908
909    #[test]
910    fn build_proof_reports_missing_bytes() {
911        // A responder that no longer holds a selected, committed key's bytes
912        // must surface MissingBytes (the caller turns this into a confirmed
913        // failure, not a benign rejection).
914        let entries = entries_for(100);
915        let tree = MerkleTree::build(entries).unwrap();
916        let nonce = nonce_of(5);
917        let path = select_subtree_path(&nonce, tree.key_count()).unwrap();
918        let victim = tree.key_at(path.leaf_start as usize).unwrap();
919        let err = build_subtree_proof(&tree, &nonce, &[1u8; 32], |k| {
920            if *k == victim {
921                None
922            } else {
923                Some(chunk_bytes(k))
924            }
925        })
926        .unwrap_err();
927        assert_eq!(err, BuildProofError::MissingBytes { key: victim });
928    }
929
930    #[test]
931    fn spotcheck_returns_all_when_subtree_small() {
932        // Construct a path with few real leaves.
933        let path = SubtreePath {
934            depth: 0,
935            slot: 0,
936            leaf_start: 0,
937            leaf_end: 3,
938        };
939        let idxs = select_spotcheck_indices(&nonce_of(1), &path, 8);
940        assert_eq!(idxs, vec![0, 1, 2]);
941    }
942
943    #[test]
944    fn spotcheck_always_yields_exactly_min_k_n_distinct_indices() {
945        // The byte sample must NEVER be silently smaller than requested: a
946        // short sample weakens round 2 without anyone noticing. Exercise many
947        // nonces, subtree sizes, and k values, and require exactly min(k, n)
948        // distinct in-range indices every time — plus determinism (auditor and
949        // responder must derive the same set).
950        for size in [1u32, 2, 3, 7, 8, 64, 1000] {
951            let path = SubtreePath {
952                depth: 0,
953                slot: 0,
954                leaf_start: 0,
955                leaf_end: size,
956            };
957            for k in [1u32, 3, 5, 8] {
958                for seed in 0..32u8 {
959                    let nonce = nonce_of(seed);
960                    let idxs = select_spotcheck_indices(&nonce, &path, k);
961                    let expected = k.min(path.real_leaf_count()) as usize;
962                    assert_eq!(
963                        idxs.len(),
964                        expected,
965                        "size={size} k={k} seed={seed}: must yield exactly min(k, n)"
966                    );
967                    let mut seen = std::collections::HashSet::new();
968                    for i in &idxs {
969                        assert!(*i < path.real_leaf_count(), "index out of range");
970                        assert!(seen.insert(*i), "duplicate index {i}");
971                    }
972                    assert_eq!(
973                        idxs,
974                        select_spotcheck_indices(&nonce, &path, k),
975                        "selection must be deterministic"
976                    );
977                }
978            }
979        }
980    }
981
982    #[test]
983    fn fabricated_nonced_hash_caught_by_spotcheck_probability() {
984        // Simulate the realness check: a responder fabricates a fraction x of
985        // nonced hashes. The auditor spot-checks k leaves; probability all k
986        // land on honest leaves is (1-x)^k. Here we just assert the auditor
987        // *would* catch a fabricated leaf when it samples that position.
988        let peer = [1u8; 32];
989        let entries = entries_for(400);
990        let nonce = nonce_of(9);
991        let (mut proof, _commitment) = build_proof(&entries, &nonce, &peer);
992        // Fabricate the nonced hash on the first subtree leaf (wrong bytes).
993        proof.leaves[0].nonced_hash[0] ^= 0xFF;
994        // The realness check the caller runs: recompute from the real chunk
995        // bytes (the same fixture the honest tree was built from).
996        let leaf = &proof.leaves[0];
997        let real_bytes = chunk_bytes(&leaf.key);
998        let expected = nonced_leaf_hash(&nonce, &peer, &leaf.key, &real_bytes);
999        assert_ne!(
1000            leaf.nonced_hash, expected,
1001            "fabricated nonced hash must differ from real"
1002        );
1003    }
1004
1005    // ---- branch-substitution attack ---------------------------------------
1006
1007    #[test]
1008    fn responder_cannot_substitute_a_different_branch() {
1009        // ADR-0002 "Subtree selection": the random value alone fixes WHICH
1010        // branch is selected, so "the audited node cannot choose a convenient
1011        // branch to present." This is the load-bearing anti-substitution claim
1012        // and no existing test exercises it — the tamper tests only mangle a
1013        // hash within the *correct* branch.
1014        //
1015        // Attack: the responder builds a fully valid, internally-consistent
1016        // subtree proof for a DIFFERENT nonce (which the selection maps to a
1017        // different branch of the same committed tree), then presents it as the
1018        // answer to the auditor's nonce. Every leaf hash and every cut-hash is
1019        // genuine, the leaves are strictly ascending, and we deliberately pick
1020        // a decoy whose branch has the SAME leaf count and SAME depth as the
1021        // honest branch — so the cheap "wrong leaf count" / "wrong cut-hash
1022        // count" gates do NOT fire. The ONLY thing that can reject it is the
1023        // structural root re-derivation, which climbs using the auditor's
1024        // nonce-derived slot parity and position. It must reject.
1025        let peer = [0x5Au8; 32];
1026        let n = 1024u32; // balanced tree; sqrt floor = 32
1027        let entries = entries_for(n);
1028
1029        let audit_nonce = nonce_of(7);
1030        let audit_path = select_subtree_path(&audit_nonce, n).unwrap();
1031
1032        // Find a decoy nonce whose selected branch is a DIFFERENT slot but the
1033        // SAME depth (hence same real-leaf count for this balanced tree). This
1034        // forces rejection via the root check rather than a count mismatch.
1035        let mut decoy: Option<([u8; 32], SubtreePath)> = None;
1036        for seed in 0u8..=255 {
1037            let cand_nonce = nonce_of(seed);
1038            let cand = select_subtree_path(&cand_nonce, n).unwrap();
1039            if cand.depth == audit_path.depth
1040                && cand.slot != audit_path.slot
1041                && cand.real_leaf_count() == audit_path.real_leaf_count()
1042            {
1043                decoy = Some((cand_nonce, cand));
1044                break;
1045            }
1046        }
1047        let (decoy_nonce, decoy_path) =
1048            decoy.expect("a same-depth, different-slot decoy branch must exist for n=1024");
1049
1050        // Sanity: the decoy really is a different, equally-shaped branch.
1051        assert_ne!(decoy_path.slot, audit_path.slot);
1052        assert_eq!(decoy_path.depth, audit_path.depth);
1053        assert_eq!(decoy_path.real_leaf_count(), audit_path.real_leaf_count());
1054
1055        // The responder builds a genuine proof for the DECOY branch. Note the
1056        // nonced hashes are built with the decoy nonce too — but that does not
1057        // matter: the structural check below never inspects nonced hashes, and
1058        // the attack must already die on structure.
1059        let tree = MerkleTree::build(entries).unwrap();
1060        let decoy_proof =
1061            build_subtree_proof(&tree, &decoy_nonce, &peer, |k| Some(chunk_bytes(k))).unwrap();
1062
1063        // Pin the auditor's commitment to the genuine root of the same tree.
1064        let commitment = fake_commitment(tree.root(), n, peer);
1065
1066        // The honest answer to the SAME commitment + decoy nonce verifies, so
1067        // the proof itself is well-formed — it is only "wrong" relative to the
1068        // auditor's nonce.
1069        assert_eq!(
1070            verify_subtree_proof(&decoy_proof, &decoy_nonce, &commitment),
1071            StructureVerdict::Valid,
1072            "the decoy proof must be a genuinely valid proof for its own nonce"
1073        );
1074
1075        // The attack: present the decoy-branch proof against the AUDIT nonce.
1076        // The count gates cannot fire (same depth + leaf count by construction),
1077        // so this is the root re-derivation rejecting a substituted branch.
1078        let verdict = verify_subtree_proof(&decoy_proof, &audit_nonce, &commitment);
1079        assert_eq!(
1080            verdict,
1081            StructureVerdict::Invalid("root mismatch"),
1082            "substituting a different valid branch must be rejected by the root check, got {verdict:?}"
1083        );
1084    }
1085}
ant_node/replication/subtree.rs

ant_node/replication/
subtree.rs