Skip to main content

bee/manifest/
node.rs

1//! `MantarayNode`, `Fork`, and the trie operations (`add_fork`,
2//! `find`, `find_closest`, `remove_fork`). The marshal / unmarshal
3//! pair lives in [`crate::manifest::wire`].
4
5use std::collections::BTreeMap;
6
7use crate::swarm::{Error, Reference};
8
9use super::helpers::common_prefix;
10
11// ---- type bitfield (used by the wire format) ---------------------------
12
13/// Type bit: this node has a non-null target address.
14pub const TYPE_VALUE: u8 = 2;
15/// Type bit: this node has child forks.
16pub const TYPE_EDGE: u8 = 4;
17/// Type bit: this node's path contains a non-leading `/`.
18pub const TYPE_WITH_PATH_SEPARATOR: u8 = 8;
19/// Type bit: this fork carries metadata.
20pub const TYPE_WITH_METADATA: u8 = 16;
21
22/// Path separator byte.
23pub const PATH_SEPARATOR: u8 = b'/';
24
25/// Per-fork prefix length cap. Longer paths are split into chained
26/// nodes with prefixes ≤ [`MAX_PREFIX_LENGTH`] each.
27pub const MAX_PREFIX_LENGTH: usize = 30;
28
29/// All-zero 32-byte target address — the canonical "no target" value.
30pub const NULL_ADDRESS: [u8; 32] = [0; 32];
31
32/// True iff `b` is empty or all zeros (the canonical null target).
33pub fn is_null_address(b: &[u8]) -> bool {
34    b.is_empty() || b.iter().all(|&x| x == 0)
35}
36
37// ---- types -------------------------------------------------------------
38
39/// One edge of the trie. `prefix` is the path bytes that select this
40/// child; `node` is the child node.
41#[derive(Clone, Debug, PartialEq, Eq)]
42pub struct Fork {
43    /// Path bytes selecting this child.
44    pub prefix: Vec<u8>,
45    /// Child node.
46    pub node: MantarayNode,
47}
48
49/// One node of the Mantaray trie. Field names mirror bee-js / bee-go.
50///
51/// - `obfuscation_key`: 32-byte XOR mask used when serializing this
52///   node. Freshly-constructed nodes use a zero key.
53/// - `self_address`: chunk address of the marshaled node, populated
54///   after [`crate::manifest::wire::calculate_self_address`] /
55///   `save_recursively`. `None` means "not computed".
56/// - `target_address`: file reference at this node, or `NULL_ADDRESS`
57///   for pure directory nodes.
58/// - `path`: edge label inherited from the parent fork's prefix; the
59///   root has an empty path.
60/// - `forks`: child edges keyed by first prefix byte (ordered by key).
61/// - `metadata`: optional key/value annotations carried in the wire
62///   format; ordered by key so JSON serialization is deterministic.
63#[derive(Clone, Debug, PartialEq, Eq)]
64pub struct MantarayNode {
65    /// 32-byte XOR mask used during marshal.
66    pub obfuscation_key: [u8; 32],
67    /// Chunk address of the marshaled node, when known.
68    pub self_address: Option<[u8; 32]>,
69    /// File reference at this node (32 bytes; `NULL_ADDRESS` = none).
70    pub target_address: [u8; 32],
71    /// Optional metadata annotations.
72    pub metadata: Option<BTreeMap<String, String>>,
73    /// Edge label inherited from the parent fork.
74    pub path: Vec<u8>,
75    /// Child edges keyed by first prefix byte.
76    pub forks: BTreeMap<u8, Fork>,
77}
78
79impl Default for MantarayNode {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl MantarayNode {
86    /// Empty Mantaray root.
87    pub fn new() -> Self {
88        Self {
89            obfuscation_key: [0; 32],
90            self_address: None,
91            target_address: NULL_ADDRESS,
92            metadata: None,
93            path: Vec::new(),
94            forks: BTreeMap::new(),
95        }
96    }
97
98    /// True iff this node's `target_address` is the null address.
99    pub fn is_null_target(&self) -> bool {
100        is_null_address(&self.target_address)
101    }
102
103    /// Pack the type bitfield used in this node's outgoing fork
104    /// record.
105    pub fn determine_type(&self) -> u8 {
106        let mut t = 0u8;
107        let path_is_root_dir = self.path.len() == 1 && self.path[0] == PATH_SEPARATOR;
108
109        if !self.is_null_target() || path_is_root_dir {
110            t |= TYPE_VALUE;
111        }
112        if !self.forks.is_empty() {
113            t |= TYPE_EDGE;
114        }
115        if self.path.contains(&PATH_SEPARATOR) && !path_is_root_dir {
116            t |= TYPE_WITH_PATH_SEPARATOR;
117        }
118        if self.metadata.is_some() {
119            t |= TYPE_WITH_METADATA;
120        }
121        t
122    }
123
124    // ---- trie traversal -------------------------------------------------
125
126    /// Walk down forks while `path` matches. Returns the deepest node
127    /// reached and the bytes of `path` matched along the way.
128    pub fn find_closest<'a>(&'a self, path: &[u8]) -> (&'a MantarayNode, Vec<u8>) {
129        let mut cur = self;
130        let mut matched: Vec<u8> = Vec::new();
131        let mut remaining = path;
132        loop {
133            if remaining.is_empty() {
134                return (cur, matched);
135            }
136            let Some(fork) = cur.forks.get(&remaining[0]) else {
137                return (cur, matched);
138            };
139            let common = common_prefix(&fork.prefix, remaining);
140            if common.len() == fork.prefix.len() {
141                matched.extend_from_slice(&fork.prefix);
142                remaining = &remaining[fork.prefix.len()..];
143                cur = &fork.node;
144                continue;
145            }
146            return (cur, matched);
147        }
148    }
149
150    /// Return the node whose full path equals `path`, or `None`.
151    pub fn find(&self, path: &[u8]) -> Option<&MantarayNode> {
152        let (closest, matched) = self.find_closest(path);
153        if matched.len() == path.len() {
154            Some(closest)
155        } else {
156            None
157        }
158    }
159
160    // ---- collect helpers ------------------------------------------------
161
162    /// All descendants whose `target_address` is non-null, with their
163    /// full paths. The path is the concatenation of every fork prefix
164    /// from the root down to the node.
165    pub fn collect(&self) -> Vec<(Vec<u8>, &MantarayNode)> {
166        let mut out = Vec::new();
167        self.collect_into(&[], &mut out);
168        out
169    }
170
171    fn collect_into<'a>(&'a self, prefix: &[u8], out: &mut Vec<(Vec<u8>, &'a MantarayNode)>) {
172        for fork in self.forks.values() {
173            let mut full = prefix.to_vec();
174            full.extend_from_slice(&fork.prefix);
175            if !fork.node.is_null_target() {
176                out.push((full.clone(), &fork.node));
177            }
178            fork.node.collect_into(&full, out);
179        }
180    }
181
182    /// `{full_path: reference}` for every leaf with a valid reference.
183    /// Mirrors bee-go `CollectAndMap`.
184    pub fn collect_and_map(&self) -> std::collections::HashMap<String, Reference> {
185        let mut out = std::collections::HashMap::new();
186        for (path, node) in self.collect() {
187            if let Ok(r) = Reference::new(&node.target_address) {
188                out.insert(String::from_utf8_lossy(&path).into_owned(), r);
189            }
190        }
191        out
192    }
193
194    // ---- mutation: add_fork --------------------------------------------
195
196    /// Insert a `(path, target, metadata)` entry. Long paths are
197    /// chunked into chained nodes of up to [`MAX_PREFIX_LENGTH`]
198    /// bytes each. Metadata is attached to the terminal node only.
199    ///
200    /// Invalidates `self_address` on every node touched along the
201    /// way; call `calculate_self_address` (or `save_recursively`)
202    /// before marshaling.
203    pub fn add_fork(
204        &mut self,
205        path: &[u8],
206        target: Option<&Reference>,
207        metadata: Option<&BTreeMap<String, String>>,
208    ) {
209        self.self_address = None;
210        self.add_fork_inner(path, target, metadata);
211    }
212
213    fn add_fork_inner(
214        &mut self,
215        path: &[u8],
216        target: Option<&Reference>,
217        metadata: Option<&BTreeMap<String, String>>,
218    ) {
219        if path.is_empty() {
220            // Empty path: setting target/metadata on this node directly.
221            // Bee-go's AddFork doesn't ever call into this with empty path
222            // because chunks are always non-empty; this branch covers the
223            // case where a fork's prefix equals path exactly.
224            if let Some(r) = target {
225                let bytes = r.as_bytes();
226                let n = bytes.len().min(32);
227                let mut ta = NULL_ADDRESS;
228                ta[..n].copy_from_slice(&bytes[..n]);
229                self.target_address = ta;
230            }
231            if let Some(m) = metadata {
232                self.metadata = Some(m.clone());
233            }
234            return;
235        }
236
237        let chunk_len = path.len().min(MAX_PREFIX_LENGTH);
238        let chunk: Vec<u8> = path[..chunk_len].to_vec();
239        let after: Vec<u8> = path[chunk_len..].to_vec();
240        let is_last = after.is_empty();
241        let key = chunk[0];
242
243        if let Some(existing) = self.forks.get(&key) {
244            let common_len = common_prefix(&existing.prefix, &chunk).len();
245            if common_len == existing.prefix.len() {
246                // Existing fork's prefix is a prefix of (or equals) chunk;
247                // descend into the existing node with whatever's left of
248                // chunk plus `after`.
249                let mut next: Vec<u8> = chunk[common_len..].to_vec();
250                next.extend_from_slice(&after);
251                let existing_mut = self.forks.get_mut(&key).unwrap();
252                existing_mut.node.self_address = None;
253                existing_mut.node.add_fork_inner(&next, target, metadata);
254                return;
255            }
256            // Split: common_len < existing.prefix.len(). Take ownership
257            // of the existing fork, build a new fork for `chunk` (leaf
258            // values applied only if this chunk is the last), and merge.
259            let existing_fork = self.forks.remove(&key).unwrap();
260            let new_node = make_subtree_node_for_chunk(&chunk, is_last, target, metadata);
261            let new_fork = Fork {
262                prefix: chunk.clone(),
263                node: new_node,
264            };
265            let merged = split_forks(new_fork, existing_fork);
266            self.forks.insert(key, merged);
267
268            if !is_last {
269                // Descend to the node corresponding to `chunk` and continue
270                // adding `after` underneath it.
271                let merged_ref = self.forks.get_mut(&key).unwrap();
272                if merged_ref.prefix == chunk {
273                    // Case 1: chunk became parent of the existing fork.
274                    merged_ref.node.add_fork_inner(&after, target, metadata);
275                } else {
276                    // Case 3: branch became parent; chunk's node sits one
277                    // level below, keyed by chunk[merged_ref.prefix.len()].
278                    let descend_byte = chunk[merged_ref.prefix.len()];
279                    let chunk_fork = merged_ref
280                        .node
281                        .forks
282                        .get_mut(&descend_byte)
283                        .expect("split must produce chunk fork");
284                    chunk_fork.node.add_fork_inner(&after, target, metadata);
285                }
286            }
287            return;
288        }
289
290        // No existing fork at this key — insert a fresh one and recurse if
291        // there's still more path left.
292        let new_node = make_subtree_node_for_chunk(&chunk, is_last, target, metadata);
293        self.forks.insert(
294            key,
295            Fork {
296                prefix: chunk,
297                node: new_node,
298            },
299        );
300        if !is_last {
301            let inserted = &mut self.forks.get_mut(&key).unwrap().node;
302            inserted.add_fork_inner(&after, target, metadata);
303        }
304    }
305
306    // ---- mutation: remove_fork -----------------------------------------
307
308    /// Delete the fork rooted at `path`. If the removed node had
309    /// children, they are re-inserted under the parent so the trie
310    /// stays consistent.
311    pub fn remove_fork(&mut self, path: &[u8]) -> Result<(), Error> {
312        if path.is_empty() {
313            return Err(Error::argument("remove_fork: path cannot be empty"));
314        }
315        if self.find(path).is_none() {
316            return Err(Error::argument("remove_fork: not found"));
317        }
318        self.self_address = None;
319
320        // Walk to the parent, recording the prefixes consumed at each
321        // step so we can climb back down with mutable access.
322        let mut chain: Vec<u8> = Vec::new(); // sequence of fork keys
323        {
324            let mut cur: &MantarayNode = self;
325            let mut remaining: &[u8] = path;
326            while !remaining.is_empty() {
327                let key = remaining[0];
328                let fork = cur
329                    .forks
330                    .get(&key)
331                    .ok_or_else(|| Error::argument("remove_fork: chain broken"))?;
332                let consumed = fork.prefix.len();
333                chain.push(key);
334                remaining = &remaining[consumed..];
335                cur = &fork.node;
336            }
337        }
338
339        let last_key = *chain.last().expect("chain non-empty when find found path");
340
341        // Climb back down with mutable access to the parent.
342        let mut parent: &mut MantarayNode = self;
343        for key in &chain[..chain.len() - 1] {
344            parent = &mut parent
345                .forks
346                .get_mut(key)
347                .ok_or_else(|| Error::argument("remove_fork: chain broken"))?
348                .node;
349        }
350        let removed = parent
351            .forks
352            .remove(&last_key)
353            .ok_or_else(|| Error::argument("remove_fork: missing fork"))?;
354        parent.self_address = None;
355
356        // Re-insert each grandchild under self (root) using the
357        // original full path of the removed node + each grandchild's
358        // prefix. For each grandchild we know:
359        //   * its prefix == full path of grandchild relative to removed node
360        //   * its node may itself have grandchildren, so we recurse via
361        //     its own collect to enumerate every leaf.
362        for fork in removed.node.forks.into_values() {
363            reinsert_subtree(self, path, &fork.prefix, fork.node);
364        }
365        Ok(())
366    }
367}
368
369// ---- helpers used by add_fork / remove_fork ----------------------------
370
371fn make_subtree_node_for_chunk(
372    chunk: &[u8],
373    is_last: bool,
374    target: Option<&Reference>,
375    metadata: Option<&BTreeMap<String, String>>,
376) -> MantarayNode {
377    let mut n = MantarayNode::new();
378    n.path = chunk.to_vec();
379    if is_last {
380        if let Some(r) = target {
381            let bytes = r.as_bytes();
382            let len = bytes.len().min(32);
383            n.target_address[..len].copy_from_slice(&bytes[..len]);
384        }
385        if let Some(m) = metadata {
386            n.metadata = Some(m.clone());
387        }
388    }
389    n
390}
391
392/// Re-insert `subtree` into `root` under the path
393/// `path_prefix_under_root || subtree_prefix`. Walks the subtree's
394/// own children and re-inserts each leaf separately so the
395/// re-insertion process passes through `add_fork`'s normal split
396/// logic.
397fn reinsert_subtree(
398    root: &mut MantarayNode,
399    path_prefix_under_root: &[u8],
400    subtree_prefix: &[u8],
401    subtree: MantarayNode,
402) {
403    // Its own leaf, if any.
404    let mut full_path = path_prefix_under_root.to_vec();
405    full_path.extend_from_slice(subtree_prefix);
406
407    if !subtree.is_null_target() {
408        let target = Reference::new(&subtree.target_address).ok();
409        root.add_fork(&full_path, target.as_ref(), subtree.metadata.as_ref());
410    } else if subtree.metadata.is_some() {
411        // Metadata-only intermediate — preserve.
412        root.add_fork(&full_path, None, subtree.metadata.as_ref());
413    }
414
415    // Recurse into subtree's children.
416    for fork in subtree.forks.into_values() {
417        reinsert_subtree(root, &full_path, &fork.prefix, fork.node);
418    }
419}
420
421// ---- fork split (resolves prefix collisions) ---------------------------
422
423/// Resolve a prefix collision between two forks at the same first
424/// byte. Returns the fork the parent should store, reshuffling node
425/// paths as needed to keep the patricia invariant. Mirrors bee-js
426/// `Fork.split`.
427pub(super) fn split_forks(mut a: Fork, mut b: Fork) -> Fork {
428    let common = common_prefix(&a.prefix, &b.prefix).to_vec();
429
430    if common.len() == a.prefix.len() {
431        // b lives under a.
432        let remaining = b.prefix[common.len()..].to_vec();
433        b.node.path = remaining.clone();
434        b.prefix = remaining;
435        a.node.forks.insert(b.prefix[0], b);
436        return a;
437    }
438    if common.len() == b.prefix.len() {
439        // a lives under b.
440        let remaining = a.prefix[common.len()..].to_vec();
441        a.node.path = remaining.clone();
442        a.prefix = remaining;
443        b.node.forks.insert(a.prefix[0], a);
444        return b;
445    }
446
447    // Neither contains the other: insert a branching node.
448    let mut branch = MantarayNode::new();
449    branch.path = common.clone();
450    let a_remaining = a.prefix[common.len()..].to_vec();
451    let b_remaining = b.prefix[common.len()..].to_vec();
452    a.node.path = a_remaining.clone();
453    b.node.path = b_remaining.clone();
454    a.prefix = a_remaining.clone();
455    b.prefix = b_remaining.clone();
456    branch.forks.insert(a.prefix[0], a);
457    branch.forks.insert(b.prefix[0], b);
458
459    Fork {
460        prefix: common,
461        node: branch,
462    }
463}
464
465#[cfg(test)]
466mod tests {
467    use super::*;
468
469    fn r32(byte: u8) -> Reference {
470        Reference::new(&[byte; 32]).unwrap()
471    }
472
473    #[test]
474    fn add_and_find_simple() {
475        let mut root = MantarayNode::new();
476        root.add_fork(b"hello", Some(&r32(0x11)), None);
477        let n = root.find(b"hello").unwrap();
478        assert_eq!(n.target_address, [0x11; 32]);
479    }
480
481    #[test]
482    fn find_closest_returns_match_only_when_descended() {
483        // bee-go / bee-js semantics: matched is the path actually consumed
484        // by descending into existing forks. If a fork's prefix diverges
485        // from the search path before being fully consumed, we don't
486        // descend and the matched length stays at the last fully-consumed
487        // boundary.
488        let mut root = MantarayNode::new();
489        root.add_fork(b"hello", Some(&r32(0x11)), None);
490        // "helicopter" diverges inside fork prefix "hello" → no descent.
491        let (_, matched) = root.find_closest(b"helicopter");
492        assert_eq!(matched, b"");
493        // "hello/world" descends through fork "hello" and stops there.
494        let (_, matched) = root.find_closest(b"hello/world");
495        assert_eq!(matched, b"hello");
496    }
497
498    #[test]
499    fn add_two_paths_share_prefix() {
500        let mut root = MantarayNode::new();
501        root.add_fork(b"hello", Some(&r32(0x11)), None);
502        root.add_fork(b"help", Some(&r32(0x22)), None);
503        assert_eq!(root.find(b"hello").unwrap().target_address, [0x11; 32]);
504        assert_eq!(root.find(b"help").unwrap().target_address, [0x22; 32]);
505    }
506
507    #[test]
508    fn long_path_chunks_into_chained_nodes() {
509        let mut root = MantarayNode::new();
510        let path = vec![b'a'; 100]; // > MAX_PREFIX_LENGTH
511        root.add_fork(&path, Some(&r32(0x33)), None);
512        assert_eq!(root.find(&path).unwrap().target_address, [0x33; 32]);
513    }
514
515    #[test]
516    fn remove_fork_removes_and_keeps_other_paths() {
517        let mut root = MantarayNode::new();
518        root.add_fork(b"foo/a", Some(&r32(0xaa)), None);
519        root.add_fork(b"foo/b", Some(&r32(0xbb)), None);
520        root.remove_fork(b"foo/a").unwrap();
521        assert!(root.find(b"foo/a").is_none());
522        assert_eq!(root.find(b"foo/b").unwrap().target_address, [0xbb; 32]);
523    }
524
525    #[test]
526    fn remove_fork_reinserts_grandchildren() {
527        // Add three leaves so the removed leaf has siblings whose
528        // common-prefix structure must be rebuilt after re-insertion.
529        let mut root = MantarayNode::new();
530        root.add_fork(b"a/b/c", Some(&r32(0x01)), None);
531        root.add_fork(b"a/b/d", Some(&r32(0x02)), None);
532        root.add_fork(b"a/x", Some(&r32(0x03)), None);
533        // Remove an exact leaf path; the trie should still contain
534        // every other leaf.
535        root.remove_fork(b"a/b/c").unwrap();
536        assert!(root.find(b"a/b/c").is_none());
537        assert_eq!(root.find(b"a/b/d").unwrap().target_address, [0x02; 32]);
538        assert_eq!(root.find(b"a/x").unwrap().target_address, [0x03; 32]);
539    }
540
541    #[test]
542    fn remove_fork_unknown_path_errors() {
543        let mut root = MantarayNode::new();
544        root.add_fork(b"hello", Some(&r32(0x01)), None);
545        assert!(root.remove_fork(b"world").is_err());
546    }
547
548    #[test]
549    fn collect_returns_full_paths_for_leaves() {
550        let mut root = MantarayNode::new();
551        root.add_fork(b"a", Some(&r32(0x01)), None);
552        root.add_fork(b"ab", Some(&r32(0x02)), None);
553        root.add_fork(b"abc", Some(&r32(0x03)), None);
554        let mut paths: Vec<Vec<u8>> = root.collect().into_iter().map(|(p, _)| p).collect();
555        paths.sort();
556        assert_eq!(paths, vec![b"a".to_vec(), b"ab".to_vec(), b"abc".to_vec()]);
557    }
558
559    #[test]
560    fn collect_and_map_returns_string_keyed_refs() {
561        let mut root = MantarayNode::new();
562        root.add_fork(b"hello.txt", Some(&r32(0xee)), None);
563        let map = root.collect_and_map();
564        assert_eq!(map.len(), 1);
565        let r = map.get("hello.txt").unwrap();
566        assert_eq!(r.as_bytes(), &[0xee; 32]);
567    }
568
569    #[test]
570    fn determine_type_packs_bits() {
571        let mut root = MantarayNode::new();
572        root.add_fork(b"hello", Some(&r32(0x11)), None);
573        let leaf = root.find(b"hello").unwrap();
574        let t = leaf.determine_type();
575        assert!(t & TYPE_VALUE != 0);
576        assert!(t & TYPE_EDGE == 0);
577    }
578
579    #[test]
580    fn metadata_is_preserved() {
581        let mut root = MantarayNode::new();
582        let meta: BTreeMap<String, String> =
583            [("k".to_string(), "v".to_string())].into_iter().collect();
584        root.add_fork(b"foo", Some(&r32(0xaa)), Some(&meta));
585        assert_eq!(root.find(b"foo").unwrap().metadata, Some(meta));
586    }
587}