Skip to main content

rouchdb_core/
rev_tree.rs

1/// Revision tree data structure.
2///
3/// Mirrors PouchDB's `pouchdb-merge` tree representation. A document's full
4/// history is a `RevTree` — a list of `RevPath` roots. Each root has a
5/// starting position (`pos`) and a tree of `RevNode`s.
6///
7/// Multiple roots arise when revisions are stemmed (pruned) and later a
8/// previously-stemmed branch is re-introduced during replication.
9/// Status of a revision's stored data.
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub enum RevStatus {
12    /// Full document data is stored for this revision.
13    Available,
14    /// This revision was compacted; only the hash remains.
15    Missing,
16}
17
18/// A single node in the revision tree.
19#[derive(Debug, Clone)]
20pub struct RevNode {
21    /// The hash portion of the revision id.
22    pub hash: String,
23    /// Whether this revision's data is still stored.
24    pub status: RevStatus,
25    /// Optional metadata attached to this node (e.g., `_deleted` flag).
26    pub opts: NodeOpts,
27    /// Child revisions. Multiple children represent a conflict branch.
28    pub children: Vec<RevNode>,
29}
30
31/// Per-node metadata flags.
32#[derive(Debug, Clone, Default)]
33pub struct NodeOpts {
34    pub deleted: bool,
35}
36
37/// A rooted path in the revision tree.
38///
39/// `pos` is the generation number of the root node. For example, if the
40/// earliest stored revision is `3-abc`, then `pos = 3`.
41#[derive(Debug, Clone)]
42pub struct RevPath {
43    pub pos: u64,
44    pub tree: RevNode,
45}
46
47/// The complete revision tree for a document. Multiple entries occur when
48/// stemming creates disjoint subtrees.
49pub type RevTree = Vec<RevPath>;
50
51/// Information about a leaf node in the tree.
52#[derive(Debug, Clone)]
53pub struct LeafInfo {
54    pub pos: u64,
55    pub hash: String,
56    pub deleted: bool,
57    pub status: RevStatus,
58}
59
60impl LeafInfo {
61    pub fn rev_string(&self) -> String {
62        format!("{}-{}", self.pos, self.hash)
63    }
64}
65
66// ---------------------------------------------------------------------------
67// Traversal helpers
68// ---------------------------------------------------------------------------
69
70/// Depth-first traversal of the revision tree, calling `f` for each node.
71/// The callback receives `(depth_from_root, node, root_pos)`.
72pub fn traverse_rev_tree<F>(tree: &RevTree, mut f: F)
73where
74    F: FnMut(u64, &RevNode, u64),
75{
76    for path in tree {
77        // Stack: (node, depth_from_root)
78        let mut stack: Vec<(&RevNode, u64)> = vec![(&path.tree, 0)];
79        while let Some((node, depth)) = stack.pop() {
80            f(path.pos + depth, node, path.pos);
81            for child in &node.children {
82                stack.push((child, depth + 1));
83            }
84        }
85    }
86}
87
88/// Collect all leaf nodes (nodes with no children) from the tree.
89/// Returns them sorted by: non-deleted first, then highest pos, then
90/// lexicographic hash — matching CouchDB's deterministic order.
91pub fn collect_leaves(tree: &RevTree) -> Vec<LeafInfo> {
92    let mut leaves = Vec::new();
93    traverse_rev_tree(tree, |pos, node, _root_pos| {
94        if node.children.is_empty() {
95            leaves.push(LeafInfo {
96                pos,
97                hash: node.hash.clone(),
98                deleted: node.opts.deleted,
99                status: node.status.clone(),
100            });
101        }
102    });
103    // Sort: non-deleted first, then by pos desc, then hash desc
104    leaves.sort_by(|a, b| {
105        a.deleted
106            .cmp(&b.deleted)
107            .then_with(|| b.pos.cmp(&a.pos))
108            .then_with(|| b.hash.cmp(&a.hash))
109    });
110    leaves
111}
112
113/// Decompose the tree into all root-to-leaf paths.
114type LeafPath = (u64, Vec<(String, NodeOpts, RevStatus)>);
115
116pub fn root_to_leaf(tree: &RevTree) -> Vec<LeafPath> {
117    let mut paths = Vec::new();
118    for path in tree {
119        fn walk(
120            node: &RevNode,
121            current: &mut Vec<(String, NodeOpts, RevStatus)>,
122            paths: &mut Vec<Vec<(String, NodeOpts, RevStatus)>>,
123        ) {
124            current.push((node.hash.clone(), node.opts.clone(), node.status.clone()));
125            if node.children.is_empty() {
126                paths.push(current.clone());
127            } else {
128                for child in &node.children {
129                    walk(child, current, paths);
130                }
131            }
132            current.pop();
133        }
134        let mut current = Vec::new();
135        let mut collected = Vec::new();
136        walk(&path.tree, &mut current, &mut collected);
137        for p in collected {
138            paths.push((path.pos, p));
139        }
140    }
141    paths
142}
143
144/// Check if a specific revision exists anywhere in the tree.
145pub fn rev_exists(tree: &RevTree, pos: u64, hash: &str) -> bool {
146    let mut found = false;
147    traverse_rev_tree(tree, |node_pos, node, _| {
148        if node_pos == pos && node.hash == hash {
149            found = true;
150        }
151    });
152    found
153}
154
155// ---------------------------------------------------------------------------
156// Building paths from revision arrays (for merging incoming revisions)
157// ---------------------------------------------------------------------------
158
159/// Build a single-path `RevPath` from a list of revision hashes.
160///
161/// `revs` is oldest-first: `[oldest_hash, ..., newest_hash]`.
162/// `pos` is the generation of the *newest* (leaf) revision.
163/// `opts` are the metadata flags for the *leaf* node.
164/// `status` is applied to the leaf; all ancestors are `Missing`.
165pub fn build_path_from_revs(
166    pos: u64,
167    revs: &[String],
168    opts: NodeOpts,
169    status: RevStatus,
170) -> RevPath {
171    assert!(!revs.is_empty());
172    let len = revs.len() as u64;
173    let root_pos = pos - len + 1;
174
175    // Build from leaf to root
176    let mut node: Option<RevNode> = None;
177    for (i, hash) in revs.iter().enumerate() {
178        let is_leaf = i == 0;
179        let n = RevNode {
180            hash: hash.clone(),
181            status: if is_leaf {
182                status.clone()
183            } else {
184                RevStatus::Missing
185            },
186            opts: if is_leaf {
187                opts.clone()
188            } else {
189                NodeOpts::default()
190            },
191            children: node.into_iter().collect(),
192        };
193        node = Some(n);
194    }
195
196    RevPath {
197        pos: root_pos,
198        tree: node.unwrap(),
199    }
200}
201
202// ---------------------------------------------------------------------------
203// Ancestry lookup (for replication — provides _revisions data)
204// ---------------------------------------------------------------------------
205
206/// Find the revision ancestry chain for a specific revision in the tree.
207///
208/// Returns hashes from leaf to root: `[target_hash, parent_hash, grandparent_hash, ...]`
209/// or `None` if the revision is not found in the tree.
210pub fn find_rev_ancestry(
211    tree: &RevTree,
212    target_pos: u64,
213    target_hash: &str,
214) -> Option<Vec<String>> {
215    for path in tree {
216        if let Some(chain) = find_chain_in_node(&path.tree, path.pos, target_pos, target_hash) {
217            return Some(chain);
218        }
219    }
220    None
221}
222
223fn find_chain_in_node(
224    node: &RevNode,
225    current_pos: u64,
226    target_pos: u64,
227    target_hash: &str,
228) -> Option<Vec<String>> {
229    if current_pos == target_pos && node.hash == target_hash {
230        return Some(vec![node.hash.clone()]);
231    }
232
233    for child in &node.children {
234        if let Some(mut chain) = find_chain_in_node(child, current_pos + 1, target_pos, target_hash)
235        {
236            chain.push(node.hash.clone());
237            return Some(chain);
238        }
239    }
240
241    None
242}
243
244// ---------------------------------------------------------------------------
245// Tests
246// ---------------------------------------------------------------------------
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251
252    fn leaf(hash: &str) -> RevNode {
253        RevNode {
254            hash: hash.into(),
255            status: RevStatus::Available,
256            opts: NodeOpts::default(),
257            children: vec![],
258        }
259    }
260
261    fn node(hash: &str, children: Vec<RevNode>) -> RevNode {
262        RevNode {
263            hash: hash.into(),
264            status: RevStatus::Available,
265            opts: NodeOpts::default(),
266            children,
267        }
268    }
269
270    #[test]
271    fn collect_leaves_simple_chain() {
272        // 1-a -> 2-b -> 3-c
273        let tree = vec![RevPath {
274            pos: 1,
275            tree: node("a", vec![node("b", vec![leaf("c")])]),
276        }];
277        let leaves = collect_leaves(&tree);
278        assert_eq!(leaves.len(), 1);
279        assert_eq!(leaves[0].pos, 3);
280        assert_eq!(leaves[0].hash, "c");
281    }
282
283    #[test]
284    fn collect_leaves_with_conflict() {
285        // 1-a -> 2-b (branch 1)
286        //     -> 2-c (branch 2)
287        let tree = vec![RevPath {
288            pos: 1,
289            tree: node("a", vec![leaf("b"), leaf("c")]),
290        }];
291        let leaves = collect_leaves(&tree);
292        assert_eq!(leaves.len(), 2);
293        // Sorted by hash desc: c before b
294        assert_eq!(leaves[0].hash, "c");
295        assert_eq!(leaves[1].hash, "b");
296    }
297
298    #[test]
299    fn rev_exists_finds_nodes() {
300        let tree = vec![RevPath {
301            pos: 1,
302            tree: node("a", vec![leaf("b")]),
303        }];
304        assert!(rev_exists(&tree, 1, "a"));
305        assert!(rev_exists(&tree, 2, "b"));
306        assert!(!rev_exists(&tree, 1, "z"));
307        assert!(!rev_exists(&tree, 3, "a"));
308    }
309
310    #[test]
311    fn root_to_leaf_paths() {
312        // 1-a -> 2-b -> 3-c
313        //            -> 3-d
314        let tree = vec![RevPath {
315            pos: 1,
316            tree: node("a", vec![node("b", vec![leaf("c"), leaf("d")])]),
317        }];
318        let paths = root_to_leaf(&tree);
319        assert_eq!(paths.len(), 2);
320        assert_eq!(paths[0].0, 1); // root pos
321        assert_eq!(paths[0].1.len(), 3); // a, b, c
322        assert_eq!(paths[1].1.len(), 3); // a, b, d
323    }
324
325    #[test]
326    fn find_rev_ancestry_linear_chain() {
327        // 1-a -> 2-b -> 3-c
328        let tree = vec![RevPath {
329            pos: 1,
330            tree: node("a", vec![node("b", vec![leaf("c")])]),
331        }];
332        let ancestry = find_rev_ancestry(&tree, 3, "c").unwrap();
333        assert_eq!(ancestry, vec!["c", "b", "a"]);
334
335        let ancestry = find_rev_ancestry(&tree, 2, "b").unwrap();
336        assert_eq!(ancestry, vec!["b", "a"]);
337
338        let ancestry = find_rev_ancestry(&tree, 1, "a").unwrap();
339        assert_eq!(ancestry, vec!["a"]);
340
341        assert!(find_rev_ancestry(&tree, 3, "z").is_none());
342    }
343
344    #[test]
345    fn build_path_from_revs_works() {
346        // revs: ["c", "b", "a"] (leaf first), pos: 3
347        let path = build_path_from_revs(
348            3,
349            &["c".into(), "b".into(), "a".into()],
350            NodeOpts::default(),
351            RevStatus::Available,
352        );
353        assert_eq!(path.pos, 1);
354        assert_eq!(path.tree.hash, "a");
355        assert_eq!(path.tree.status, RevStatus::Missing);
356        assert_eq!(path.tree.children.len(), 1);
357        assert_eq!(path.tree.children[0].hash, "b");
358        assert_eq!(path.tree.children[0].children[0].hash, "c");
359        assert_eq!(
360            path.tree.children[0].children[0].status,
361            RevStatus::Available
362        );
363    }
364}