Skip to main content

rouchdb_core/
rev_tree.rs

1/// Revision tree data structure.
2///
3/// Mirrors PouchDB's `pouchdb-merge` tree representation. A document's full
4/// history is a `RevTree` — a list of `RevPath` roots. Each root has a
5/// starting position (`pos`) and a tree of `RevNode`s.
6///
7/// Multiple roots arise when revisions are stemmed (pruned) and later a
8/// previously-stemmed branch is re-introduced during replication.
9/// Status of a revision's stored data.
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub enum RevStatus {
12    /// Full document data is stored for this revision.
13    Available,
14    /// This revision was compacted; only the hash remains.
15    Missing,
16}
17
18/// A single node in the revision tree.
19#[derive(Debug, Clone)]
20pub struct RevNode {
21    /// The hash portion of the revision id.
22    pub hash: String,
23    /// Whether this revision's data is still stored.
24    pub status: RevStatus,
25    /// Optional metadata attached to this node (e.g., `_deleted` flag).
26    pub opts: NodeOpts,
27    /// Child revisions. Multiple children represent a conflict branch.
28    pub children: Vec<RevNode>,
29}
30
31/// Per-node metadata flags.
32#[derive(Debug, Clone, Default)]
33pub struct NodeOpts {
34    pub deleted: bool,
35}
36
37/// A rooted path in the revision tree.
38///
39/// `pos` is the generation number of the root node. For example, if the
40/// earliest stored revision is `3-abc`, then `pos = 3`.
41#[derive(Debug, Clone)]
42pub struct RevPath {
43    pub pos: u64,
44    pub tree: RevNode,
45}
46
47/// The complete revision tree for a document. Multiple entries occur when
48/// stemming creates disjoint subtrees.
49pub type RevTree = Vec<RevPath>;
50
51/// Information about a leaf node in the tree.
52#[derive(Debug, Clone)]
53pub struct LeafInfo {
54    pub pos: u64,
55    pub hash: String,
56    pub deleted: bool,
57    pub status: RevStatus,
58}
59
60impl LeafInfo {
61    pub fn rev_string(&self) -> String {
62        format!("{}-{}", self.pos, self.hash)
63    }
64}
65
66// ---------------------------------------------------------------------------
67// Traversal helpers
68// ---------------------------------------------------------------------------
69
70/// Depth-first traversal of the revision tree, calling `f` for each node.
71/// The callback receives `(depth_from_root, node, root_pos)`.
72pub fn traverse_rev_tree<F>(tree: &RevTree, mut f: F)
73where
74    F: FnMut(u64, &RevNode, u64),
75{
76    for path in tree {
77        // Stack: (node, depth_from_root)
78        let mut stack: Vec<(&RevNode, u64)> = vec![(&path.tree, 0)];
79        while let Some((node, depth)) = stack.pop() {
80            f(path.pos + depth, node, path.pos);
81            for child in &node.children {
82                stack.push((child, depth + 1));
83            }
84        }
85    }
86}
87
88/// Collect all leaf nodes (nodes with no children) from the tree.
89/// Returns them sorted by: non-deleted first, then highest pos, then
90/// lexicographic hash — matching CouchDB's deterministic order.
91pub fn collect_leaves(tree: &RevTree) -> Vec<LeafInfo> {
92    let mut leaves = Vec::new();
93    traverse_rev_tree(tree, |pos, node, _root_pos| {
94        if node.children.is_empty() {
95            leaves.push(LeafInfo {
96                pos,
97                hash: node.hash.clone(),
98                deleted: node.opts.deleted,
99                status: node.status.clone(),
100            });
101        }
102    });
103    // Sort: non-deleted first, then by pos desc, then hash desc
104    leaves.sort_by(|a, b| {
105        a.deleted
106            .cmp(&b.deleted)
107            .then_with(|| b.pos.cmp(&a.pos))
108            .then_with(|| b.hash.cmp(&a.hash))
109    });
110    leaves
111}
112
113/// Decompose the tree into all root-to-leaf paths.
114type LeafPath = (u64, Vec<(String, NodeOpts, RevStatus)>);
115
116pub fn root_to_leaf(tree: &RevTree) -> Vec<LeafPath> {
117    let mut paths = Vec::new();
118    for path in tree {
119        fn walk(
120            node: &RevNode,
121            current: &mut Vec<(String, NodeOpts, RevStatus)>,
122            paths: &mut Vec<Vec<(String, NodeOpts, RevStatus)>>,
123        ) {
124            current.push((node.hash.clone(), node.opts.clone(), node.status.clone()));
125            if node.children.is_empty() {
126                paths.push(current.clone());
127            } else {
128                for child in &node.children {
129                    walk(child, current, paths);
130                }
131            }
132            current.pop();
133        }
134        let mut current = Vec::new();
135        let mut collected = Vec::new();
136        walk(&path.tree, &mut current, &mut collected);
137        for p in collected {
138            paths.push((path.pos, p));
139        }
140    }
141    paths
142}
143
144/// Check if a specific revision exists anywhere in the tree.
145pub fn rev_exists(tree: &RevTree, pos: u64, hash: &str) -> bool {
146    let mut found = false;
147    traverse_rev_tree(tree, |node_pos, node, _| {
148        if node_pos == pos && node.hash == hash {
149            found = true;
150        }
151    });
152    found
153}
154
155// ---------------------------------------------------------------------------
156// Building paths from revision arrays (for merging incoming revisions)
157// ---------------------------------------------------------------------------
158
159/// Build a single-path `RevPath` from a list of revision hashes.
160///
161/// `revs` is oldest-first: `[oldest_hash, ..., newest_hash]`.
162/// `pos` is the generation of the *newest* (leaf) revision.
163/// `opts` are the metadata flags for the *leaf* node.
164/// `status` is applied to the leaf; all ancestors are `Missing`.
165pub fn build_path_from_revs(
166    pos: u64,
167    revs: &[String],
168    opts: NodeOpts,
169    status: RevStatus,
170) -> RevPath {
171    if revs.is_empty() {
172        // Return a degenerate single-node path rather than panicking.
173        return RevPath {
174            pos,
175            tree: RevNode {
176                hash: String::new(),
177                status,
178                opts,
179                children: vec![],
180            },
181        };
182    }
183    let len = revs.len() as u64;
184    let root_pos = pos.saturating_sub(len.saturating_sub(1));
185
186    // Build from leaf to root
187    let mut node: Option<RevNode> = None;
188    for (i, hash) in revs.iter().enumerate() {
189        let is_leaf = i == 0;
190        let n = RevNode {
191            hash: hash.clone(),
192            status: if is_leaf {
193                status.clone()
194            } else {
195                RevStatus::Missing
196            },
197            opts: if is_leaf {
198                opts.clone()
199            } else {
200                NodeOpts::default()
201            },
202            children: node.into_iter().collect(),
203        };
204        node = Some(n);
205    }
206
207    RevPath {
208        pos: root_pos,
209        tree: node.expect("node must exist after building from non-empty revs"),
210    }
211}
212
213// ---------------------------------------------------------------------------
214// Ancestry lookup (for replication — provides _revisions data)
215// ---------------------------------------------------------------------------
216
217/// Find the revision ancestry chain for a specific revision in the tree.
218///
219/// Returns hashes from leaf to root: `[target_hash, parent_hash, grandparent_hash, ...]`
220/// or `None` if the revision is not found in the tree.
221pub fn find_rev_ancestry(
222    tree: &RevTree,
223    target_pos: u64,
224    target_hash: &str,
225) -> Option<Vec<String>> {
226    for path in tree {
227        if let Some(chain) = find_chain_in_node(&path.tree, path.pos, target_pos, target_hash) {
228            return Some(chain);
229        }
230    }
231    None
232}
233
234fn find_chain_in_node(
235    node: &RevNode,
236    current_pos: u64,
237    target_pos: u64,
238    target_hash: &str,
239) -> Option<Vec<String>> {
240    if current_pos == target_pos && node.hash == target_hash {
241        return Some(vec![node.hash.clone()]);
242    }
243
244    for child in &node.children {
245        if let Some(mut chain) = find_chain_in_node(child, current_pos + 1, target_pos, target_hash)
246        {
247            chain.push(node.hash.clone());
248            return Some(chain);
249        }
250    }
251
252    None
253}
254
255// ---------------------------------------------------------------------------
256// Tests
257// ---------------------------------------------------------------------------
258
259#[cfg(test)]
260mod tests {
261    use super::*;
262
263    fn leaf(hash: &str) -> RevNode {
264        RevNode {
265            hash: hash.into(),
266            status: RevStatus::Available,
267            opts: NodeOpts::default(),
268            children: vec![],
269        }
270    }
271
272    fn node(hash: &str, children: Vec<RevNode>) -> RevNode {
273        RevNode {
274            hash: hash.into(),
275            status: RevStatus::Available,
276            opts: NodeOpts::default(),
277            children,
278        }
279    }
280
281    #[test]
282    fn collect_leaves_simple_chain() {
283        // 1-a -> 2-b -> 3-c
284        let tree = vec![RevPath {
285            pos: 1,
286            tree: node("a", vec![node("b", vec![leaf("c")])]),
287        }];
288        let leaves = collect_leaves(&tree);
289        assert_eq!(leaves.len(), 1);
290        assert_eq!(leaves[0].pos, 3);
291        assert_eq!(leaves[0].hash, "c");
292    }
293
294    #[test]
295    fn collect_leaves_with_conflict() {
296        // 1-a -> 2-b (branch 1)
297        //     -> 2-c (branch 2)
298        let tree = vec![RevPath {
299            pos: 1,
300            tree: node("a", vec![leaf("b"), leaf("c")]),
301        }];
302        let leaves = collect_leaves(&tree);
303        assert_eq!(leaves.len(), 2);
304        // Sorted by hash desc: c before b
305        assert_eq!(leaves[0].hash, "c");
306        assert_eq!(leaves[1].hash, "b");
307    }
308
309    #[test]
310    fn rev_exists_finds_nodes() {
311        let tree = vec![RevPath {
312            pos: 1,
313            tree: node("a", vec![leaf("b")]),
314        }];
315        assert!(rev_exists(&tree, 1, "a"));
316        assert!(rev_exists(&tree, 2, "b"));
317        assert!(!rev_exists(&tree, 1, "z"));
318        assert!(!rev_exists(&tree, 3, "a"));
319    }
320
321    #[test]
322    fn root_to_leaf_paths() {
323        // 1-a -> 2-b -> 3-c
324        //            -> 3-d
325        let tree = vec![RevPath {
326            pos: 1,
327            tree: node("a", vec![node("b", vec![leaf("c"), leaf("d")])]),
328        }];
329        let paths = root_to_leaf(&tree);
330        assert_eq!(paths.len(), 2);
331        assert_eq!(paths[0].0, 1); // root pos
332        assert_eq!(paths[0].1.len(), 3); // a, b, c
333        assert_eq!(paths[1].1.len(), 3); // a, b, d
334    }
335
336    #[test]
337    fn find_rev_ancestry_linear_chain() {
338        // 1-a -> 2-b -> 3-c
339        let tree = vec![RevPath {
340            pos: 1,
341            tree: node("a", vec![node("b", vec![leaf("c")])]),
342        }];
343        let ancestry = find_rev_ancestry(&tree, 3, "c").unwrap();
344        assert_eq!(ancestry, vec!["c", "b", "a"]);
345
346        let ancestry = find_rev_ancestry(&tree, 2, "b").unwrap();
347        assert_eq!(ancestry, vec!["b", "a"]);
348
349        let ancestry = find_rev_ancestry(&tree, 1, "a").unwrap();
350        assert_eq!(ancestry, vec!["a"]);
351
352        assert!(find_rev_ancestry(&tree, 3, "z").is_none());
353    }
354
355    #[test]
356    fn build_path_from_revs_works() {
357        // revs: ["c", "b", "a"] (leaf first), pos: 3
358        let path = build_path_from_revs(
359            3,
360            &["c".into(), "b".into(), "a".into()],
361            NodeOpts::default(),
362            RevStatus::Available,
363        );
364        assert_eq!(path.pos, 1);
365        assert_eq!(path.tree.hash, "a");
366        assert_eq!(path.tree.status, RevStatus::Missing);
367        assert_eq!(path.tree.children.len(), 1);
368        assert_eq!(path.tree.children[0].hash, "b");
369        assert_eq!(path.tree.children[0].children[0].hash, "c");
370        assert_eq!(
371            path.tree.children[0].children[0].status,
372            RevStatus::Available
373        );
374    }
375}