Skip to main content

rouchdb_core/
rev_tree.rs

1/// Revision tree data structure.
2///
3/// Mirrors PouchDB's `pouchdb-merge` tree representation. A document's full
4/// history is a `RevTree` — a list of `RevPath` roots. Each root has a
5/// starting position (`pos`) and a tree of `RevNode`s.
6///
7/// Multiple roots arise when revisions are stemmed (pruned) and later a
8/// previously-stemmed branch is re-introduced during replication.
9
10/// Status of a revision's stored data.
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub enum RevStatus {
13    /// Full document data is stored for this revision.
14    Available,
15    /// This revision was compacted; only the hash remains.
16    Missing,
17}
18
19/// A single node in the revision tree.
20#[derive(Debug, Clone)]
21pub struct RevNode {
22    /// The hash portion of the revision id.
23    pub hash: String,
24    /// Whether this revision's data is still stored.
25    pub status: RevStatus,
26    /// Optional metadata attached to this node (e.g., `_deleted` flag).
27    pub opts: NodeOpts,
28    /// Child revisions. Multiple children represent a conflict branch.
29    pub children: Vec<RevNode>,
30}
31
32/// Per-node metadata flags.
33#[derive(Debug, Clone, Default)]
34pub struct NodeOpts {
35    pub deleted: bool,
36}
37
38/// A rooted path in the revision tree.
39///
40/// `pos` is the generation number of the root node. For example, if the
41/// earliest stored revision is `3-abc`, then `pos = 3`.
42#[derive(Debug, Clone)]
43pub struct RevPath {
44    pub pos: u64,
45    pub tree: RevNode,
46}
47
48/// The complete revision tree for a document. Multiple entries occur when
49/// stemming creates disjoint subtrees.
50pub type RevTree = Vec<RevPath>;
51
52/// Information about a leaf node in the tree.
53#[derive(Debug, Clone)]
54pub struct LeafInfo {
55    pub pos: u64,
56    pub hash: String,
57    pub deleted: bool,
58    pub status: RevStatus,
59}
60
61impl LeafInfo {
62    pub fn rev_string(&self) -> String {
63        format!("{}-{}", self.pos, self.hash)
64    }
65}
66
67// ---------------------------------------------------------------------------
68// Traversal helpers
69// ---------------------------------------------------------------------------
70
71/// Depth-first traversal of the revision tree, calling `f` for each node.
72/// The callback receives `(depth_from_root, node, root_pos)`.
73pub fn traverse_rev_tree<F>(tree: &RevTree, mut f: F)
74where
75    F: FnMut(u64, &RevNode, u64),
76{
77    for path in tree {
78        // Stack: (node, depth_from_root)
79        let mut stack: Vec<(&RevNode, u64)> = vec![(&path.tree, 0)];
80        while let Some((node, depth)) = stack.pop() {
81            f(path.pos + depth, node, path.pos);
82            for child in &node.children {
83                stack.push((child, depth + 1));
84            }
85        }
86    }
87}
88
89/// Collect all leaf nodes (nodes with no children) from the tree.
90/// Returns them sorted by: non-deleted first, then highest pos, then
91/// lexicographic hash — matching CouchDB's deterministic order.
92pub fn collect_leaves(tree: &RevTree) -> Vec<LeafInfo> {
93    let mut leaves = Vec::new();
94    traverse_rev_tree(tree, |pos, node, _root_pos| {
95        if node.children.is_empty() {
96            leaves.push(LeafInfo {
97                pos,
98                hash: node.hash.clone(),
99                deleted: node.opts.deleted,
100                status: node.status.clone(),
101            });
102        }
103    });
104    // Sort: non-deleted first, then by pos desc, then hash desc
105    leaves.sort_by(|a, b| {
106        a.deleted
107            .cmp(&b.deleted)
108            .then_with(|| b.pos.cmp(&a.pos))
109            .then_with(|| b.hash.cmp(&a.hash))
110    });
111    leaves
112}
113
114/// Decompose the tree into all root-to-leaf paths.
115pub fn root_to_leaf(tree: &RevTree) -> Vec<(u64, Vec<(String, NodeOpts, RevStatus)>)> {
116    let mut paths = Vec::new();
117    for path in tree {
118        fn walk(
119            node: &RevNode,
120            current: &mut Vec<(String, NodeOpts, RevStatus)>,
121            paths: &mut Vec<Vec<(String, NodeOpts, RevStatus)>>,
122        ) {
123            current.push((node.hash.clone(), node.opts.clone(), node.status.clone()));
124            if node.children.is_empty() {
125                paths.push(current.clone());
126            } else {
127                for child in &node.children {
128                    walk(child, current, paths);
129                }
130            }
131            current.pop();
132        }
133        let mut current = Vec::new();
134        let mut collected = Vec::new();
135        walk(&path.tree, &mut current, &mut collected);
136        for p in collected {
137            paths.push((path.pos, p));
138        }
139    }
140    paths
141}
142
143/// Check if a specific revision exists anywhere in the tree.
144pub fn rev_exists(tree: &RevTree, pos: u64, hash: &str) -> bool {
145    let mut found = false;
146    traverse_rev_tree(tree, |node_pos, node, _| {
147        if node_pos == pos && node.hash == hash {
148            found = true;
149        }
150    });
151    found
152}
153
154// ---------------------------------------------------------------------------
155// Building paths from revision arrays (for merging incoming revisions)
156// ---------------------------------------------------------------------------
157
158/// Build a single-path `RevPath` from a list of revision hashes.
159///
160/// `revs` is oldest-first: `[oldest_hash, ..., newest_hash]`.
161/// `pos` is the generation of the *newest* (leaf) revision.
162/// `opts` are the metadata flags for the *leaf* node.
163/// `status` is applied to the leaf; all ancestors are `Missing`.
164pub fn build_path_from_revs(
165    pos: u64,
166    revs: &[String],
167    opts: NodeOpts,
168    status: RevStatus,
169) -> RevPath {
170    assert!(!revs.is_empty());
171    let len = revs.len() as u64;
172    let root_pos = pos - len + 1;
173
174    // Build from leaf to root
175    let mut node: Option<RevNode> = None;
176    for (i, hash) in revs.iter().enumerate() {
177        let is_leaf = i == 0;
178        let n = RevNode {
179            hash: hash.clone(),
180            status: if is_leaf {
181                status.clone()
182            } else {
183                RevStatus::Missing
184            },
185            opts: if is_leaf {
186                opts.clone()
187            } else {
188                NodeOpts::default()
189            },
190            children: node.into_iter().collect(),
191        };
192        node = Some(n);
193    }
194
195    RevPath {
196        pos: root_pos,
197        tree: node.unwrap(),
198    }
199}
200
201// ---------------------------------------------------------------------------
202// Ancestry lookup (for replication — provides _revisions data)
203// ---------------------------------------------------------------------------
204
205/// Find the revision ancestry chain for a specific revision in the tree.
206///
207/// Returns hashes from leaf to root: `[target_hash, parent_hash, grandparent_hash, ...]`
208/// or `None` if the revision is not found in the tree.
209pub fn find_rev_ancestry(tree: &RevTree, target_pos: u64, target_hash: &str) -> Option<Vec<String>> {
210    for path in tree {
211        if let Some(chain) = find_chain_in_node(&path.tree, path.pos, target_pos, target_hash) {
212            return Some(chain);
213        }
214    }
215    None
216}
217
218fn find_chain_in_node(
219    node: &RevNode,
220    current_pos: u64,
221    target_pos: u64,
222    target_hash: &str,
223) -> Option<Vec<String>> {
224    if current_pos == target_pos && node.hash == target_hash {
225        return Some(vec![node.hash.clone()]);
226    }
227
228    for child in &node.children {
229        if let Some(mut chain) = find_chain_in_node(child, current_pos + 1, target_pos, target_hash)
230        {
231            chain.push(node.hash.clone());
232            return Some(chain);
233        }
234    }
235
236    None
237}
238
239// ---------------------------------------------------------------------------
240// Tests
241// ---------------------------------------------------------------------------
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246
247    fn leaf(hash: &str) -> RevNode {
248        RevNode {
249            hash: hash.into(),
250            status: RevStatus::Available,
251            opts: NodeOpts::default(),
252            children: vec![],
253        }
254    }
255
256    fn node(hash: &str, children: Vec<RevNode>) -> RevNode {
257        RevNode {
258            hash: hash.into(),
259            status: RevStatus::Available,
260            opts: NodeOpts::default(),
261            children,
262        }
263    }
264
265    #[test]
266    fn collect_leaves_simple_chain() {
267        // 1-a -> 2-b -> 3-c
268        let tree = vec![RevPath {
269            pos: 1,
270            tree: node("a", vec![node("b", vec![leaf("c")])]),
271        }];
272        let leaves = collect_leaves(&tree);
273        assert_eq!(leaves.len(), 1);
274        assert_eq!(leaves[0].pos, 3);
275        assert_eq!(leaves[0].hash, "c");
276    }
277
278    #[test]
279    fn collect_leaves_with_conflict() {
280        // 1-a -> 2-b (branch 1)
281        //     -> 2-c (branch 2)
282        let tree = vec![RevPath {
283            pos: 1,
284            tree: node("a", vec![leaf("b"), leaf("c")]),
285        }];
286        let leaves = collect_leaves(&tree);
287        assert_eq!(leaves.len(), 2);
288        // Sorted by hash desc: c before b
289        assert_eq!(leaves[0].hash, "c");
290        assert_eq!(leaves[1].hash, "b");
291    }
292
293    #[test]
294    fn rev_exists_finds_nodes() {
295        let tree = vec![RevPath {
296            pos: 1,
297            tree: node("a", vec![leaf("b")]),
298        }];
299        assert!(rev_exists(&tree, 1, "a"));
300        assert!(rev_exists(&tree, 2, "b"));
301        assert!(!rev_exists(&tree, 1, "z"));
302        assert!(!rev_exists(&tree, 3, "a"));
303    }
304
305    #[test]
306    fn root_to_leaf_paths() {
307        // 1-a -> 2-b -> 3-c
308        //            -> 3-d
309        let tree = vec![RevPath {
310            pos: 1,
311            tree: node("a", vec![node("b", vec![leaf("c"), leaf("d")])]),
312        }];
313        let paths = root_to_leaf(&tree);
314        assert_eq!(paths.len(), 2);
315        assert_eq!(paths[0].0, 1); // root pos
316        assert_eq!(paths[0].1.len(), 3); // a, b, c
317        assert_eq!(paths[1].1.len(), 3); // a, b, d
318    }
319
320    #[test]
321    fn find_rev_ancestry_linear_chain() {
322        // 1-a -> 2-b -> 3-c
323        let tree = vec![RevPath {
324            pos: 1,
325            tree: node("a", vec![node("b", vec![leaf("c")])]),
326        }];
327        let ancestry = find_rev_ancestry(&tree, 3, "c").unwrap();
328        assert_eq!(ancestry, vec!["c", "b", "a"]);
329
330        let ancestry = find_rev_ancestry(&tree, 2, "b").unwrap();
331        assert_eq!(ancestry, vec!["b", "a"]);
332
333        let ancestry = find_rev_ancestry(&tree, 1, "a").unwrap();
334        assert_eq!(ancestry, vec!["a"]);
335
336        assert!(find_rev_ancestry(&tree, 3, "z").is_none());
337    }
338
339    #[test]
340    fn build_path_from_revs_works() {
341        // revs: ["c", "b", "a"] (leaf first), pos: 3
342        let path = build_path_from_revs(
343            3,
344            &["c".into(), "b".into(), "a".into()],
345            NodeOpts::default(),
346            RevStatus::Available,
347        );
348        assert_eq!(path.pos, 1);
349        assert_eq!(path.tree.hash, "a");
350        assert_eq!(path.tree.status, RevStatus::Missing);
351        assert_eq!(path.tree.children.len(), 1);
352        assert_eq!(path.tree.children[0].hash, "b");
353        assert_eq!(path.tree.children[0].children[0].hash, "c");
354        assert_eq!(
355            path.tree.children[0].children[0].status,
356            RevStatus::Available
357        );
358    }
359}