Skip to main content

merge_engine/
vsa.rs

1//! Version Space Algebra (VSA) for merge conflict resolution.
2//!
3//! Implements the approach from Zhu & He (OOPSLA 2018) / AutoMerge.
4//! When structured merge detects a conflict, VSA enumerates all possible
5//! resolution candidates by combining edit operations from both sides.
6//!
7//! The three-kind taxonomy of AST nodes maps to VSA as follows:
8//! - **Leaf**: trivially a single-element version space
9//! - **Constructed node**: cross-product (Join) of children version spaces
10//! - **List node**: extended with List Join for variable-length combinations
11//!
12//! Candidates are ranked by parent similarity (Campos Junior et al., TOSEM 2025)
13//! and enumerated lazily from highest to lowest score.
14
15use crate::matcher::tree_similarity;
16use crate::types::{
17    Confidence, CstNode, ListOrdering, MergeScenario, ResolutionCandidate, ResolutionStrategy,
18};
19
20/// A version space representing a set of possible AST subtrees.
21#[derive(Debug, Clone)]
22pub enum VersionSpace {
23    /// A single concrete tree (leaf of the version space).
24    Atom(CstNode),
25    /// Cross product: pick one child from each sub-space.
26    Join {
27        kind: String,
28        children: Vec<VersionSpace>,
29    },
30    /// Union: pick from either sub-space.
31    Union(Vec<VersionSpace>),
32    /// List join: ordered combination of sub-spaces, allowing interleaving.
33    ListJoin {
34        kind: String,
35        ordering: ListOrdering,
36        left_items: Vec<VersionSpace>,
37        right_items: Vec<VersionSpace>,
38        base_items: Vec<VersionSpace>,
39    },
40}
41
42impl VersionSpace {
43    /// Count the total number of candidate programs in this version space.
44    /// Returns None if the count is too large (> threshold).
45    pub fn count(&self, max: usize) -> Option<usize> {
46        match self {
47            VersionSpace::Atom(_) => Some(1),
48            VersionSpace::Join { children, .. } => {
49                let mut total = 1usize;
50                for child in children {
51                    let c = child.count(max)?;
52                    total = total.checked_mul(c)?;
53                    if total > max {
54                        return None;
55                    }
56                }
57                Some(total)
58            }
59            VersionSpace::Union(options) => {
60                let mut total = 0usize;
61                for opt in options {
62                    let c = opt.count(max)?;
63                    total = total.checked_add(c)?;
64                    if total > max {
65                        return None;
66                    }
67                }
68                Some(total)
69            }
70            VersionSpace::ListJoin {
71                left_items,
72                right_items,
73                base_items,
74                ..
75            } => {
76                // Conservative estimate: each list merge has multiple interleavings
77                let n = left_items.len() + right_items.len() + base_items.len();
78                if n > 20 {
79                    return None;
80                }
81                // Number of interleavings is bounded by C(l+r, l) * product of item counts
82                Some(2usize.pow(n.min(30) as u32).min(max))
83            }
84        }
85    }
86
87    /// Enumerate all concrete trees in this version space, up to a limit.
88    pub fn enumerate(&self, max: usize) -> Vec<CstNode> {
89        let mut results = Vec::new();
90        self.enumerate_inner(&mut results, max);
91        results
92    }
93
94    fn enumerate_inner(&self, out: &mut Vec<CstNode>, max: usize) {
95        if out.len() >= max {
96            return;
97        }
98        match self {
99            VersionSpace::Atom(node) => {
100                out.push(node.clone());
101            }
102            VersionSpace::Join { kind, children } => {
103                // Cross product of all children
104                let child_options: Vec<Vec<CstNode>> =
105                    children.iter().map(|c| c.enumerate(max)).collect();
106
107                let mut combos = vec![Vec::new()];
108                for options in &child_options {
109                    let mut new_combos = Vec::new();
110                    for combo in &combos {
111                        for opt in options {
112                            if new_combos.len() >= max {
113                                break;
114                            }
115                            let mut new_combo = combo.clone();
116                            new_combo.push(opt.clone());
117                            new_combos.push(new_combo);
118                        }
119                    }
120                    combos = new_combos;
121                }
122
123                for combo in combos {
124                    if out.len() >= max {
125                        break;
126                    }
127                    out.push(CstNode::Constructed {
128                        id: 0,
129                        kind: kind.clone(),
130                        children: combo,
131                    });
132                }
133            }
134            VersionSpace::Union(options) => {
135                for opt in options {
136                    if out.len() >= max {
137                        break;
138                    }
139                    opt.enumerate_inner(out, max);
140                }
141            }
142            VersionSpace::ListJoin {
143                kind,
144                ordering,
145                left_items,
146                right_items,
147                base_items,
148            } => {
149                // Generate candidate lists by interleaving left and right additions
150                // while preserving relative order within each side.
151                let left_nodes: Vec<Vec<CstNode>> =
152                    left_items.iter().map(|vs| vs.enumerate(max)).collect();
153                let right_nodes: Vec<Vec<CstNode>> =
154                    right_items.iter().map(|vs| vs.enumerate(max)).collect();
155                let base_nodes: Vec<CstNode> =
156                    base_items.iter().flat_map(|vs| vs.enumerate(1)).collect();
157
158                // Strategy 1: left before right
159                let mut children1 = base_nodes.clone();
160                for items in &left_nodes {
161                    if let Some(item) = items.first() {
162                        children1.push(item.clone());
163                    }
164                }
165                for items in &right_nodes {
166                    if let Some(item) = items.first() {
167                        children1.push(item.clone());
168                    }
169                }
170                out.push(CstNode::List {
171                    id: 0,
172                    kind: kind.clone(),
173                    ordering: *ordering,
174                    children: children1,
175                });
176
177                if out.len() < max {
178                    // Strategy 2: right before left
179                    let mut children2 = base_nodes;
180                    for items in &right_nodes {
181                        if let Some(item) = items.first() {
182                            children2.push(item.clone());
183                        }
184                    }
185                    for items in &left_nodes {
186                        if let Some(item) = items.first() {
187                            children2.push(item.clone());
188                        }
189                    }
190                    out.push(CstNode::List {
191                        id: 0,
192                        kind: kind.clone(),
193                        ordering: *ordering,
194                        children: children2,
195                    });
196                }
197            }
198        }
199    }
200}
201
202/// Construct a version space from a conflict scenario.
203///
204/// Given base, left, right subtrees that are in conflict, builds a VSA
205/// that represents all plausible resolutions by combining edits from
206/// both sides. Follows Zhu & He's conversion rules.
207pub fn build_version_space(scenario: &MergeScenario<&CstNode>) -> VersionSpace {
208    let base = scenario.base;
209    let left = scenario.left;
210    let right = scenario.right;
211
212    // If both changed to the same thing, the version space is just that
213    if left.structurally_equal(right) {
214        return VersionSpace::Atom(left.clone());
215    }
216
217    // For leaf nodes: the space is the union of both alternatives
218    if base.is_leaf() && left.is_leaf() && right.is_leaf() {
219        return VersionSpace::Union(vec![
220            VersionSpace::Atom(left.clone()),
221            VersionSpace::Atom(right.clone()),
222            VersionSpace::Atom(base.clone()),
223        ]);
224    }
225
226    // For list nodes: use ListJoin to combine both sides' edits
227    if !base.is_leaf() && !left.is_leaf() && !right.is_leaf() {
228        let base_children = base.children();
229        let left_children = left.children();
230        let right_children = right.children();
231
232        // Identify which children are shared vs. unique to each side
233        let mut base_items = Vec::new();
234        let mut left_only = Vec::new();
235        let mut right_only = Vec::new();
236
237        // Simple heuristic: classify children as base/left-only/right-only
238        let mut left_matched = vec![false; left_children.len()];
239        let mut right_matched = vec![false; right_children.len()];
240
241        for bc in base_children {
242            let in_left = left_children
243                .iter()
244                .enumerate()
245                .find(|(i, lc)| !left_matched[*i] && bc.structurally_equal(lc));
246            let in_right = right_children
247                .iter()
248                .enumerate()
249                .find(|(i, rc)| !right_matched[*i] && bc.structurally_equal(rc));
250
251            if let Some((li, _)) = in_left {
252                left_matched[li] = true;
253            }
254            if let Some((ri, _)) = in_right {
255                right_matched[ri] = true;
256            }
257
258            base_items.push(VersionSpace::Atom(bc.clone()));
259        }
260
261        for (i, lc) in left_children.iter().enumerate() {
262            if !left_matched[i] {
263                left_only.push(VersionSpace::Atom(lc.clone()));
264            }
265        }
266        for (i, rc) in right_children.iter().enumerate() {
267            if !right_matched[i] {
268                right_only.push(VersionSpace::Atom(rc.clone()));
269            }
270        }
271
272        let ordering = match base {
273            CstNode::List { ordering, .. } => *ordering,
274            _ => ListOrdering::Ordered,
275        };
276
277        return VersionSpace::ListJoin {
278            kind: base.kind().to_string(),
279            ordering,
280            left_items: left_only,
281            right_items: right_only,
282            base_items,
283        };
284    }
285
286    // Fallback: union of all three versions
287    VersionSpace::Union(vec![
288        VersionSpace::Atom(left.clone()),
289        VersionSpace::Atom(right.clone()),
290        VersionSpace::Atom(base.clone()),
291    ])
292}
293
294/// Rank VSA candidates using parent similarity heuristic.
295///
296/// From Campos Junior et al. (TOSEM 2025): the correct resolution tends to
297/// be similar to both parents (left and right). We score each candidate by
298/// its combined similarity to both parents, normalized by the candidate's size.
299pub fn rank_candidates(
300    candidates: Vec<CstNode>,
301    scenario: &MergeScenario<&CstNode>,
302) -> Vec<ResolutionCandidate> {
303    let mut scored: Vec<(CstNode, f64)> = candidates
304        .into_iter()
305        .map(|candidate| {
306            let left_sim = tree_similarity(&candidate, scenario.left) as f64;
307            let right_sim = tree_similarity(&candidate, scenario.right) as f64;
308            let base_sim = tree_similarity(&candidate, scenario.base) as f64;
309
310            // Parent similarity fitness function (Campos Junior 2025):
311            // Maximize similarity to both parents while diverging from base
312            // (since the resolution should incorporate changes, not revert to base)
313            let parent_similarity = left_sim + right_sim;
314            let base_penalty = base_sim * 0.5;
315            let size_norm = candidate.size().max(1) as f64;
316
317            let score = (parent_similarity - base_penalty) / size_norm;
318            (candidate, score)
319        })
320        .collect();
321
322    // Sort by score descending
323    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
324
325    // Remove duplicates
326    let mut seen = Vec::new();
327    scored.retain(|(candidate, _)| {
328        let source = candidate.to_source();
329        if seen.contains(&source) {
330            false
331        } else {
332            seen.push(source);
333            true
334        }
335    });
336
337    scored
338        .into_iter()
339        .enumerate()
340        .map(|(i, (candidate, _score))| {
341            let confidence = if i == 0 {
342                Confidence::Medium
343            } else {
344                Confidence::Low
345            };
346            ResolutionCandidate {
347                content: candidate.to_source(),
348                confidence,
349                strategy: ResolutionStrategy::VersionSpaceAlgebra,
350            }
351        })
352        .collect()
353}
354
355/// Full VSA resolution pipeline: build space → enumerate → rank → return best.
356pub fn resolve_via_vsa(
357    scenario: &MergeScenario<&CstNode>,
358    max_candidates: usize,
359) -> Vec<ResolutionCandidate> {
360    let vsa = build_version_space(scenario);
361    let candidates = vsa.enumerate(max_candidates);
362    rank_candidates(candidates, scenario)
363}
364
365#[cfg(test)]
366mod tests {
367    use super::*;
368
369    fn leaf(id: usize, val: &str) -> CstNode {
370        CstNode::Leaf {
371            id,
372            kind: "ident".into(),
373            value: val.into(),
374        }
375    }
376
377    #[test]
378    fn test_leaf_conflict_vsa() {
379        let base = leaf(1, "x");
380        let left = leaf(2, "y");
381        let right = leaf(3, "z");
382        let scenario = MergeScenario::new(&base, &left, &right);
383
384        let vsa = build_version_space(&scenario);
385        let candidates = vsa.enumerate(10);
386        // Should have at least left, right, and base as options
387        assert!(candidates.len() >= 2);
388    }
389
390    #[test]
391    fn test_rank_prefers_parents() {
392        let base = leaf(1, "x");
393        let left = leaf(2, "y");
394        let right = leaf(3, "y"); // Both changed to same thing
395        let scenario = MergeScenario::new(&base, &left, &right);
396
397        let candidates = resolve_via_vsa(&scenario, 10);
398        assert!(!candidates.is_empty());
399        // Top candidate should be "y" since both parents agree
400        assert_eq!(candidates[0].content, "y");
401    }
402
403    #[test]
404    fn test_vsa_count() {
405        let base = leaf(1, "x");
406        let left = leaf(2, "y");
407        let right = leaf(3, "z");
408        let scenario = MergeScenario::new(&base, &left, &right);
409
410        let vsa = build_version_space(&scenario);
411        let count = vsa.count(1000);
412        assert!(count.is_some());
413        assert!(count.unwrap() >= 2);
414    }
415}