Skip to main content

merge_engine/
search.rs

1//! Search-Based Software Engineering (SBSE) for merge conflict resolution.
2//!
3//! Implements the approach from Campos Junior, Ghiotto, de Menezes, Barros,
4//! van der Hoek, and Murta (ACM TOSEM, July 2025):
5//! "Towards a Feasible Evaluation Function for Search-Based Merge Conflict Resolution"
6//!
7//! The key insight is that a correct merge resolution preserves characteristics
8//! of both conflict parents (left and right), so we can use **parent similarity**
9//! as a fitness function to guide a search over candidate resolutions.
10//!
11//! Search operators:
12//! - **Line interleaving**: combine lines from left and right in different orders
13//! - **Line selection**: pick each line from either left or right
14//! - **Chunking**: take contiguous chunks from each side
15//!
16//! The fitness function evaluates candidates using:
17//! - Token-level Jaccard similarity to left parent
18//! - Token-level Jaccard similarity to right parent
19//! - Penalty for divergence from base (to avoid reverting changes)
20
21use std::collections::HashSet;
22
23use crate::types::{Confidence, MergeScenario, ResolutionCandidate, ResolutionStrategy};
24
25/// Configuration for the search-based resolver.
26pub struct SearchConfig {
27    /// Maximum number of candidates to generate.
28    pub max_candidates: usize,
29    /// Maximum number of generations for the genetic search.
30    pub max_generations: usize,
31    /// Population size per generation.
32    pub population_size: usize,
33    /// Weight for left-parent similarity in fitness [0, 1].
34    pub left_weight: f64,
35    /// Weight for right-parent similarity in fitness [0, 1].
36    pub right_weight: f64,
37    /// Penalty weight for base similarity [0, 1].
38    pub base_penalty: f64,
39}
40
41impl Default for SearchConfig {
42    fn default() -> Self {
43        Self {
44            max_candidates: 50,
45            max_generations: 20,
46            population_size: 30,
47            left_weight: 0.45,
48            right_weight: 0.45,
49            base_penalty: 0.1,
50        }
51    }
52}
53
54/// Run search-based conflict resolution.
55///
56/// Generates candidate resolutions by combining lines from left and right,
57/// then scores them using parent similarity as the fitness function.
58pub fn search_resolve(
59    scenario: &MergeScenario<&str>,
60    config: &SearchConfig,
61) -> Vec<ResolutionCandidate> {
62    let left_lines: Vec<&str> = scenario.left.lines().collect();
63    let right_lines: Vec<&str> = scenario.right.lines().collect();
64    let _base_lines: Vec<&str> = scenario.base.lines().collect();
65
66    // Generate initial population using different strategies
67    let mut population: Vec<String> = Vec::new();
68
69    // Strategy 1: Take left then right
70    population.push(format!("{}\n{}", scenario.left, scenario.right));
71
72    // Strategy 2: Take right then left
73    population.push(format!("{}\n{}", scenario.right, scenario.left));
74
75    // Strategy 3: Take only left
76    population.push(scenario.left.to_string());
77
78    // Strategy 4: Take only right
79    population.push(scenario.right.to_string());
80
81    // Strategy 5: Line-by-line interleaving
82    let interleaved = interleave_lines(&left_lines, &right_lines);
83    population.push(interleaved);
84
85    // Strategy 6: Chunk-based combinations
86    let chunks = generate_chunk_combinations(&left_lines, &right_lines);
87    population.extend(chunks);
88
89    // Strategy 7: Line selection (pick each line from left or right)
90    let selections = generate_line_selections(&left_lines, &right_lines);
91    population.extend(selections);
92
93    // Run evolutionary search for additional generations
94    for _gen in 0..config.max_generations {
95        let mut new_pop = Vec::new();
96        for i in 0..population.len() {
97            for j in (i + 1)..population.len() {
98                if new_pop.len() >= config.population_size {
99                    break;
100                }
101                // Crossover: combine halves of two candidates
102                let child = crossover(&population[i], &population[j]);
103                new_pop.push(child);
104            }
105            if new_pop.len() >= config.population_size {
106                break;
107            }
108        }
109
110        // Mutate: swap random lines
111        for candidate in &population {
112            if new_pop.len() >= config.population_size {
113                break;
114            }
115            let mutated = mutate_swap(candidate);
116            new_pop.push(mutated);
117        }
118
119        // Evaluate and select best
120        population.extend(new_pop);
121        population = select_best(population, scenario, config, config.population_size);
122    }
123
124    // Final scoring and ranking
125    let mut scored: Vec<(String, f64)> = population
126        .into_iter()
127        .map(|candidate| {
128            let score = fitness(&candidate, scenario, config);
129            (candidate, score)
130        })
131        .collect();
132
133    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
134
135    // Deduplicate
136    let mut seen = HashSet::new();
137    scored.retain(|(c, _)| seen.insert(c.clone()));
138
139    scored
140        .into_iter()
141        .take(config.max_candidates)
142        .map(|(content, _score)| ResolutionCandidate {
143            content,
144            confidence: Confidence::Low,
145            strategy: ResolutionStrategy::SearchBased,
146        })
147        .collect()
148}
149
150/// Parent similarity fitness function (Campos Junior et al., TOSEM 2025).
151///
152/// Scores a candidate by how well it preserves content from both parents
153/// while incorporating their changes (diverging from base).
154fn fitness(candidate: &str, scenario: &MergeScenario<&str>, config: &SearchConfig) -> f64 {
155    let left_sim = jaccard_similarity(candidate, scenario.left);
156    let right_sim = jaccard_similarity(candidate, scenario.right);
157    let base_sim = jaccard_similarity(candidate, scenario.base);
158
159    config.left_weight * left_sim + config.right_weight * right_sim - config.base_penalty * base_sim
160}
161
162/// Token-level Jaccard similarity between two strings.
163fn jaccard_similarity(a: &str, b: &str) -> f64 {
164    let tokens_a: HashSet<&str> = a.split_whitespace().collect();
165    let tokens_b: HashSet<&str> = b.split_whitespace().collect();
166
167    if tokens_a.is_empty() && tokens_b.is_empty() {
168        return 1.0;
169    }
170
171    let intersection = tokens_a.intersection(&tokens_b).count() as f64;
172    let union = tokens_a.union(&tokens_b).count() as f64;
173
174    if union == 0.0 {
175        0.0
176    } else {
177        intersection / union
178    }
179}
180
181/// Interleave lines from two sequences.
182fn interleave_lines(left: &[&str], right: &[&str]) -> String {
183    let mut result = Vec::new();
184    let max_len = left.len().max(right.len());
185    for i in 0..max_len {
186        if i < left.len() {
187            result.push(left[i]);
188        }
189        if i < right.len() && (i >= left.len() || left[i] != right[i]) {
190            result.push(right[i]);
191        }
192    }
193    result.join("\n")
194}
195
196/// Generate chunk-based combinations.
197fn generate_chunk_combinations(left: &[&str], right: &[&str]) -> Vec<String> {
198    let mut results = Vec::new();
199    if left.is_empty() || right.is_empty() {
200        return results;
201    }
202
203    // Split at various midpoints and combine
204    for split in 1..left.len() {
205        let combo = format!(
206            "{}\n{}",
207            left[..split].join("\n"),
208            right[split.min(right.len())..].join("\n")
209        );
210        results.push(combo);
211    }
212
213    for split in 1..right.len() {
214        let combo = format!(
215            "{}\n{}",
216            right[..split].join("\n"),
217            left[split.min(left.len())..].join("\n")
218        );
219        results.push(combo);
220    }
221
222    results
223}
224
225/// Generate line selections (pick each line from left or right).
226fn generate_line_selections(left: &[&str], right: &[&str]) -> Vec<String> {
227    let mut results = Vec::new();
228    let n = left.len().min(right.len());
229
230    if n == 0 {
231        return results;
232    }
233
234    // Limit to avoid exponential blowup: use greedy heuristic patterns
235    // Pattern: prefer left except where right differs significantly
236    let mut prefer_left = Vec::new();
237    let mut prefer_right = Vec::new();
238    for i in 0..n {
239        prefer_left.push(left[i]);
240        prefer_right.push(right[i]);
241    }
242    results.push(prefer_left.join("\n"));
243    results.push(prefer_right.join("\n"));
244
245    // Alternating pattern
246    let mut alternating = Vec::new();
247    for i in 0..n {
248        if i % 2 == 0 {
249            alternating.push(left[i]);
250        } else {
251            alternating.push(right[i]);
252        }
253    }
254    results.push(alternating.join("\n"));
255
256    results
257}
258
259/// Simple crossover: take first half from one parent, second from the other.
260fn crossover(a: &str, b: &str) -> String {
261    let a_lines: Vec<&str> = a.lines().collect();
262    let b_lines: Vec<&str> = b.lines().collect();
263
264    let mid_a = a_lines.len() / 2;
265    let mid_b = b_lines.len() / 2;
266
267    let mut result: Vec<&str> = Vec::new();
268    result.extend_from_slice(&a_lines[..mid_a]);
269    result.extend_from_slice(&b_lines[mid_b..]);
270    result.join("\n")
271}
272
273/// Simple mutation: swap two adjacent lines.
274fn mutate_swap(candidate: &str) -> String {
275    let mut lines: Vec<&str> = candidate.lines().collect();
276    if lines.len() >= 2 {
277        // Swap the middle two lines as a deterministic "mutation"
278        let mid = lines.len() / 2;
279        lines.swap(mid - 1, mid);
280    }
281    lines.join("\n")
282}
283
284/// Select the best candidates from a population based on fitness.
285fn select_best(
286    population: Vec<String>,
287    scenario: &MergeScenario<&str>,
288    config: &SearchConfig,
289    target_size: usize,
290) -> Vec<String> {
291    let mut scored: Vec<(String, f64)> = population
292        .into_iter()
293        .map(|c| {
294            let f = fitness(&c, scenario, config);
295            (c, f)
296        })
297        .collect();
298
299    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
300
301    // Deduplicate
302    let mut seen = HashSet::new();
303    scored.retain(|(c, _)| seen.insert(c.clone()));
304
305    scored
306        .into_iter()
307        .take(target_size)
308        .map(|(c, _)| c)
309        .collect()
310}
311
312#[cfg(test)]
313mod tests {
314    use super::*;
315
316    #[test]
317    fn test_jaccard_similarity() {
318        assert!((jaccard_similarity("a b c", "a b c") - 1.0).abs() < f64::EPSILON);
319        assert!((jaccard_similarity("a b c", "d e f") - 0.0).abs() < f64::EPSILON);
320        assert!(jaccard_similarity("a b c", "a b d") > 0.3);
321    }
322
323    #[test]
324    fn test_search_produces_candidates() {
325        let scenario = MergeScenario::new(
326            "int x = 1;\nint y = 2;",
327            "int x = 10;\nint y = 2;",
328            "int x = 1;\nint y = 20;",
329        );
330        let config = SearchConfig::default();
331        let candidates = search_resolve(&scenario, &config);
332        assert!(!candidates.is_empty());
333    }
334
335    #[test]
336    fn test_fitness_prefers_parents() {
337        let scenario = MergeScenario::new("old code", "new left code", "new right code");
338        let config = SearchConfig::default();
339
340        // A candidate similar to both parents should score higher than one similar to base
341        let good = fitness("new left code new right code", &scenario, &config);
342        let bad = fitness("old code", &scenario, &config);
343        assert!(good > bad);
344    }
345
346    #[test]
347    fn test_interleave() {
348        let left = vec!["a", "b"];
349        let right = vec!["c", "d"];
350        let result = interleave_lines(&left, &right);
351        assert!(result.contains("a"));
352        assert!(result.contains("c"));
353    }
354}