Skip to main content

merge_engine/
types.rs

1//! Core types for the merge engine.
2//!
3//! Based on the structured merge literature (LASTMERGE 2025, AutoMerge/OOPSLA 2018),
4//! we model code as concrete syntax trees with three node kinds:
5//! - **Leaf**: terminal nodes (identifiers, literals, operators)
6//! - **Constructed**: non-terminal with fixed-arity named children
7//! - **List**: non-terminal with variable-length children (ordered or unordered)
8
9use std::fmt;
10use std::hash::Hash;
11
12/// Unique identifier for a tree node within a merge context.
13pub type NodeId = usize;
14
15/// Ordering semantics for list nodes, per LASTMERGE/JDime.
16/// Unordered lists (e.g., import blocks, class members) can be freely permuted
17/// without changing program semantics, enabling better conflict resolution.
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19pub enum ListOrdering {
20    Ordered,
21    Unordered,
22}
23
24/// A concrete syntax tree node, following the three-kind taxonomy from
25/// Zhu & He (OOPSLA 2018) and LASTMERGE (2025).
26#[derive(Debug, Clone)]
27pub enum CstNode {
28    /// Terminal / leaf node — holds the literal text content.
29    Leaf {
30        id: NodeId,
31        kind: String,
32        value: String,
33    },
34    /// Non-terminal with named, fixed-arity children (e.g., if-statement has
35    /// condition, consequence, alternative).
36    Constructed {
37        id: NodeId,
38        kind: String,
39        children: Vec<CstNode>,
40    },
41    /// Non-terminal with a variable-length child list (e.g., block of statements,
42    /// import list). The ordering tag controls matching strategy.
43    List {
44        id: NodeId,
45        kind: String,
46        ordering: ListOrdering,
47        children: Vec<CstNode>,
48    },
49}
50
51impl CstNode {
52    pub fn id(&self) -> NodeId {
53        match self {
54            CstNode::Leaf { id, .. } => *id,
55            CstNode::Constructed { id, .. } => *id,
56            CstNode::List { id, .. } => *id,
57        }
58    }
59
60    pub fn kind(&self) -> &str {
61        match self {
62            CstNode::Leaf { kind, .. } => kind,
63            CstNode::Constructed { kind, .. } => kind,
64            CstNode::List { kind, .. } => kind,
65        }
66    }
67
68    pub fn children(&self) -> &[CstNode] {
69        match self {
70            CstNode::Leaf { .. } => &[],
71            CstNode::Constructed { children, .. } => children,
72            CstNode::List { children, .. } => children,
73        }
74    }
75
76    pub fn children_mut(&mut self) -> &mut Vec<CstNode> {
77        match self {
78            CstNode::Leaf { .. } => panic!("leaf nodes have no children"),
79            CstNode::Constructed { children, .. } => children,
80            CstNode::List { children, .. } => children,
81        }
82    }
83
84    pub fn is_leaf(&self) -> bool {
85        matches!(self, CstNode::Leaf { .. })
86    }
87
88    pub fn leaf_value(&self) -> Option<&str> {
89        match self {
90            CstNode::Leaf { value, .. } => Some(value),
91            _ => None,
92        }
93    }
94
95    /// Compute the total number of nodes in this subtree.
96    pub fn size(&self) -> usize {
97        1 + self.children().iter().map(|c| c.size()).sum::<usize>()
98    }
99
100    /// Collect all leaf values in pre-order.
101    pub fn collect_leaves(&self) -> Vec<&str> {
102        let mut leaves = Vec::new();
103        self.collect_leaves_inner(&mut leaves);
104        leaves
105    }
106
107    fn collect_leaves_inner<'a>(&'a self, out: &mut Vec<&'a str>) {
108        match self {
109            CstNode::Leaf { value, .. } => out.push(value),
110            CstNode::Constructed { children, .. } | CstNode::List { children, .. } => {
111                for c in children {
112                    c.collect_leaves_inner(out);
113                }
114            }
115        }
116    }
117
118    /// Reconstruct source text by concatenating all leaves.
119    pub fn to_source(&self) -> String {
120        self.collect_leaves().join("")
121    }
122
123    /// Structural equality (ignores NodeId).
124    pub fn structurally_equal(&self, other: &CstNode) -> bool {
125        if self.kind() != other.kind() {
126            return false;
127        }
128        match (self, other) {
129            (CstNode::Leaf { value: v1, .. }, CstNode::Leaf { value: v2, .. }) => v1 == v2,
130            (
131                CstNode::Constructed {
132                    children: c1,
133                    kind: k1,
134                    ..
135                },
136                CstNode::Constructed {
137                    children: c2,
138                    kind: k2,
139                    ..
140                },
141            ) => {
142                k1 == k2
143                    && c1.len() == c2.len()
144                    && c1
145                        .iter()
146                        .zip(c2.iter())
147                        .all(|(a, b)| a.structurally_equal(b))
148            }
149            (
150                CstNode::List {
151                    children: c1,
152                    ordering: o1,
153                    kind: k1,
154                    ..
155                },
156                CstNode::List {
157                    children: c2,
158                    ordering: o2,
159                    kind: k2,
160                    ..
161                },
162            ) => {
163                k1 == k2
164                    && o1 == o2
165                    && c1.len() == c2.len()
166                    && c1
167                        .iter()
168                        .zip(c2.iter())
169                        .all(|(a, b)| a.structurally_equal(b))
170            }
171            _ => false,
172        }
173    }
174}
175
176impl fmt::Display for CstNode {
177    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178        write!(f, "{}", self.to_source())
179    }
180}
181
182/// A matched pair of nodes across two revisions.
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct MatchPair {
185    pub left: NodeId,
186    pub right: NodeId,
187    pub score: usize,
188}
189
190/// The three-way merge scenario: base, left, and right revisions.
191#[derive(Debug, Clone)]
192pub struct MergeScenario<T> {
193    pub base: T,
194    pub left: T,
195    pub right: T,
196}
197
198impl<T> MergeScenario<T> {
199    pub fn new(base: T, left: T, right: T) -> Self {
200        Self { base, left, right }
201    }
202}
203
204/// The result of a merge operation on a region.
205#[derive(Debug, Clone, PartialEq, Eq)]
206pub enum MergeResult {
207    /// Clean merge — no conflict.
208    Resolved(String),
209    /// Unresolved conflict with both sides preserved.
210    Conflict {
211        base: String,
212        left: String,
213        right: String,
214    },
215}
216
217impl MergeResult {
218    pub fn is_resolved(&self) -> bool {
219        matches!(self, MergeResult::Resolved(_))
220    }
221
222    pub fn is_conflict(&self) -> bool {
223        matches!(self, MergeResult::Conflict { .. })
224    }
225
226    /// Format as a git-style conflict marker block.
227    pub fn to_string_with_markers(&self) -> String {
228        match self {
229            MergeResult::Resolved(s) => s.clone(),
230            MergeResult::Conflict { base, left, right } => {
231                let mut out = String::new();
232                out.push_str("<<<<<<< LEFT\n");
233                out.push_str(left);
234                if !left.ends_with('\n') {
235                    out.push('\n');
236                }
237                out.push_str("||||||| BASE\n");
238                out.push_str(base);
239                if !base.ends_with('\n') {
240                    out.push('\n');
241                }
242                out.push_str("=======\n");
243                out.push_str(right);
244                if !right.ends_with('\n') {
245                    out.push('\n');
246                }
247                out.push_str(">>>>>>> RIGHT\n");
248                out
249            }
250        }
251    }
252}
253
254/// Supported programming languages for tree-sitter parsing.
255#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
256pub enum Language {
257    Rust,
258    JavaScript,
259    TypeScript,
260    Python,
261    Java,
262    Go,
263    C,
264    Cpp,
265    Kotlin,
266    Toml,
267    Yaml,
268}
269
270impl Language {
271    /// Infer language from a file extension.
272    pub fn from_extension(ext: &str) -> Option<Self> {
273        match ext {
274            "rs" => Some(Language::Rust),
275            "js" | "mjs" | "cjs" => Some(Language::JavaScript),
276            "ts" | "tsx" => Some(Language::TypeScript),
277            "py" => Some(Language::Python),
278            "java" => Some(Language::Java),
279            "go" => Some(Language::Go),
280            "c" | "h" => Some(Language::C),
281            "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Some(Language::Cpp),
282            "kt" | "kts" => Some(Language::Kotlin),
283            "toml" => Some(Language::Toml),
284            "yml" | "yaml" => Some(Language::Yaml),
285            _ => None,
286        }
287    }
288}
289
290/// A text-level hunk from diff3.
291#[derive(Debug, Clone, PartialEq, Eq)]
292pub enum Diff3Hunk {
293    /// All three versions agree.
294    Stable(Vec<String>),
295    /// Only left changed from base.
296    LeftChanged(Vec<String>),
297    /// Only right changed from base.
298    RightChanged(Vec<String>),
299    /// Both changed differently — conflict.
300    Conflict {
301        base: Vec<String>,
302        left: Vec<String>,
303        right: Vec<String>,
304    },
305}
306
307/// Confidence level for an auto-resolution.
308/// Ordered Low < Medium < High so that derived Ord works correctly.
309#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
310pub enum Confidence {
311    /// Low confidence — search-based best guess.
312    Low,
313    /// Medium confidence — heuristic match.
314    Medium,
315    /// High confidence — structural or pattern-based proof.
316    High,
317}
318
319/// A candidate resolution produced by the resolver pipeline.
320#[derive(Debug, Clone)]
321pub struct ResolutionCandidate {
322    pub content: String,
323    pub confidence: Confidence,
324    pub strategy: ResolutionStrategy,
325}
326
327/// Which strategy produced a resolution.
328#[derive(Debug, Clone, Copy, PartialEq, Eq)]
329pub enum ResolutionStrategy {
330    /// Standard three-way merge (no conflict).
331    Diff3,
332    /// Structured tree merge eliminated a false conflict.
333    StructuredMerge,
334    /// Version Space Algebra enumerated candidates.
335    VersionSpaceAlgebra,
336    /// Pattern-based DSL rule matched.
337    PatternRule,
338    /// Search-based with parent similarity fitness.
339    SearchBased,
340}
341
342impl fmt::Display for ResolutionStrategy {
343    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
344        match self {
345            ResolutionStrategy::Diff3 => write!(f, "diff3"),
346            ResolutionStrategy::StructuredMerge => write!(f, "structured-merge"),
347            ResolutionStrategy::VersionSpaceAlgebra => write!(f, "version-space-algebra"),
348            ResolutionStrategy::PatternRule => write!(f, "pattern-rule"),
349            ResolutionStrategy::SearchBased => write!(f, "search-based"),
350        }
351    }
352}