Skip to main content

vcs_git/
conflict.rs

//! Typed model of git conflict markers — parse a conflicted file's *content*
//! into structured regions and write a chosen resolution back. Pure functions
//! (no subprocess), so everything here is hermetic.
//!
//! Handles git's three `merge.conflictStyle`s with one grammar: `merge`
//! (2-way: ours/theirs), `diff3` (3-way: ours/base/theirs), and `zdiff3`
//! (same markers as diff3 — the common affixes are already outside the
//! region). Marker length is variable (`merge.conflictMarkerSize`, default 7)
//! and is detected per region. Lines are kept verbatim (`\r\n` endings and a
//! missing final newline are preserved), so [`render`] is a byte-exact roundtrip.
//!
//! jj note: files materialized with jj's `ui.conflict-marker-style = "git"`
//! use this exact grammar (with jj's own labels) and parse here; jj's native
//! `diff`/`snapshot` styles live in `vcs_jj::conflict`.
15
16use processkit::{Error, Result};
17
18use crate::BINARY;
19
/// The side of a conflict that a resolution keeps.
///
/// Deliberately **exhaustive** (no `#[non_exhaustive]`): a git conflict has
/// exactly these three sides, so the domain is closed. Marking it
/// `#[non_exhaustive]` would never pay off with a new variant, would force a
/// wildcard arm on every caller that matches on it (most callers merely
/// *construct* a value to hand to [`resolve`]), and would wrongly suggest that
/// a fourth side could appear.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ResolutionSide {
    /// The side opened by `<<<<<<<` (typically `HEAD`).
    Ours,
    /// The common ancestor introduced by `|||||||` (diff3/zdiff3 only).
    Base,
    /// The side closed by `>>>>>>>` (the merged-in branch).
    Theirs,
}
36
/// A single conflicted region: each side's lines, the labels that followed the
/// markers, and the marker lines themselves exactly as they appeared (retained
/// so that rendering reproduces the input byte for byte).
///
/// Every line vector stores its lines **including** the original line ending;
/// only the final line of a file may lack one.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct ConflictRegion {
    /// Text following the `<<<<<<<` marker (e.g. `HEAD`); empty if there is none.
    pub ours_label: String,
    /// Text following the `|||||||` marker; `None` for a 2-way conflict.
    pub base_label: Option<String>,
    /// Text following the `>>>>>>>` marker (e.g. the branch name).
    pub theirs_label: String,
    /// Lines belonging to the `<<<<<<<` side.
    pub ours: Vec<String>,
    /// Lines of the base section (`diff3`/`zdiff3`); `None` for a 2-way conflict.
    pub base: Option<Vec<String>>,
    /// Lines belonging to the `>>>>>>>` side.
    pub theirs: Vec<String>,
    /// Length of the marker run (7 unless `merge.conflictMarkerSize` raised it).
    pub marker_len: usize,
    // The four marker lines, stored verbatim (ending included) for byte-exact
    // rendering. Private: they are derived from the parsed input, not edited.
    marker_ours: String,
    marker_base: Option<String>,
    marker_sep: String,
    marker_end: String,
}
65
66/// A conflicted file as a sequence of plain-text runs and conflict regions —
67/// the shape that keeps [`render`] a byte-exact roundtrip.
68///
69/// Intentionally **exhaustive**: a file is text-or-conflict, and consumers match
70/// every segment in the resolve/render loop this crate exists to serve, so the
71/// closed enum stays ergonomic. Field-level evolution rides [`ConflictRegion`],
72/// which *is* `#[non_exhaustive]`.
73#[derive(Debug, Clone, PartialEq, Eq)]
74pub enum ConflictSegment {
75    /// Lines outside any conflict (verbatim, endings included).
76    Text(Vec<String>),
77    /// One conflicted region (boxed — much larger than a text run).
78    Conflict(Box<ConflictRegion>),
79}
80
81/// Does `content` contain a line that looks like a conflict-start marker?
82/// Cheap pre-check before a full [`parse_conflicts`].
83pub fn has_conflict_markers(content: &str) -> bool {
84    content
85        .split_inclusive('\n')
86        .any(|line| marker_run(line, '<').is_some_and(|n| n >= 7))
87}
88
/// Length, in characters, of the leading run of `ch` when `line` is a marker
/// line for it; `None` otherwise.
///
/// A marker line starts with a non-empty run of `ch` that is either followed
/// by a space (a label may come after it) or ends the line. Trailing `\r` and
/// `\n` characters are ignored when deciding where the line ends.
fn marker_run(line: &str, ch: char) -> Option<usize> {
    let trimmed = line.trim_end_matches(['\r', '\n']);
    // Measure the run through the stripped remainder, i.e. in *bytes*. Counting
    // chars and then slicing `trimmed[count..]` conflates char and byte offsets
    // and panics (or mis-slices) as soon as `ch` is not a single-byte char.
    let rest = trimmed.trim_start_matches(ch);
    let run_bytes = trimmed.len() - rest.len();
    if run_bytes == 0 {
        return None;
    }
    // The run consists solely of `ch`, so bytes / len_utf8 is its char count.
    (rest.is_empty() || rest.starts_with(' ')).then_some(run_bytes / ch.len_utf8())
}
100
/// The label that follows an `n`-byte marker run on `line`, with the line
/// ending and any leading whitespace removed (empty when there is none).
///
/// `n` is normally the run length reported by `marker_run` for the same line.
/// If `n` lies past the end of the line or does not fall on a character
/// boundary, the label is reported as empty rather than panicking.
fn marker_label(line: &str, n: usize) -> String {
    line.trim_end_matches(['\r', '\n'])
        // Checked slice: a bad offset yields "no label" instead of a panic.
        .get(n..)
        .unwrap_or("")
        .trim_start()
        .to_string()
}
107
108fn parse_error(message: String) -> Error {
109    Error::Parse {
110        program: BINARY.to_string(),
111        message,
112    }
113}
114
115/// Parse a conflicted file's content into text/conflict segments.
116///
117/// Errors with [`Error::Parse`] only on a genuinely malformed *region*: a
118/// `<<<<<<<`-opened region missing its `=======` separator or `>>>>>>>`
119/// terminator. A `=======`/`>>>>>>>` run **outside** any region is treated as
120/// ordinary content (a Markdown/RST underline, a divider, a quoted email), so a
121/// file with no real conflict — or a real conflict alongside marker-like content
122/// — parses cleanly.
123pub fn parse_conflicts(content: &str) -> Result<Vec<ConflictSegment>> {
124    let mut segments = Vec::new();
125    let mut text: Vec<String> = Vec::new();
126    let mut lines = content.split_inclusive('\n').peekable();
127
128    while let Some(line) = lines.next() {
129        // A region starts at a `<<<<<<<`-run of length ≥ 7. A `=======` / `>>>>>>>`
130        // run *outside* a region is ordinary content — a Markdown/RST setext
131        // underline (`=========`), a `=======` divider banner, a deep `>>>>>>>`
132        // email quote — NOT a malformed conflict, so it is kept verbatim as text
133        // (a real conflict is delimited by a `<<<<<<<` opener; the region loops
134        // below consume the `=`/`>` markers that belong to it). A genuinely broken
135        // region (an opener with no separator/terminator) is still caught inside
136        // those loops.
137        let Some(n) = marker_run(line, '<').filter(|&n| n >= 7) else {
138            text.push(line.to_string());
139            continue;
140        };
141        if !text.is_empty() {
142            segments.push(ConflictSegment::Text(std::mem::take(&mut text)));
143        }
144
145        let marker_ours = line.to_string();
146        let ours_label = marker_label(line, n);
147        let mut ours = Vec::new();
148        let mut base: Option<Vec<String>> = None;
149        let mut marker_base = None;
150        let mut base_label = None;
151
152        // Ours, until the base marker (diff3) or the separator.
153        let marker_sep = loop {
154            let Some(line) = lines.next() else {
155                return Err(parse_error(format!(
156                    "unterminated conflict (no ======= after {:?})",
157                    marker_ours.trim_end()
158                )));
159            };
160            // Only the FIRST `|`-run is the diff3 base marker; a later matching
161            // line is base *content* (a region has exactly one base marker — a
162            // repeated one used to overwrite it and lose a line on render).
163            if base.is_none() && marker_run(line, '|') == Some(n) {
164                base_label = Some(marker_label(line, n));
165                marker_base = Some(line.to_string());
166                base = Some(Vec::new());
167                continue;
168            }
169            if marker_run(line, '=') == Some(n) {
170                break line.to_string();
171            }
172            match &mut base {
173                Some(base_lines) => base_lines.push(line.to_string()),
174                None => ours.push(line.to_string()),
175            }
176        };
177
178        // Theirs, until the end marker.
179        let mut theirs = Vec::new();
180        let marker_end = loop {
181            let Some(line) = lines.next() else {
182                return Err(parse_error(format!(
183                    "unterminated conflict (no >>>>>>> after {:?})",
184                    marker_ours.trim_end()
185                )));
186            };
187            if marker_run(line, '>') == Some(n) {
188                break line.to_string();
189            }
190            theirs.push(line.to_string());
191        };
192        let theirs_label = marker_label(&marker_end, n);
193
194        segments.push(ConflictSegment::Conflict(Box::new(ConflictRegion {
195            ours_label,
196            base_label,
197            theirs_label,
198            ours,
199            base,
200            theirs,
201            marker_len: n,
202            marker_ours,
203            marker_base,
204            marker_sep,
205            marker_end,
206        })));
207    }
208    if !text.is_empty() {
209        segments.push(ConflictSegment::Text(text));
210    }
211    Ok(segments)
212}
213
214/// Re-render segments verbatim — the byte-exact inverse of
215/// [`parse_conflicts`].
216pub fn render(segments: &[ConflictSegment]) -> String {
217    let mut out = String::new();
218    for segment in segments {
219        match segment {
220            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
221            ConflictSegment::Conflict(region) => {
222                out.push_str(&region.marker_ours);
223                region.ours.iter().for_each(|l| out.push_str(l));
224                if let Some(marker) = &region.marker_base {
225                    out.push_str(marker);
226                    if let Some(base) = &region.base {
227                        base.iter().for_each(|l| out.push_str(l));
228                    }
229                }
230                out.push_str(&region.marker_sep);
231                region.theirs.iter().for_each(|l| out.push_str(l));
232                out.push_str(&region.marker_end);
233            }
234        }
235    }
236    out
237}
238
239/// Produce the file content with every conflict resolved to `side`.
240///
241/// Errors with a clear message when `side` is [`ResolutionSide::Base`] and a
242/// region has no base (2-way `merge` style records none).
243pub fn resolve(segments: &[ConflictSegment], side: ResolutionSide) -> Result<String> {
244    let mut out = String::new();
245    for segment in segments {
246        match segment {
247            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
248            ConflictSegment::Conflict(region) => {
249                let chosen = match side {
250                    ResolutionSide::Ours => &region.ours,
251                    ResolutionSide::Theirs => &region.theirs,
252                    ResolutionSide::Base => region.base.as_ref().ok_or_else(|| Error::Spawn {
253                        program: BINARY.to_string(),
254                        source: std::io::Error::new(
255                            std::io::ErrorKind::InvalidInput,
256                            "cannot resolve to Base: this conflict records no base \
257                             (2-way `merge` style; use diff3/zdiff3)",
258                        ),
259                    })?,
260                };
261                chosen.iter().for_each(|l| out.push_str(l));
262            }
263        }
264    }
265    Ok(out)
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// A 2-way (`merge` style) conflict sitting between two context lines.
    const MERGE_2WAY: &str =
        "line 1\n<<<<<<< HEAD\nmain line 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";
    /// The same conflict in `diff3` style, i.e. with a base section.
    const DIFF3: &str = "line 1\n<<<<<<< HEAD\nmain line 2\n||||||| 0b025ce\nline 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";

    #[test]
    fn parses_two_way_merge_style() {
        let parsed = parse_conflicts(MERGE_2WAY).expect("parse");
        assert_eq!(parsed.len(), 3);
        let region = match &parsed[1] {
            ConflictSegment::Conflict(region) => region,
            ConflictSegment::Text(_) => panic!("expected a conflict, got {parsed:?}"),
        };
        assert_eq!(region.ours_label, "HEAD");
        assert_eq!(region.theirs_label, "feature");
        assert_eq!(region.ours, ["main line 2\n"]);
        assert_eq!(region.theirs, ["feature line 2\n"]);
        assert!(region.base.is_none());
        assert_eq!(region.marker_len, 7);
    }

    #[test]
    fn parses_diff3_with_base() {
        let parsed = parse_conflicts(DIFF3).expect("parse");
        let region = match &parsed[1] {
            ConflictSegment::Conflict(region) => region,
            ConflictSegment::Text(_) => panic!("expected a conflict"),
        };
        let expected_base = ["line 2\n".to_string()];
        assert_eq!(region.base_label.as_deref(), Some("0b025ce"));
        assert_eq!(region.base.as_deref(), Some(&expected_base[..]));
    }

    // Regression found by proptest (seed committed in proptest-regressions/):
    // a SECOND `|`-run line inside a diff3 region is *content* of the base
    // section, not a replacement base marker. Overwriting the marker used to
    // drop a line on render and break the byte-exact roundtrip.
    #[test]
    fn repeated_base_marker_line_is_base_content() {
        let input = "<<<<<<<< HEAD\n|||||||| base\n|||||||| base\n========\n>>>>>>>> branché\n";
        let parsed = parse_conflicts(input).expect("parse");
        let region = match &parsed[0] {
            ConflictSegment::Conflict(region) => region,
            ConflictSegment::Text(_) => panic!("expected a conflict, got {parsed:?}"),
        };
        let expected_base = ["|||||||| base\n".to_string()];
        assert_eq!(
            region.base.as_deref(),
            Some(&expected_base[..]),
            "the second |-run line is content of the base section"
        );
        assert_eq!(render(&parsed), input, "roundtrip must be byte-exact");
    }

    // The roundtrip has to be byte-exact for CRLF endings, custom marker
    // sizes, and a conflict that ends the file without a trailing newline.
    #[test]
    fn render_roundtrips_exactly() {
        let crlf = "a\r\n<<<<<<< HEAD\r\nours\r\n=======\r\ntheirs\r\n>>>>>>> b\r\nz\r\n";
        let wide = "<<<<<<<<<<<<<<< HEAD\nours\n===============\ntheirs\n>>>>>>>>>>>>>>> b\n";
        let eof = "x\n<<<<<<< HEAD\nours\n=======\ntheirs\n>>>>>>> b";
        [MERGE_2WAY, DIFF3, crlf, wide, eof].into_iter().for_each(|sample| {
            let parsed = parse_conflicts(sample).expect("parse");
            assert_eq!(render(&parsed), sample, "roundtrip");
        });
        // The wide sample must have picked up the larger marker run.
        let parsed = parse_conflicts(wide).unwrap();
        let region = match &parsed[0] {
            ConflictSegment::Conflict(region) => region,
            ConflictSegment::Text(_) => panic!(),
        };
        assert_eq!(region.marker_len, 15);
    }

    #[test]
    fn resolve_takes_one_side_everywhere() {
        let doubled = format!("{MERGE_2WAY}between\n{MERGE_2WAY}");
        let parsed = parse_conflicts(&doubled).expect("parse");

        let kept_ours = resolve(&parsed, ResolutionSide::Ours).unwrap();
        assert_eq!(
            kept_ours,
            "line 1\nmain line 2\nline 3\nbetween\nline 1\nmain line 2\nline 3\n"
        );
        let kept_theirs = resolve(&parsed, ResolutionSide::Theirs).unwrap();
        assert_eq!(
            kept_theirs,
            "line 1\nfeature line 2\nline 3\nbetween\nline 1\nfeature line 2\nline 3\n"
        );
        // Merge style records no base, so resolving to Base is refused.
        assert!(resolve(&parsed, ResolutionSide::Base).is_err());

        let with_base = parse_conflicts(DIFF3).expect("parse");
        let kept_base = resolve(&with_base, ResolutionSide::Base).unwrap();
        assert_eq!(kept_base, "line 1\nline 2\nline 3\n");
    }

    #[test]
    fn empty_sides_and_clean_files_parse() {
        // One side deleted everything.
        let deletion = "<<<<<<< HEAD\n=======\nkept\n>>>>>>> b\n";
        let parsed = parse_conflicts(deletion).expect("parse");
        assert_eq!(resolve(&parsed, ResolutionSide::Ours).unwrap(), "");
        // A conflict-free file is exactly one text segment.
        let clean_input = "just\ntext\n";
        let clean = parse_conflicts(clean_input).expect("parse");
        assert_eq!(clean.len(), 1);
        assert!(!has_conflict_markers(clean_input));
        assert!(has_conflict_markers(MERGE_2WAY));
    }

    #[test]
    fn malformed_files_are_parse_errors() {
        // Only a genuinely broken *region* — an opener lacking its separator or
        // terminator — is an error.
        let broken = [
            "<<<<<<< HEAD\nours\n",                  // no separator
            "<<<<<<< HEAD\nours\n=======\ntheirs\n", // no terminator
        ];
        for bad in broken {
            let outcome = parse_conflicts(bad);
            assert!(
                matches!(outcome, Err(Error::Parse { .. })),
                "{bad:?} must fail"
            );
        }
    }

    // A `=======`/`>>>>>>>` run outside any region is ordinary content (Markdown
    // underline, divider, quoted email), not a malformed conflict: it parses as
    // text, never errors, and round-trips byte-exact. (H6)
    #[test]
    fn marker_like_content_outside_a_region_is_text() {
        let samples = [
            "Heading\n=======\nbody\n",          // RST/Markdown setext underline
            "a\n=======================\nb\n",   // divider banner
            ">>>>>>> deep email quote\nreply\n", // quoted email
            "code: a <<<<<<< b\n",               // marker run not at line start
        ];
        for content in samples {
            let segments = parse_conflicts(content).expect("parses as text, no error");
            let only_text = segments
                .iter()
                .all(|segment| matches!(segment, ConflictSegment::Text(_)));
            assert!(
                only_text,
                "{content:?} must be all text, got {segments:?}"
            );
            assert_eq!(render(&segments), content, "round-trips byte-exact");
        }
    }
}
411
// Property-based fuzzing. The marker grammar slices on marker-run lengths and
// must never panic on a hostile file (a real conflicted file from a git we
// don't control), and `render(parse(x)?) == x` must hold byte-for-byte — the
// regression net for the marker-detection / byte-offset logic.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from the conflict-marker vocabulary plus multibyte text,
    /// with variable marker-run lengths (7..16) and CRLF, so a joined document
    /// reaches the marker-slicing branches with adversarial content.
    fn conflict_line() -> impl Strategy<Value = String> {
        prop_oneof![
            (7usize..16).prop_map(|n| format!("{} HEAD\n", "<".repeat(n))),
            (7usize..16).prop_map(|n| format!("{}\n", "=".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} branché\n", ">".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} base\n", "|".repeat(n))),
            "[a-zé<>=|]{0,14}\r?\n", // text incl. marker-ish chars + multibyte + CRLF
            Just("\n".to_string()),
        ]
    }

    /// A document of up to 29 lines from [`conflict_line`], concatenated.
    fn conflict_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(conflict_line(), 0..30).prop_map(|lines| lines.concat())
    }

    proptest! {
        #[test]
        fn parse_never_panics_on_arbitrary_text(s in any::<String>()) {
            let _ = has_conflict_markers(&s);
            // Whatever arbitrary text happens to parse must also round-trip
            // byte-exact — the load-bearing invariant, asserted on this generator
            // too (not just the structured one below).
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        #[test]
        fn parse_never_panics_on_structured_text(s in conflict_doc()) {
            let _ = parse_conflicts(&s);
        }

        // The load-bearing invariant: whenever the file parses, re-rendering is
        // byte-exact.
        #[test]
        fn render_roundtrips_whatever_parses(s in conflict_doc()) {
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        // A marker-free file is one Text segment that renders back unchanged.
        #[test]
        fn marker_free_files_are_a_single_text_segment(s in "[a-zé \t\r\n]{0,80}") {
            prop_assume!(!has_conflict_markers(&s));
            let segments = parse_conflicts(&s).expect("no markers → Ok");
            // Assert the shape the test name promises, not just the roundtrip:
            // empty input yields no segments, anything else exactly one, and
            // that one must be a Text run.
            prop_assert_eq!(segments.len(), usize::from(!s.is_empty()));
            prop_assert!(segments
                .iter()
                .all(|segment| matches!(segment, ConflictSegment::Text(_))));
            prop_assert_eq!(render(&segments), s);
        }
    }
}