Skip to main content

vcs_git/
conflict.rs

//! Typed model of git conflict markers — parse a conflicted file's *content*
//! into structured regions and write a chosen resolution back. Pure functions
//! (no subprocess), so everything here is hermetic.
//!
//! Handles git's three `merge.conflictStyle`s with one grammar: `merge`
//! (2-way: ours/theirs), `diff3` (3-way: ours/base/theirs), and `zdiff3`
//! (same markers as diff3 — the common affixes are already outside the
//! region). Marker length is variable (the `conflict-marker-size`
//! gitattribute, default 7) and is detected per region. Lines are kept
//! verbatim (including `\r\n` and a missing trailing newline), so [`render`]
//! is a byte-exact roundtrip.
//!
//! jj note: files materialized with jj's `ui.conflict-marker-style = "git"`
//! use this exact grammar (with jj's own labels) and parse here; jj's native
//! `diff`/`snapshot` styles live in `vcs_jj::conflict`.
15
16use processkit::{Error, Result};
17
18use crate::BINARY;
19
/// The side of a conflicted region that a resolution keeps.
///
/// Passed to [`resolve`], which applies the same choice to every region.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionSide {
    /// Keep the lines that follow the `<<<<<<<` marker (usually `HEAD`).
    Ours,
    /// Keep the lines that follow the `|||||||` marker. Only diff3/zdiff3
    /// output records a base.
    Base,
    /// Keep the lines that precede the `>>>>>>>` marker (the branch being
    /// merged in).
    Theirs,
}
30
/// A single conflicted region of a file.
///
/// Holds the content lines of each side together with the marker lines
/// exactly as they appeared, so the region can be written back byte for byte.
///
/// Every stored line keeps its original line ending; only the final line of a
/// file may lack one.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct ConflictRegion {
    /// Text following the `<<<<<<<` run (for example `HEAD`). Empty if the
    /// marker carried no label.
    pub ours_label: String,
    /// Text following the `|||||||` run. `None` when the region is 2-way and
    /// therefore has no base marker.
    pub base_label: Option<String>,
    /// Text following the `>>>>>>>` run (for example the branch name).
    pub theirs_label: String,
    /// Content lines belonging to the `<<<<<<<` side.
    pub ours: Vec<String>,
    /// Content lines of the common base (`diff3`/`zdiff3`). `None` when the
    /// region is 2-way.
    pub base: Option<Vec<String>>,
    /// Content lines belonging to the `>>>>>>>` side.
    pub theirs: Vec<String>,
    /// Number of marker characters in this region's marker lines (git's
    /// default is 7; a larger configured marker size yields longer runs).
    pub marker_len: usize,
    // The marker lines as found in the input, endings included. They are
    // private because they exist only to make rendering byte-exact.
    marker_ours: String,
    marker_base: Option<String>,
    marker_sep: String,
    marker_end: String,
}
59
60/// A conflicted file as a sequence of plain-text runs and conflict regions —
61/// the shape that keeps [`render`] a byte-exact roundtrip.
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub enum ConflictSegment {
64    /// Lines outside any conflict (verbatim, endings included).
65    Text(Vec<String>),
66    /// One conflicted region (boxed — much larger than a text run).
67    Conflict(Box<ConflictRegion>),
68}
69
70/// Does `content` contain a line that looks like a conflict-start marker?
71/// Cheap pre-check before a full [`parse_conflicts`].
72pub fn has_conflict_markers(content: &str) -> bool {
73    content
74        .split_inclusive('\n')
75        .any(|line| marker_run(line, '<').is_some_and(|n| n >= 7))
76}
77
/// Returns the length, in characters, of the leading run of `ch` when `line`
/// is a marker line for that character.
///
/// A marker line is a non-empty run of `ch` at the very start of the line that
/// is either followed by a space (and then a label) or ends the line. A
/// trailing `\r`/`\n` is ignored. Returns `None` for anything else.
///
/// For the ASCII marker characters this module uses, the returned count is
/// also the byte length of the run.
fn marker_run(line: &str, ch: char) -> Option<usize> {
    let trimmed = line.trim_end_matches(['\r', '\n']);
    // Measure the run by what `trim_start_matches` removed, so the remainder
    // always starts on a char boundary. Counting chars and then slicing by
    // that count as a byte offset would panic for a multibyte `ch`.
    let rest = trimmed.trim_start_matches(ch);
    let run_bytes = trimmed.len() - rest.len();
    if run_bytes == 0 {
        return None;
    }
    (rest.is_empty() || rest.starts_with(' ')).then_some(run_bytes / ch.len_utf8())
}
89
/// Returns the label that follows an `n`-byte marker run on `line`, with the
/// line ending and the whitespace after the run removed.
///
/// The result is empty when the marker has no label. It is also empty when
/// `n` does not address a valid position in the line (past the end, or inside
/// a multibyte character), so a bad offset cannot panic the parser.
fn marker_label(line: &str, n: usize) -> String {
    line.trim_end_matches(['\r', '\n'])
        // Checked slicing: `[n..]` would panic on an out-of-range or
        // non-boundary offset.
        .get(n..)
        .unwrap_or("")
        .trim_start()
        .to_string()
}
96
97fn parse_error(message: String) -> Error {
98    Error::Parse {
99        program: BINARY.to_string(),
100        message,
101    }
102}
103
104/// Parse a conflicted file's content into text/conflict segments.
105///
106/// Errors with [`Error::Parse`] only on a genuinely malformed *region*: a
107/// `<<<<<<<`-opened region missing its `=======` separator or `>>>>>>>`
108/// terminator. A `=======`/`>>>>>>>` run **outside** any region is treated as
109/// ordinary content (a Markdown/RST underline, a divider, a quoted email), so a
110/// file with no real conflict — or a real conflict alongside marker-like content
111/// — parses cleanly.
112pub fn parse_conflicts(content: &str) -> Result<Vec<ConflictSegment>> {
113    let mut segments = Vec::new();
114    let mut text: Vec<String> = Vec::new();
115    let mut lines = content.split_inclusive('\n').peekable();
116
117    while let Some(line) = lines.next() {
118        // A region starts at a `<<<<<<<`-run of length ≥ 7. A `=======` / `>>>>>>>`
119        // run *outside* a region is ordinary content — a Markdown/RST setext
120        // underline (`=========`), a `=======` divider banner, a deep `>>>>>>>`
121        // email quote — NOT a malformed conflict, so it is kept verbatim as text
122        // (a real conflict is delimited by a `<<<<<<<` opener; the region loops
123        // below consume the `=`/`>` markers that belong to it). A genuinely broken
124        // region (an opener with no separator/terminator) is still caught inside
125        // those loops.
126        let Some(n) = marker_run(line, '<').filter(|&n| n >= 7) else {
127            text.push(line.to_string());
128            continue;
129        };
130        if !text.is_empty() {
131            segments.push(ConflictSegment::Text(std::mem::take(&mut text)));
132        }
133
134        let marker_ours = line.to_string();
135        let ours_label = marker_label(line, n);
136        let mut ours = Vec::new();
137        let mut base: Option<Vec<String>> = None;
138        let mut marker_base = None;
139        let mut base_label = None;
140
141        // Ours, until the base marker (diff3) or the separator.
142        let marker_sep = loop {
143            let Some(line) = lines.next() else {
144                return Err(parse_error(format!(
145                    "unterminated conflict (no ======= after {:?})",
146                    marker_ours.trim_end()
147                )));
148            };
149            // Only the FIRST `|`-run is the diff3 base marker; a later matching
150            // line is base *content* (a region has exactly one base marker — a
151            // repeated one used to overwrite it and lose a line on render).
152            if base.is_none() && marker_run(line, '|') == Some(n) {
153                base_label = Some(marker_label(line, n));
154                marker_base = Some(line.to_string());
155                base = Some(Vec::new());
156                continue;
157            }
158            if marker_run(line, '=') == Some(n) {
159                break line.to_string();
160            }
161            match &mut base {
162                Some(base_lines) => base_lines.push(line.to_string()),
163                None => ours.push(line.to_string()),
164            }
165        };
166
167        // Theirs, until the end marker.
168        let mut theirs = Vec::new();
169        let marker_end = loop {
170            let Some(line) = lines.next() else {
171                return Err(parse_error(format!(
172                    "unterminated conflict (no >>>>>>> after {:?})",
173                    marker_ours.trim_end()
174                )));
175            };
176            if marker_run(line, '>') == Some(n) {
177                break line.to_string();
178            }
179            theirs.push(line.to_string());
180        };
181        let theirs_label = marker_label(&marker_end, n);
182
183        segments.push(ConflictSegment::Conflict(Box::new(ConflictRegion {
184            ours_label,
185            base_label,
186            theirs_label,
187            ours,
188            base,
189            theirs,
190            marker_len: n,
191            marker_ours,
192            marker_base,
193            marker_sep,
194            marker_end,
195        })));
196    }
197    if !text.is_empty() {
198        segments.push(ConflictSegment::Text(text));
199    }
200    Ok(segments)
201}
202
203/// Re-render segments verbatim — the byte-exact inverse of
204/// [`parse_conflicts`].
205pub fn render(segments: &[ConflictSegment]) -> String {
206    let mut out = String::new();
207    for segment in segments {
208        match segment {
209            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
210            ConflictSegment::Conflict(region) => {
211                out.push_str(&region.marker_ours);
212                region.ours.iter().for_each(|l| out.push_str(l));
213                if let Some(marker) = &region.marker_base {
214                    out.push_str(marker);
215                    if let Some(base) = &region.base {
216                        base.iter().for_each(|l| out.push_str(l));
217                    }
218                }
219                out.push_str(&region.marker_sep);
220                region.theirs.iter().for_each(|l| out.push_str(l));
221                out.push_str(&region.marker_end);
222            }
223        }
224    }
225    out
226}
227
228/// Produce the file content with every conflict resolved to `side`.
229///
230/// Errors with a clear message when `side` is [`ResolutionSide::Base`] and a
231/// region has no base (2-way `merge` style records none).
232pub fn resolve(segments: &[ConflictSegment], side: ResolutionSide) -> Result<String> {
233    let mut out = String::new();
234    for segment in segments {
235        match segment {
236            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
237            ConflictSegment::Conflict(region) => {
238                let chosen = match side {
239                    ResolutionSide::Ours => &region.ours,
240                    ResolutionSide::Theirs => &region.theirs,
241                    ResolutionSide::Base => region.base.as_ref().ok_or_else(|| Error::Spawn {
242                        program: BINARY.to_string(),
243                        source: std::io::Error::new(
244                            std::io::ErrorKind::InvalidInput,
245                            "cannot resolve to Base: this conflict records no base \
246                             (2-way `merge` style; use diff3/zdiff3)",
247                        ),
248                    })?,
249                };
250                chosen.iter().for_each(|l| out.push_str(l));
251            }
252        }
253    }
254    Ok(out)
255}
256
// Example-based unit tests: parsing of the supported marker styles, the
// byte-exact render roundtrip, resolution to each side, and the error/text
// boundary for malformed or merely marker-like input.
#[cfg(test)]
mod tests {
    use super::*;

    // A 2-way (`merge` style) conflict surrounded by one plain line on each side.
    const MERGE_2WAY: &str =
        "line 1\n<<<<<<< HEAD\nmain line 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";
    // The same conflict in `diff3` style, with a `|||||||` base section.
    const DIFF3: &str = "line 1\n<<<<<<< HEAD\nmain line 2\n||||||| 0b025ce\nline 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";

    // A 2-way file splits into text / conflict / text, with labels, sides and
    // marker length captured and no base recorded.
    #[test]
    fn parses_two_way_merge_style() {
        let segments = parse_conflicts(MERGE_2WAY).expect("parse");
        assert_eq!(segments.len(), 3);
        let ConflictSegment::Conflict(region) = &segments[1] else {
            panic!("expected a conflict, got {segments:?}");
        };
        assert_eq!(region.ours_label, "HEAD");
        assert_eq!(region.theirs_label, "feature");
        assert_eq!(region.ours, ["main line 2\n"]);
        assert_eq!(region.theirs, ["feature line 2\n"]);
        assert!(region.base.is_none());
        assert_eq!(region.marker_len, 7);
    }

    // A diff3 region records the base label and the base lines.
    #[test]
    fn parses_diff3_with_base() {
        let segments = parse_conflicts(DIFF3).expect("parse");
        let ConflictSegment::Conflict(region) = &segments[1] else {
            panic!("expected a conflict");
        };
        assert_eq!(region.base_label.as_deref(), Some("0b025ce"));
        assert_eq!(region.base.as_deref(), Some(&["line 2\n".to_string()][..]));
    }

    // Proptest-found regression (seed committed in proptest-regressions/): a
    // SECOND `|`-run line inside a diff3 region is base *content*, not a
    // replacement base marker — the overwrite used to drop a line on render,
    // breaking the byte-exact roundtrip.
    #[test]
    fn repeated_base_marker_line_is_base_content() {
        let s = "<<<<<<<< HEAD\n|||||||| base\n|||||||| base\n========\n>>>>>>>> branché\n";
        let segments = parse_conflicts(s).expect("parse");
        let ConflictSegment::Conflict(region) = &segments[0] else {
            panic!("expected a conflict, got {segments:?}");
        };
        assert_eq!(
            region.base.as_deref(),
            Some(&["|||||||| base\n".to_string()][..]),
            "the second |-run line is content of the base section"
        );
        assert_eq!(render(&segments), s, "roundtrip must be byte-exact");
    }

    // Roundtrip must be byte-exact — including CRLF, custom marker sizes,
    // and a conflict at EOF with no trailing newline.
    #[test]
    fn render_roundtrips_exactly() {
        let crlf = "a\r\n<<<<<<< HEAD\r\nours\r\n=======\r\ntheirs\r\n>>>>>>> b\r\nz\r\n";
        let wide = "<<<<<<<<<<<<<<< HEAD\nours\n===============\ntheirs\n>>>>>>>>>>>>>>> b\n";
        let eof = "x\n<<<<<<< HEAD\nours\n=======\ntheirs\n>>>>>>> b";
        for sample in [MERGE_2WAY, DIFF3, crlf, wide, eof] {
            let segments = parse_conflicts(sample).expect("parse");
            assert_eq!(render(&segments), sample, "roundtrip");
        }
        // The wide sample detected the larger marker run.
        let segments = parse_conflicts(wide).unwrap();
        let ConflictSegment::Conflict(region) = &segments[0] else {
            panic!()
        };
        assert_eq!(region.marker_len, 15);
    }

    // Resolution applies the chosen side to every region in the file and
    // drops all marker lines; Base works only when a base was recorded.
    #[test]
    fn resolve_takes_one_side_everywhere() {
        // Two conflicts in one file, separated by a plain line.
        let two = format!("{MERGE_2WAY}between\n{MERGE_2WAY}");
        let segments = parse_conflicts(&two).expect("parse");
        assert_eq!(
            resolve(&segments, ResolutionSide::Ours).unwrap(),
            "line 1\nmain line 2\nline 3\nbetween\nline 1\nmain line 2\nline 3\n"
        );
        assert_eq!(
            resolve(&segments, ResolutionSide::Theirs).unwrap(),
            "line 1\nfeature line 2\nline 3\nbetween\nline 1\nfeature line 2\nline 3\n"
        );
        // No base recorded in merge style → Base resolution is refused.
        assert!(resolve(&segments, ResolutionSide::Base).is_err());

        let diff3 = parse_conflicts(DIFF3).expect("parse");
        assert_eq!(
            resolve(&diff3, ResolutionSide::Base).unwrap(),
            "line 1\nline 2\nline 3\n"
        );
    }

    // An empty side is a valid region, and a conflict-free file is plain text.
    #[test]
    fn empty_sides_and_clean_files_parse() {
        // One side deleted everything.
        let deletion = "<<<<<<< HEAD\n=======\nkept\n>>>>>>> b\n";
        let segments = parse_conflicts(deletion).expect("parse");
        assert_eq!(resolve(&segments, ResolutionSide::Ours).unwrap(), "");
        // A file without conflicts is one text segment.
        let clean = parse_conflicts("just\ntext\n").expect("parse");
        assert_eq!(clean.len(), 1);
        assert!(!has_conflict_markers("just\ntext\n"));
        assert!(has_conflict_markers(MERGE_2WAY));
    }

    // An opener that never reaches its separator or terminator is the only
    // input shape reported as `Error::Parse`.
    #[test]
    fn malformed_files_are_parse_errors() {
        // Only a genuinely broken *region* (an opener with no separator/terminator)
        // is an error.
        for bad in [
            "<<<<<<< HEAD\nours\n",                  // no separator
            "<<<<<<< HEAD\nours\n=======\ntheirs\n", // no terminator
        ] {
            assert!(
                matches!(parse_conflicts(bad), Err(Error::Parse { .. })),
                "{bad:?} must fail"
            );
        }
    }

    // A `=======`/`>>>>>>>` run outside any region is ordinary content (Markdown
    // underline, divider, quoted email), not a malformed conflict — parsed as text,
    // never an error, and round-trips byte-exact. (H6)
    #[test]
    fn marker_like_content_outside_a_region_is_text() {
        for content in [
            "Heading\n=======\nbody\n",          // RST/Markdown setext underline
            "a\n=======================\nb\n",   // divider banner
            ">>>>>>> deep email quote\nreply\n", // quoted email
            "code: a <<<<<<< b\n",               // marker run not at line start
        ] {
            let segments = parse_conflicts(content).expect("parses as text, no error");
            assert!(
                segments
                    .iter()
                    .all(|s| matches!(s, ConflictSegment::Text(_))),
                "{content:?} must be all text, got {segments:?}"
            );
            assert_eq!(render(&segments), content, "round-trips byte-exact");
        }
    }
}
400
// Property-based fuzzing. The marker grammar slices on marker-run lengths and
// must never panic on a hostile file (a real conflicted file from a git we
// don't control), and `render(parse(x)?) == x` must hold byte-for-byte — the
// regression net for the marker-detection / byte-offset logic.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from the conflict-marker vocabulary plus multibyte text,
    /// with variable marker-run lengths (7..16) and CRLF, so a joined document
    /// reaches the marker-slicing branches with adversarial content.
    fn conflict_line() -> impl Strategy<Value = String> {
        prop_oneof![
            (7usize..16).prop_map(|n| format!("{} HEAD\n", "<".repeat(n))),
            (7usize..16).prop_map(|n| format!("{}\n", "=".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} branché\n", ">".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} base\n", "|".repeat(n))),
            "[a-zé<>=|]{0,14}\r?\n", // text incl. marker-ish chars + multibyte + CRLF
            Just("\n".to_string()),
        ]
    }

    /// A document of up to 29 generated lines, concatenated.
    fn conflict_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(conflict_line(), 0..30).prop_map(|lines| lines.concat())
    }

    proptest! {
        #[test]
        fn parse_never_panics_on_arbitrary_text(s in any::<String>()) {
            let _ = has_conflict_markers(&s);
            // Whatever arbitrary text happens to parse must also round-trip
            // byte-exact — the load-bearing invariant, asserted on this generator
            // too (not just the structured one below).
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        #[test]
        fn parse_never_panics_on_structured_text(s in conflict_doc()) {
            let _ = parse_conflicts(&s);
        }

        // The load-bearing invariant: whenever the file parses, re-rendering is
        // byte-exact.
        #[test]
        fn render_roundtrips_whatever_parses(s in conflict_doc()) {
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        // A marker-free file is one Text segment that renders back unchanged.
        #[test]
        fn marker_free_files_are_a_single_text_segment(s in "[a-zé \t\r\n]{0,80}") {
            prop_assume!(!has_conflict_markers(&s));
            let segments = parse_conflicts(&s).expect("no markers → Ok");
            // Check the shape the test name promises, not just the roundtrip.
            // The generator can produce the empty string, which parses to no
            // segments at all, so "single" means at most one.
            prop_assert!(
                matches!(segments.as_slice(), [] | [ConflictSegment::Text(_)]),
                "expected at most one Text segment, got {:?}",
                segments
            );
            prop_assert_eq!(render(&segments), s);
        }
    }
}