// vcs_git/conflict.rs

//! Typed model of git conflict markers — parse a conflicted file's *content*
//! into structured regions and write a chosen resolution back. Pure functions
//! (no subprocess), so everything here is hermetic.
//!
//! Handles git's three `merge.conflictStyle`s with one grammar: `merge`
//! (2-way: ours/theirs), `diff3` (3-way: ours/base/theirs), and `zdiff3`
//! (same markers as diff3 — the common affixes are already outside the
//! region). Marker length is variable (`merge.conflictMarkerSize`, default 7)
//! and is detected per region. Lines are kept verbatim (including `\r\n` and
//! a missing trailing newline), so [`render`] is a byte-exact roundtrip.
//!
//! jj note: files materialized with jj's `ui.conflict-marker-style = "git"`
//! use this exact grammar (with jj's own labels) and parse here; jj's native
//! `diff`/`snapshot` styles live in `vcs_jj::conflict`.

use processkit::{Error, Result};

use crate::BINARY;

/// Which side of a conflict a resolution keeps.
///
/// Passed to [`resolve`] to select the lines that replace each conflicted
/// region.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionSide {
    /// The `<<<<<<<` side (typically `HEAD`).
    Ours,
    /// The `|||||||` base (diff3/zdiff3 only).
    Base,
    /// The `>>>>>>>` side (the merged-in branch).
    Theirs,
}

/// One conflicted region: the lines of each side plus the verbatim marker
/// lines (kept so rendering is byte-exact).
///
/// All line vectors store lines **with** their original endings; the last
/// line of a file may have none.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct ConflictRegion {
    /// Label after the `<<<<<<<` marker (e.g. `HEAD`); empty when absent.
    pub ours_label: String,
    /// Label after the `|||||||` marker; `None` for 2-way conflicts.
    pub base_label: Option<String>,
    /// Label after the `>>>>>>>` marker (e.g. the branch name).
    pub theirs_label: String,
    /// The `<<<<<<<`-side lines.
    pub ours: Vec<String>,
    /// The base lines (`diff3`/`zdiff3`); `None` for 2-way conflicts.
    pub base: Option<Vec<String>>,
    /// The `>>>>>>>`-side lines.
    pub theirs: Vec<String>,
    /// The marker run length (7 unless `merge.conflictMarkerSize` raised it).
    pub marker_len: usize,
    // Verbatim marker lines (line ending included), kept private so that
    // rendering can reproduce the original bytes exactly.
    /// The full `<<<<<<<` line as it appeared in the file.
    marker_ours: String,
    /// The full `|||||||` line; `None` when the region has no base section.
    marker_base: Option<String>,
    /// The full `=======` line.
    marker_sep: String,
    /// The full `>>>>>>>` line.
    marker_end: String,
}

60/// A conflicted file as a sequence of plain-text runs and conflict regions —
61/// the shape that keeps [`render`] a byte-exact roundtrip.
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub enum ConflictSegment {
64    /// Lines outside any conflict (verbatim, endings included).
65    Text(Vec<String>),
66    /// One conflicted region (boxed — much larger than a text run).
67    Conflict(Box<ConflictRegion>),
68}
69
70/// Does `content` contain a line that looks like a conflict-start marker?
71/// Cheap pre-check before a full [`parse_conflicts`].
72pub fn has_conflict_markers(content: &str) -> bool {
73    content
74        .split_inclusive('\n')
75        .any(|line| marker_run(line, '<').is_some_and(|n| n >= 7))
76}
77
/// The length of the leading `ch` run when `line` is a marker line for it:
/// the run must be followed by a space + label, or end the line.
///
/// Trailing `\r`/`\n` characters are ignored. Returns `None` when the line
/// does not start with `ch`, or when the run is followed by anything other
/// than a space or the end of the line. The returned length counts
/// characters; for the ASCII marker characters used in this file that equals
/// the byte length of the run.
fn marker_run(line: &str, ch: char) -> Option<usize> {
    let trimmed = line.trim_end_matches(['\r', '\n']);
    // Measure the run in bytes so the remainder is always split on a char
    // boundary, even when `ch` is multibyte.
    let rest = trimmed.trim_start_matches(ch);
    let run_bytes = trimmed.len() - rest.len();
    if run_bytes == 0 {
        return None;
    }
    let n = run_bytes / ch.len_utf8();
    (rest.is_empty() || rest.starts_with(' ')).then_some(n)
}

/// The label after an `n`-char marker run (empty when none).
///
/// Trailing `\r`/`\n` characters are dropped, the first `n` bytes (the marker
/// run) are skipped, and leading whitespace before the label is trimmed. An
/// `n` that falls past the end of the line or off a char boundary yields an
/// empty label instead of panicking.
fn marker_label(line: &str, n: usize) -> String {
    line.trim_end_matches(['\r', '\n'])
        .get(n..)
        .unwrap_or_default()
        .trim_start()
        .to_string()
}

97fn parse_error(message: String) -> Error {
98    Error::Parse {
99        program: BINARY.to_string(),
100        message,
101    }
102}
103
104/// Parse a conflicted file's content into text/conflict segments.
105///
106/// Errors with [`Error::Parse`] on a malformed file: a region missing its
107/// `=======` separator or `>>>>>>>` terminator, or a stray separator/end
108/// marker outside a region.
109pub fn parse_conflicts(content: &str) -> Result<Vec<ConflictSegment>> {
110    let mut segments = Vec::new();
111    let mut text: Vec<String> = Vec::new();
112    let mut lines = content.split_inclusive('\n').peekable();
113
114    while let Some(line) = lines.next() {
115        // A region starts at a `<<<<<<<`-run of length ≥ 7.
116        let Some(n) = marker_run(line, '<').filter(|&n| n >= 7) else {
117            if marker_run(line, '=').is_some_and(|m| m >= 7)
118                || marker_run(line, '>').is_some_and(|m| m >= 7)
119            {
120                return Err(parse_error(format!(
121                    "conflict marker outside a region: {:?}",
122                    line.trim_end()
123                )));
124            }
125            text.push(line.to_string());
126            continue;
127        };
128        if !text.is_empty() {
129            segments.push(ConflictSegment::Text(std::mem::take(&mut text)));
130        }
131
132        let marker_ours = line.to_string();
133        let ours_label = marker_label(line, n);
134        let mut ours = Vec::new();
135        let mut base: Option<Vec<String>> = None;
136        let mut marker_base = None;
137        let mut base_label = None;
138
139        // Ours, until the base marker (diff3) or the separator.
140        let marker_sep = loop {
141            let Some(line) = lines.next() else {
142                return Err(parse_error(format!(
143                    "unterminated conflict (no ======= after {:?})",
144                    marker_ours.trim_end()
145                )));
146            };
147            // Only the FIRST `|`-run is the diff3 base marker; a later matching
148            // line is base *content* (a region has exactly one base marker — a
149            // repeated one used to overwrite it and lose a line on render).
150            if base.is_none() && marker_run(line, '|') == Some(n) {
151                base_label = Some(marker_label(line, n));
152                marker_base = Some(line.to_string());
153                base = Some(Vec::new());
154                continue;
155            }
156            if marker_run(line, '=') == Some(n) {
157                break line.to_string();
158            }
159            match &mut base {
160                Some(base_lines) => base_lines.push(line.to_string()),
161                None => ours.push(line.to_string()),
162            }
163        };
164
165        // Theirs, until the end marker.
166        let mut theirs = Vec::new();
167        let marker_end = loop {
168            let Some(line) = lines.next() else {
169                return Err(parse_error(format!(
170                    "unterminated conflict (no >>>>>>> after {:?})",
171                    marker_ours.trim_end()
172                )));
173            };
174            if marker_run(line, '>') == Some(n) {
175                break line.to_string();
176            }
177            theirs.push(line.to_string());
178        };
179        let theirs_label = marker_label(&marker_end, n);
180
181        segments.push(ConflictSegment::Conflict(Box::new(ConflictRegion {
182            ours_label,
183            base_label,
184            theirs_label,
185            ours,
186            base,
187            theirs,
188            marker_len: n,
189            marker_ours,
190            marker_base,
191            marker_sep,
192            marker_end,
193        })));
194    }
195    if !text.is_empty() {
196        segments.push(ConflictSegment::Text(text));
197    }
198    Ok(segments)
199}
200
201/// Re-render segments verbatim — the byte-exact inverse of
202/// [`parse_conflicts`].
203pub fn render(segments: &[ConflictSegment]) -> String {
204    let mut out = String::new();
205    for segment in segments {
206        match segment {
207            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
208            ConflictSegment::Conflict(region) => {
209                out.push_str(&region.marker_ours);
210                region.ours.iter().for_each(|l| out.push_str(l));
211                if let Some(marker) = &region.marker_base {
212                    out.push_str(marker);
213                    if let Some(base) = &region.base {
214                        base.iter().for_each(|l| out.push_str(l));
215                    }
216                }
217                out.push_str(&region.marker_sep);
218                region.theirs.iter().for_each(|l| out.push_str(l));
219                out.push_str(&region.marker_end);
220            }
221        }
222    }
223    out
224}
225
226/// Produce the file content with every conflict resolved to `side`.
227///
228/// Errors with a clear message when `side` is [`ResolutionSide::Base`] and a
229/// region has no base (2-way `merge` style records none).
230pub fn resolve(segments: &[ConflictSegment], side: ResolutionSide) -> Result<String> {
231    let mut out = String::new();
232    for segment in segments {
233        match segment {
234            ConflictSegment::Text(lines) => lines.iter().for_each(|l| out.push_str(l)),
235            ConflictSegment::Conflict(region) => {
236                let chosen = match side {
237                    ResolutionSide::Ours => &region.ours,
238                    ResolutionSide::Theirs => &region.theirs,
239                    ResolutionSide::Base => region.base.as_ref().ok_or_else(|| Error::Spawn {
240                        program: BINARY.to_string(),
241                        source: std::io::Error::new(
242                            std::io::ErrorKind::InvalidInput,
243                            "cannot resolve to Base: this conflict records no base \
244                             (2-way `merge` style; use diff3/zdiff3)",
245                        ),
246                    })?,
247                };
248                chosen.iter().for_each(|l| out.push_str(l));
249            }
250        }
251    }
252    Ok(out)
253}
254
// Example-based unit tests: parsing of each conflict style, byte-exact
// rendering, one-sided resolution, and rejection of malformed input.
#[cfg(test)]
mod tests {
    use super::*;

    const MERGE_2WAY: &str =
        "line 1\n<<<<<<< HEAD\nmain line 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";
    const DIFF3: &str = "line 1\n<<<<<<< HEAD\nmain line 2\n||||||| 0b025ce\nline 2\n=======\nfeature line 2\n>>>>>>> feature\nline 3\n";

    #[test]
    fn parses_two_way_merge_style() {
        let segments = parse_conflicts(MERGE_2WAY).expect("parse");
        assert_eq!(segments.len(), 3);
        let ConflictSegment::Conflict(region) = &segments[1] else {
            panic!("expected a conflict, got {segments:?}");
        };
        assert_eq!(region.ours_label, "HEAD");
        assert_eq!(region.theirs_label, "feature");
        assert_eq!(region.ours, ["main line 2\n"]);
        assert_eq!(region.theirs, ["feature line 2\n"]);
        assert!(region.base.is_none());
        assert_eq!(region.marker_len, 7);
    }

    #[test]
    fn parses_diff3_with_base() {
        let segments = parse_conflicts(DIFF3).expect("parse");
        let ConflictSegment::Conflict(region) = &segments[1] else {
            panic!("expected a conflict");
        };
        assert_eq!(region.base_label.as_deref(), Some("0b025ce"));
        assert_eq!(region.base.as_deref(), Some(&["line 2\n".to_string()][..]));
    }

    // Proptest-found regression (seed committed in proptest-regressions/): a
    // SECOND `|`-run line inside a diff3 region is base *content*, not a
    // replacement base marker — the overwrite used to drop a line on render,
    // breaking the byte-exact roundtrip.
    #[test]
    fn repeated_base_marker_line_is_base_content() {
        let s = "<<<<<<<< HEAD\n|||||||| base\n|||||||| base\n========\n>>>>>>>> branché\n";
        let segments = parse_conflicts(s).expect("parse");
        let ConflictSegment::Conflict(region) = &segments[0] else {
            panic!("expected a conflict, got {segments:?}");
        };
        assert_eq!(
            region.base.as_deref(),
            Some(&["|||||||| base\n".to_string()][..]),
            "the second |-run line is content of the base section"
        );
        assert_eq!(render(&segments), s, "roundtrip must be byte-exact");
    }

    // Roundtrip must be byte-exact — including CRLF, custom marker sizes,
    // and a conflict at EOF with no trailing newline.
    #[test]
    fn render_roundtrips_exactly() {
        let crlf = "a\r\n<<<<<<< HEAD\r\nours\r\n=======\r\ntheirs\r\n>>>>>>> b\r\nz\r\n";
        let wide = "<<<<<<<<<<<<<<< HEAD\nours\n===============\ntheirs\n>>>>>>>>>>>>>>> b\n";
        let eof = "x\n<<<<<<< HEAD\nours\n=======\ntheirs\n>>>>>>> b";
        for sample in [MERGE_2WAY, DIFF3, crlf, wide, eof] {
            let segments = parse_conflicts(sample).expect("parse");
            assert_eq!(render(&segments), sample, "roundtrip");
        }
        // The wide sample detected the larger marker run.
        let segments = parse_conflicts(wide).unwrap();
        let ConflictSegment::Conflict(region) = &segments[0] else {
            panic!()
        };
        assert_eq!(region.marker_len, 15);
    }

    #[test]
    fn resolve_takes_one_side_everywhere() {
        let two = format!("{MERGE_2WAY}between\n{MERGE_2WAY}");
        let segments = parse_conflicts(&two).expect("parse");
        assert_eq!(
            resolve(&segments, ResolutionSide::Ours).unwrap(),
            "line 1\nmain line 2\nline 3\nbetween\nline 1\nmain line 2\nline 3\n"
        );
        assert_eq!(
            resolve(&segments, ResolutionSide::Theirs).unwrap(),
            "line 1\nfeature line 2\nline 3\nbetween\nline 1\nfeature line 2\nline 3\n"
        );
        // No base recorded in merge style → Base resolution is refused.
        assert!(resolve(&segments, ResolutionSide::Base).is_err());

        let diff3 = parse_conflicts(DIFF3).expect("parse");
        assert_eq!(
            resolve(&diff3, ResolutionSide::Base).unwrap(),
            "line 1\nline 2\nline 3\n"
        );
    }

    #[test]
    fn empty_sides_and_clean_files_parse() {
        // One side deleted everything.
        let deletion = "<<<<<<< HEAD\n=======\nkept\n>>>>>>> b\n";
        let segments = parse_conflicts(deletion).expect("parse");
        assert_eq!(resolve(&segments, ResolutionSide::Ours).unwrap(), "");
        // A file without conflicts is one text segment.
        let clean = parse_conflicts("just\ntext\n").expect("parse");
        assert_eq!(clean.len(), 1);
        assert!(!has_conflict_markers("just\ntext\n"));
        assert!(has_conflict_markers(MERGE_2WAY));
    }

    #[test]
    fn malformed_files_are_parse_errors() {
        for bad in [
            "<<<<<<< HEAD\nours\n",                  // no separator
            "<<<<<<< HEAD\nours\n=======\ntheirs\n", // no terminator
            "=======\n",                             // stray separator
            ">>>>>>> b\n",                           // stray end
        ] {
            assert!(
                matches!(parse_conflicts(bad), Err(Error::Parse { .. })),
                "{bad:?} must fail"
            );
        }
    }
}

// Property-based fuzzing. The marker grammar slices on marker-run lengths and
// must never panic on a hostile file (a real conflicted file from a git we
// don't control), and `render(parse(x)?) == x` must hold byte-for-byte — the
// regression net for the marker-detection / byte-offset logic.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// A line drawn from the conflict-marker vocabulary plus multibyte text,
    /// with variable marker-run lengths (7..16) and CRLF, so a joined document
    /// reaches the marker-slicing branches with adversarial content.
    fn conflict_line() -> impl Strategy<Value = String> {
        prop_oneof![
            (7usize..16).prop_map(|n| format!("{} HEAD\n", "<".repeat(n))),
            (7usize..16).prop_map(|n| format!("{}\n", "=".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} branché\n", ">".repeat(n))),
            (7usize..16).prop_map(|n| format!("{} base\n", "|".repeat(n))),
            "[a-zé<>=|]{0,14}\r?\n", // text incl. marker-ish chars + multibyte + CRLF
            Just("\n".to_string()),
        ]
    }

    /// A document of up to 29 generated lines, concatenated in order.
    fn conflict_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(conflict_line(), 0..30).prop_map(|lines| lines.concat())
    }

    proptest! {
        #[test]
        fn parse_never_panics_on_arbitrary_text(s in any::<String>()) {
            let _ = has_conflict_markers(&s);
            // Whatever arbitrary text happens to parse must also round-trip
            // byte-exact — the load-bearing invariant, asserted on this generator
            // too (not just the structured one below).
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        #[test]
        fn parse_never_panics_on_structured_text(s in conflict_doc()) {
            let _ = parse_conflicts(&s);
        }

        // The load-bearing invariant: whenever the file parses, re-rendering is
        // byte-exact.
        #[test]
        fn render_roundtrips_whatever_parses(s in conflict_doc()) {
            if let Ok(segments) = parse_conflicts(&s) {
                prop_assert_eq!(render(&segments), s);
            }
        }

        // A marker-free file is one Text segment that renders back unchanged.
        #[test]
        fn marker_free_files_are_a_single_text_segment(s in "[a-zé \t\r\n]{0,80}") {
            prop_assume!(!has_conflict_markers(&s));
            let segments = parse_conflicts(&s).expect("no markers → Ok");
            prop_assert_eq!(render(&segments), s);
        }
    }
}