jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::io;
18use std::io::Read;
19use std::io::Write;
20use std::iter::zip;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::stream::BoxStream;
25use futures::try_join;
26use futures::Stream;
27use futures::StreamExt as _;
28use futures::TryStreamExt as _;
29use itertools::Itertools as _;
30use pollster::FutureExt as _;
31
32use crate::backend::BackendError;
33use crate::backend::BackendResult;
34use crate::backend::CommitId;
35use crate::backend::FileId;
36use crate::backend::SymlinkId;
37use crate::backend::TreeId;
38use crate::backend::TreeValue;
39use crate::copies::CopiesTreeDiffEntry;
40use crate::copies::CopiesTreeDiffEntryPath;
41use crate::diff::Diff;
42use crate::diff::DiffHunk;
43use crate::diff::DiffHunkKind;
44use crate::files;
45use crate::files::MergeResult;
46use crate::merge::Merge;
47use crate::merge::MergeBuilder;
48use crate::merge::MergedTreeValue;
49use crate::repo_path::RepoPath;
50use crate::store::Store;
51
52/// Minimum length of conflict markers.
53pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
54
55/// If a file already contains lines which look like conflict markers of length
56/// N, then the conflict markers we add will be of length (N + increment). This
57/// number is chosen to make the conflict markers noticeably longer than the
58/// existing markers.
59const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
60
61/// Comment for missing terminating newline in a term of a conflict.
62const NO_EOL_COMMENT: &str = " (no terminating newline)";
63
64/// Comment for missing terminating newline in the "add" side of a diff.
65const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
66
67/// Comment for missing terminating newline in the "remove" side of a diff.
68const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
69
70fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
71    for hunk in hunks {
72        match hunk.kind {
73            DiffHunkKind::Matching => {
74                debug_assert!(hunk.contents.iter().all_equal());
75                for line in hunk.contents[0].lines_with_terminator() {
76                    file.write_all(b" ")?;
77                    write_and_ensure_newline(file, line)?;
78                }
79            }
80            DiffHunkKind::Different => {
81                for line in hunk.contents[0].lines_with_terminator() {
82                    file.write_all(b"-")?;
83                    write_and_ensure_newline(file, line)?;
84                }
85                for line in hunk.contents[1].lines_with_terminator() {
86                    file.write_all(b"+")?;
87                    write_and_ensure_newline(file, line)?;
88                }
89            }
90        }
91    }
92    Ok(())
93}
94
95async fn get_file_contents(
96    store: &Store,
97    path: &RepoPath,
98    term: &Option<FileId>,
99) -> BackendResult<BString> {
100    match term {
101        Some(id) => {
102            let mut content = vec![];
103            store
104                .read_file_async(path, id)
105                .await?
106                .read_to_end(&mut content)
107                .map_err(|err| BackendError::ReadFile {
108                    path: path.to_owned(),
109                    id: id.clone(),
110                    source: err.into(),
111                })?;
112            Ok(BString::new(content))
113        }
114        // If the conflict had removed the file on one side, we pretend that the file
115        // was empty there.
116        None => Ok(BString::new(vec![])),
117    }
118}
119
120pub async fn extract_as_single_hunk(
121    merge: &Merge<Option<FileId>>,
122    store: &Store,
123    path: &RepoPath,
124) -> BackendResult<Merge<BString>> {
125    let builder: MergeBuilder<BString> = futures::stream::iter(merge.iter())
126        .then(|term| get_file_contents(store, path, term))
127        .try_collect()
128        .await?;
129    Ok(builder.build())
130}
131
132/// A type similar to `MergedTreeValue` but with associated data to include in
133/// e.g. the working copy or in a diff.
134pub enum MaterializedTreeValue {
135    Absent,
136    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
137    File(MaterializedFileValue),
138    Symlink {
139        id: SymlinkId,
140        target: String,
141    },
142    FileConflict {
143        id: Merge<Option<FileId>>,
144        // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
145        // when null bytes found?
146        contents: Merge<BString>,
147        executable: bool,
148    },
149    OtherConflict {
150        id: MergedTreeValue,
151    },
152    GitSubmodule(CommitId),
153    Tree(TreeId),
154}
155
156impl MaterializedTreeValue {
157    pub fn is_absent(&self) -> bool {
158        matches!(self, MaterializedTreeValue::Absent)
159    }
160
161    pub fn is_present(&self) -> bool {
162        !self.is_absent()
163    }
164}
165
166/// [`TreeValue::File`] with file content `reader`.
167pub struct MaterializedFileValue {
168    pub id: FileId,
169    pub executable: bool,
170    pub reader: Box<dyn Read>,
171}
172
173impl MaterializedFileValue {
174    /// Reads file content until EOF. The provided `path` is used only for error
175    /// reporting purpose.
176    pub fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
177        let mut buf = Vec::new();
178        self.reader
179            .read_to_end(&mut buf)
180            .map_err(|err| BackendError::ReadFile {
181                path: path.to_owned(),
182                id: self.id.clone(),
183                source: err.into(),
184            })?;
185        Ok(buf)
186    }
187}
188
189/// Reads the data associated with a `MergedTreeValue` so it can be written to
190/// e.g. the working copy or diff.
191pub async fn materialize_tree_value(
192    store: &Store,
193    path: &RepoPath,
194    value: MergedTreeValue,
195) -> BackendResult<MaterializedTreeValue> {
196    match materialize_tree_value_no_access_denied(store, path, value).await {
197        Err(BackendError::ReadAccessDenied { source, .. }) => {
198            Ok(MaterializedTreeValue::AccessDenied(source))
199        }
200        result => result,
201    }
202}
203
204async fn materialize_tree_value_no_access_denied(
205    store: &Store,
206    path: &RepoPath,
207    value: MergedTreeValue,
208) -> BackendResult<MaterializedTreeValue> {
209    match value.into_resolved() {
210        Ok(None) => Ok(MaterializedTreeValue::Absent),
211        Ok(Some(TreeValue::File { id, executable })) => {
212            let reader = store.read_file_async(path, &id).await?;
213            Ok(MaterializedTreeValue::File(MaterializedFileValue {
214                id,
215                executable,
216                reader,
217            }))
218        }
219        Ok(Some(TreeValue::Symlink(id))) => {
220            let target = store.read_symlink_async(path, &id).await?;
221            Ok(MaterializedTreeValue::Symlink { id, target })
222        }
223        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
224        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
225        Ok(Some(TreeValue::Conflict(_))) => {
226            panic!("cannot materialize legacy conflict object at path {path:?}");
227        }
228        Err(conflict) => {
229            let Some(file_merge) = conflict.to_file_merge() else {
230                return Ok(MaterializedTreeValue::OtherConflict { id: conflict });
231            };
232            let file_merge = file_merge.simplify();
233            let contents = extract_as_single_hunk(&file_merge, store, path).await?;
234            let executable = if let Some(merge) = conflict.to_executable_merge() {
235                merge.resolve_trivial().copied().unwrap_or_default()
236            } else {
237                false
238            };
239            Ok(MaterializedTreeValue::FileConflict {
240                id: file_merge,
241                contents,
242                executable,
243            })
244        }
245    }
246}
247
248/// Describes what style should be used when materializing conflicts.
249#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
250#[serde(rename_all = "kebab-case")]
251pub enum ConflictMarkerStyle {
252    /// Style which shows a snapshot and a series of diffs to apply.
253    #[default]
254    Diff,
255    /// Style which shows a snapshot for each base and side.
256    Snapshot,
257    /// Style which replicates Git's "diff3" style to support external tools.
258    Git,
259}
260
261/// Characters which can be repeated to form a conflict marker line when
262/// materializing and parsing conflicts.
263#[derive(Clone, Copy, PartialEq, Eq)]
264#[repr(u8)]
265enum ConflictMarkerLineChar {
266    ConflictStart = b'<',
267    ConflictEnd = b'>',
268    Add = b'+',
269    Remove = b'-',
270    Diff = b'%',
271    GitAncestor = b'|',
272    GitSeparator = b'=',
273}
274
275impl ConflictMarkerLineChar {
276    /// Get the ASCII byte used for this conflict marker.
277    fn to_byte(self) -> u8 {
278        self as u8
279    }
280
281    /// Parse a byte to see if it corresponds with any kind of conflict marker.
282    fn parse_byte(byte: u8) -> Option<Self> {
283        match byte {
284            b'<' => Some(Self::ConflictStart),
285            b'>' => Some(Self::ConflictEnd),
286            b'+' => Some(Self::Add),
287            b'-' => Some(Self::Remove),
288            b'%' => Some(Self::Diff),
289            b'|' => Some(Self::GitAncestor),
290            b'=' => Some(Self::GitSeparator),
291            _ => None,
292        }
293    }
294}
295
296/// Represents a conflict marker line parsed from the file. Conflict marker
297/// lines consist of a single ASCII character repeated for a certain length.
298struct ConflictMarkerLine {
299    kind: ConflictMarkerLineChar,
300    len: usize,
301}
302
303/// Write a conflict marker to an output file.
304fn write_conflict_marker(
305    output: &mut dyn Write,
306    kind: ConflictMarkerLineChar,
307    len: usize,
308    suffix_text: &str,
309) -> io::Result<()> {
310    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
311
312    if suffix_text.is_empty() {
313        writeln!(output, "{conflict_marker}")
314    } else {
315        writeln!(output, "{conflict_marker} {suffix_text}")
316    }
317}
318
319/// Parse a conflict marker from a line of a file. The conflict marker may have
320/// any length (even less than MIN_CONFLICT_MARKER_LEN).
321fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
322    let first_byte = *line.first()?;
323    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
324    let len = line.iter().take_while(|&&b| b == first_byte).count();
325
326    if let Some(next_byte) = line.get(len) {
327        // If there is a character after the marker, it must be ASCII whitespace
328        if !next_byte.is_ascii_whitespace() {
329            return None;
330        }
331    }
332
333    Some(ConflictMarkerLine { kind, len })
334}
335
336/// Parse a conflict marker, expecting it to be at least a certain length. Any
337/// shorter conflict markers are ignored.
338fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
339    parse_conflict_marker_any_len(line)
340        .filter(|marker| marker.len >= expected_len)
341        .map(|marker| marker.kind)
342}
343
344/// Given a Merge of files, choose the conflict marker length to use when
345/// materializing conflicts.
346pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
347    let max_existing_marker_len = single_hunk
348        .iter()
349        .flat_map(|file| file.as_ref().lines_with_terminator())
350        .filter_map(parse_conflict_marker_any_len)
351        .map(|marker| marker.len)
352        .max()
353        .unwrap_or_default();
354
355    max_existing_marker_len
356        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
357        .max(MIN_CONFLICT_MARKER_LEN)
358}
359
360pub fn materialize_merge_result<T: AsRef<[u8]>>(
361    single_hunk: &Merge<T>,
362    conflict_marker_style: ConflictMarkerStyle,
363    output: &mut dyn Write,
364) -> io::Result<()> {
365    let merge_result = files::merge(single_hunk);
366    match &merge_result {
367        MergeResult::Resolved(content) => output.write_all(content),
368        MergeResult::Conflict(hunks) => {
369            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
370            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
371        }
372    }
373}
374
375pub fn materialize_merge_result_with_marker_len<T: AsRef<[u8]>>(
376    single_hunk: &Merge<T>,
377    conflict_marker_style: ConflictMarkerStyle,
378    conflict_marker_len: usize,
379    output: &mut dyn Write,
380) -> io::Result<()> {
381    let merge_result = files::merge(single_hunk);
382    match &merge_result {
383        MergeResult::Resolved(content) => output.write_all(content),
384        MergeResult::Conflict(hunks) => {
385            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
386        }
387    }
388}
389
390pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
391    single_hunk: &Merge<T>,
392    conflict_marker_style: ConflictMarkerStyle,
393) -> BString {
394    let merge_result = files::merge(single_hunk);
395    match merge_result {
396        MergeResult::Resolved(content) => content,
397        MergeResult::Conflict(hunks) => {
398            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
399            let mut output = Vec::new();
400            materialize_conflict_hunks(
401                &hunks,
402                conflict_marker_style,
403                conflict_marker_len,
404                &mut output,
405            )
406            .expect("writing to an in-memory buffer should never fail");
407            output.into()
408        }
409    }
410}
411
412pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
413    single_hunk: &Merge<T>,
414    conflict_marker_style: ConflictMarkerStyle,
415    conflict_marker_len: usize,
416) -> BString {
417    let merge_result = files::merge(single_hunk);
418    match merge_result {
419        MergeResult::Resolved(content) => content,
420        MergeResult::Conflict(hunks) => {
421            let mut output = Vec::new();
422            materialize_conflict_hunks(
423                &hunks,
424                conflict_marker_style,
425                conflict_marker_len,
426                &mut output,
427            )
428            .expect("writing to an in-memory buffer should never fail");
429            output.into()
430        }
431    }
432}
433
434fn materialize_conflict_hunks(
435    hunks: &[Merge<BString>],
436    conflict_marker_style: ConflictMarkerStyle,
437    conflict_marker_len: usize,
438    output: &mut dyn Write,
439) -> io::Result<()> {
440    let num_conflicts = hunks
441        .iter()
442        .filter(|hunk| hunk.as_resolved().is_none())
443        .count();
444    let mut conflict_index = 0;
445    for hunk in hunks {
446        if let Some(content) = hunk.as_resolved() {
447            output.write_all(content)?;
448        } else {
449            conflict_index += 1;
450            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
451
452            match (conflict_marker_style, hunk.as_slice()) {
453                // 2-sided conflicts can use Git-style conflict markers
454                (ConflictMarkerStyle::Git, [left, base, right]) => {
455                    materialize_git_style_conflict(
456                        left,
457                        base,
458                        right,
459                        &conflict_info,
460                        conflict_marker_len,
461                        output,
462                    )?;
463                }
464                _ => {
465                    materialize_jj_style_conflict(
466                        hunk,
467                        &conflict_info,
468                        conflict_marker_style,
469                        conflict_marker_len,
470                        output,
471                    )?;
472                }
473            }
474        }
475    }
476    Ok(())
477}
478
479fn materialize_git_style_conflict(
480    left: &[u8],
481    base: &[u8],
482    right: &[u8],
483    conflict_info: &str,
484    conflict_marker_len: usize,
485    output: &mut dyn Write,
486) -> io::Result<()> {
487    write_conflict_marker(
488        output,
489        ConflictMarkerLineChar::ConflictStart,
490        conflict_marker_len,
491        &format!("Side #1 ({conflict_info})"),
492    )?;
493    write_and_ensure_newline(output, left)?;
494
495    write_conflict_marker(
496        output,
497        ConflictMarkerLineChar::GitAncestor,
498        conflict_marker_len,
499        "Base",
500    )?;
501    write_and_ensure_newline(output, base)?;
502
503    // VS Code doesn't seem to support any trailing text on the separator line
504    write_conflict_marker(
505        output,
506        ConflictMarkerLineChar::GitSeparator,
507        conflict_marker_len,
508        "",
509    )?;
510
511    write_and_ensure_newline(output, right)?;
512    write_conflict_marker(
513        output,
514        ConflictMarkerLineChar::ConflictEnd,
515        conflict_marker_len,
516        &format!("Side #2 ({conflict_info} ends)"),
517    )?;
518
519    Ok(())
520}
521
522fn materialize_jj_style_conflict(
523    hunk: &Merge<BString>,
524    conflict_info: &str,
525    conflict_marker_style: ConflictMarkerStyle,
526    conflict_marker_len: usize,
527    output: &mut dyn Write,
528) -> io::Result<()> {
529    // Write a positive snapshot (side) of a conflict
530    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
531        write_conflict_marker(
532            output,
533            ConflictMarkerLineChar::Add,
534            conflict_marker_len,
535            &format!(
536                "Contents of side #{}{}",
537                add_index + 1,
538                maybe_no_eol_comment(data)
539            ),
540        )?;
541        write_and_ensure_newline(output, data)
542    };
543
544    // Write a negative snapshot (base) of a conflict
545    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
546        write_conflict_marker(
547            output,
548            ConflictMarkerLineChar::Remove,
549            conflict_marker_len,
550            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
551        )?;
552        write_and_ensure_newline(output, data)
553    };
554
555    // Write a diff from a negative term to a positive term
556    let write_diff =
557        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
558            let no_eol_remove = diff
559                .last()
560                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
561            let no_eol_add = diff
562                .last()
563                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
564            let no_eol_comment = match (no_eol_remove, no_eol_add) {
565                (true, true) => NO_EOL_COMMENT,
566                (true, _) => REMOVE_NO_EOL_COMMENT,
567                (_, true) => ADD_NO_EOL_COMMENT,
568                _ => "",
569            };
570            write_conflict_marker(
571                output,
572                ConflictMarkerLineChar::Diff,
573                conflict_marker_len,
574                &format!(
575                    "Changes from {base_str} to side #{}{no_eol_comment}",
576                    add_index + 1
577                ),
578            )?;
579            write_diff_hunks(diff, output)
580        };
581
582    write_conflict_marker(
583        output,
584        ConflictMarkerLineChar::ConflictStart,
585        conflict_marker_len,
586        conflict_info,
587    )?;
588    let mut add_index = 0;
589    for (base_index, left) in hunk.removes().enumerate() {
590        // The vast majority of conflicts one actually tries to resolve manually have 1
591        // base.
592        let base_str = if hunk.removes().len() == 1 {
593            "base".to_string()
594        } else {
595            format!("base #{}", base_index + 1)
596        };
597
598        let Some(right1) = hunk.get_add(add_index) else {
599            // If we have no more positive terms, emit the remaining negative terms as
600            // snapshots.
601            write_base(&base_str, left, output)?;
602            continue;
603        };
604
605        // For any style other than "diff", always emit sides and bases separately
606        if conflict_marker_style != ConflictMarkerStyle::Diff {
607            write_side(add_index, right1, output)?;
608            write_base(&base_str, left, output)?;
609            add_index += 1;
610            continue;
611        }
612
613        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
614        // Check if the diff against the next positive term is better. Since we want to
615        // preserve the order of the terms, we don't match against any later positive
616        // terms.
617        if let Some(right2) = hunk.get_add(add_index + 1) {
618            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
619            if diff_size(&diff2) < diff_size(&diff1) {
620                // If the next positive term is a better match, emit the current positive term
621                // as a snapshot and the next positive term as a diff.
622                write_side(add_index, right1, output)?;
623                write_diff(&base_str, add_index + 1, &diff2, output)?;
624                add_index += 2;
625                continue;
626            }
627        }
628
629        write_diff(&base_str, add_index, &diff1, output)?;
630        add_index += 1;
631    }
632
633    // Emit the remaining positive terms as snapshots.
634    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
635        write_side(add_index, slice, output)?;
636    }
637    write_conflict_marker(
638        output,
639        ConflictMarkerLineChar::ConflictEnd,
640        conflict_marker_len,
641        &format!("{conflict_info} ends"),
642    )?;
643    Ok(())
644}
645
646fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
647    if has_no_eol(slice) {
648        NO_EOL_COMMENT
649    } else {
650        ""
651    }
652}
653
654// Write a chunk of data, ensuring that it doesn't end with a line which is
655// missing its terminating newline.
656fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
657    output.write_all(data)?;
658    if has_no_eol(data) {
659        writeln!(output)?;
660    }
661    Ok(())
662}
663
664// Check whether a slice is missing its terminating newline character.
665fn has_no_eol(slice: &[u8]) -> bool {
666    slice.last().is_some_and(|&last| last != b'\n')
667}
668
669fn diff_size(hunks: &[DiffHunk]) -> usize {
670    hunks
671        .iter()
672        .map(|hunk| match hunk.kind {
673            DiffHunkKind::Matching => 0,
674            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
675        })
676        .sum()
677}
678
679pub struct MaterializedTreeDiffEntry {
680    pub path: CopiesTreeDiffEntryPath,
681    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
682}
683
684pub fn materialized_diff_stream<'a>(
685    store: &'a Store,
686    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
687) -> impl Stream<Item = MaterializedTreeDiffEntry> + use<'a> {
688    tree_diff
689        .map(|CopiesTreeDiffEntry { path, values }| async {
690            match values {
691                Err(err) => MaterializedTreeDiffEntry {
692                    path,
693                    values: Err(err),
694                },
695                Ok((before, after)) => {
696                    let before_future = materialize_tree_value(store, path.source(), before);
697                    let after_future = materialize_tree_value(store, path.target(), after);
698                    let values = try_join!(before_future, after_future);
699                    MaterializedTreeDiffEntry { path, values }
700                }
701            }
702        })
703        .buffered((store.concurrency() / 2).max(1))
704}
705
706/// Parses conflict markers from a slice.
707///
708/// Returns `None` if there were no valid conflict markers. The caller
709/// has to provide the expected number of merge sides (adds). Conflict
710/// markers that are otherwise valid will be considered invalid if
711/// they don't have the expected arity.
712///
713/// All conflict markers in the file must be at least as long as the expected
714/// length. Any shorter conflict markers will be ignored.
715// TODO: "parse" is not usually the opposite of "materialize", so maybe we
716// should rename them to "serialize" and "deserialize"?
717pub fn parse_conflict(
718    input: &[u8],
719    num_sides: usize,
720    expected_marker_len: usize,
721) -> Option<Vec<Merge<BString>>> {
722    if input.is_empty() {
723        return None;
724    }
725    let mut hunks = vec![];
726    let mut pos = 0;
727    let mut resolved_start = 0;
728    let mut conflict_start = None;
729    let mut conflict_start_len = 0;
730    for line in input.lines_with_terminator() {
731        match parse_conflict_marker(line, expected_marker_len) {
732            Some(ConflictMarkerLineChar::ConflictStart) => {
733                conflict_start = Some(pos);
734                conflict_start_len = line.len();
735            }
736            Some(ConflictMarkerLineChar::ConflictEnd) => {
737                if let Some(conflict_start_index) = conflict_start.take() {
738                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
739                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
740                    if hunk.num_sides() == num_sides {
741                        let resolved_slice = &input[resolved_start..conflict_start_index];
742                        if !resolved_slice.is_empty() {
743                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
744                        }
745                        hunks.push(hunk);
746                        resolved_start = pos + line.len();
747                    }
748                }
749            }
750            _ => {}
751        }
752        pos += line.len();
753    }
754
755    if hunks.is_empty() {
756        None
757    } else {
758        if resolved_start < input.len() {
759            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
760        }
761        Some(hunks)
762    }
763}
764
765/// This method handles parsing both JJ-style and Git-style conflict markers,
766/// meaning that switching conflict marker styles won't prevent existing files
767/// with other conflict marker styles from being parsed successfully. The
768/// conflict marker style to use for parsing is determined based on the first
769/// line of the hunk.
770fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
771    // If the hunk starts with a conflict marker, find its first character
772    let initial_conflict_marker = input
773        .lines_with_terminator()
774        .next()
775        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
776
777    match initial_conflict_marker {
778        // JJ-style conflicts must start with one of these 3 conflict marker lines
779        Some(
780            ConflictMarkerLineChar::Diff
781            | ConflictMarkerLineChar::Remove
782            | ConflictMarkerLineChar::Add,
783        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
784        // Git-style conflicts either must not start with a conflict marker line, or must start with
785        // the "|||||||" conflict marker line (if the first side was empty)
786        None | Some(ConflictMarkerLineChar::GitAncestor) => {
787            parse_git_style_conflict_hunk(input, expected_marker_len)
788        }
789        // No other conflict markers are allowed at the start of a hunk
790        Some(_) => Merge::resolved(BString::new(vec![])),
791    }
792}
793
794fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
795    enum State {
796        Diff,
797        Remove,
798        Add,
799        Unknown,
800    }
801    let mut state = State::Unknown;
802    let mut removes = vec![];
803    let mut adds = vec![];
804    for line in input.lines_with_terminator() {
805        match parse_conflict_marker(line, expected_marker_len) {
806            Some(ConflictMarkerLineChar::Diff) => {
807                state = State::Diff;
808                removes.push(BString::new(vec![]));
809                adds.push(BString::new(vec![]));
810                continue;
811            }
812            Some(ConflictMarkerLineChar::Remove) => {
813                state = State::Remove;
814                removes.push(BString::new(vec![]));
815                continue;
816            }
817            Some(ConflictMarkerLineChar::Add) => {
818                state = State::Add;
819                adds.push(BString::new(vec![]));
820                continue;
821            }
822            _ => {}
823        }
824        match state {
825            State::Diff => {
826                if let Some(rest) = line.strip_prefix(b"-") {
827                    removes.last_mut().unwrap().extend_from_slice(rest);
828                } else if let Some(rest) = line.strip_prefix(b"+") {
829                    adds.last_mut().unwrap().extend_from_slice(rest);
830                } else if let Some(rest) = line.strip_prefix(b" ") {
831                    removes.last_mut().unwrap().extend_from_slice(rest);
832                    adds.last_mut().unwrap().extend_from_slice(rest);
833                } else if line == b"\n" || line == b"\r\n" {
834                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
835                    // be unfortunate if this prevented the conflict from being parsed, so we add
836                    // the empty line to the "remove" and "add" as if there was a space in front
837                    removes.last_mut().unwrap().extend_from_slice(line);
838                    adds.last_mut().unwrap().extend_from_slice(line);
839                } else {
840                    // Doesn't look like a valid conflict
841                    return Merge::resolved(BString::new(vec![]));
842                }
843            }
844            State::Remove => {
845                removes.last_mut().unwrap().extend_from_slice(line);
846            }
847            State::Add => {
848                adds.last_mut().unwrap().extend_from_slice(line);
849            }
850            State::Unknown => {
851                // Doesn't look like a valid conflict
852                return Merge::resolved(BString::new(vec![]));
853            }
854        }
855    }
856
857    if adds.len() == removes.len() + 1 {
858        Merge::from_removes_adds(removes, adds)
859    } else {
860        // Doesn't look like a valid conflict
861        Merge::resolved(BString::new(vec![]))
862    }
863}
864
865fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
866    #[derive(PartialEq, Eq)]
867    enum State {
868        Left,
869        Base,
870        Right,
871    }
872    let mut state = State::Left;
873    let mut left = BString::new(vec![]);
874    let mut base = BString::new(vec![]);
875    let mut right = BString::new(vec![]);
876    for line in input.lines_with_terminator() {
877        match parse_conflict_marker(line, expected_marker_len) {
878            Some(ConflictMarkerLineChar::GitAncestor) => {
879                if state == State::Left {
880                    state = State::Base;
881                    continue;
882                } else {
883                    // Base must come after left
884                    return Merge::resolved(BString::new(vec![]));
885                }
886            }
887            Some(ConflictMarkerLineChar::GitSeparator) => {
888                if state == State::Base {
889                    state = State::Right;
890                    continue;
891                } else {
892                    // Right must come after base
893                    return Merge::resolved(BString::new(vec![]));
894                }
895            }
896            _ => {}
897        }
898        match state {
899            State::Left => left.extend_from_slice(line),
900            State::Base => base.extend_from_slice(line),
901            State::Right => right.extend_from_slice(line),
902        }
903    }
904
905    if state == State::Right {
906        Merge::from_vec(vec![left, base, right])
907    } else {
908        // Doesn't look like a valid conflict
909        Merge::resolved(BString::new(vec![]))
910    }
911}
912
913/// Parses conflict markers in `content` and returns an updated version of
914/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
915/// single resolves `FileId` will be returned.
916pub async fn update_from_content(
917    file_ids: &Merge<Option<FileId>>,
918    store: &Store,
919    path: &RepoPath,
920    content: &[u8],
921    conflict_marker_style: ConflictMarkerStyle,
922    conflict_marker_len: usize,
923) -> BackendResult<Merge<Option<FileId>>> {
924    let simplified_file_ids = file_ids.clone().simplify();
925
926    // First check if the new content is unchanged compared to the old content. If
927    // it is, we don't need parse the content or write any new objects to the
928    // store. This is also a way of making sure that unchanged tree/file
929    // conflicts (for example) are not converted to regular files in the working
930    // copy.
931    let mut old_content = Vec::with_capacity(content.len());
932    let merge_hunk = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
933    materialize_merge_result_with_marker_len(
934        &merge_hunk,
935        conflict_marker_style,
936        conflict_marker_len,
937        &mut old_content,
938    )
939    .unwrap();
940    if content == old_content {
941        return Ok(file_ids.clone());
942    }
943
944    // Parse conflicts from the new content using the arity of the simplified
945    // conflicts.
946    let Some(mut hunks) = parse_conflict(
947        content,
948        simplified_file_ids.num_sides(),
949        conflict_marker_len,
950    ) else {
951        // Either there are no markers or they don't have the expected arity
952        let file_id = store.write_file(path, &mut &content[..]).await?;
953        return Ok(Merge::normal(file_id));
954    };
955
956    // If there is a conflict at the end of the file and a term ends with a newline,
957    // check whether the original term ended with a newline. If it didn't, then
958    // remove the newline since it was added automatically when materializing.
959    if let Some(last_hunk) = hunks.last_mut().filter(|hunk| !hunk.is_resolved()) {
960        for (original_content, term) in merge_hunk.iter().zip_eq(last_hunk.iter_mut()) {
961            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
962                term.pop();
963            }
964        }
965    }
966
967    let mut contents = simplified_file_ids.map(|_| vec![]);
968    for hunk in hunks {
969        if let Some(slice) = hunk.as_resolved() {
970            for content in contents.iter_mut() {
971                content.extend_from_slice(slice);
972            }
973        } else {
974            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
975                content.extend(Vec::from(slice));
976            }
977        }
978    }
979
980    // If the user edited the empty placeholder for an absent side, we consider the
981    // conflict resolved.
982    if zip(contents.iter(), simplified_file_ids.iter())
983        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
984    {
985        let file_id = store.write_file(path, &mut &content[..]).await?;
986        return Ok(Merge::normal(file_id));
987    }
988
989    // Now write the new files contents we found by parsing the file with conflict
990    // markers.
991    // TODO: Write these concurrently
992    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
993        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
994            match file_id {
995                Some(_) => {
996                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
997                    Ok(Some(file_id))
998                }
999                None => {
1000                    // The missing side of a conflict is still represented by
1001                    // the empty string we materialized it as
1002                    Ok(None)
1003                }
1004            }
1005        })
1006        .try_collect()?;
1007
1008    // If the conflict was simplified, expand the conflict to the original
1009    // number of sides.
1010    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1011        file_ids
1012            .clone()
1013            .update_from_simplified(Merge::from_vec(new_file_ids))
1014    } else {
1015        Merge::from_vec(new_file_ids)
1016    };
1017    Ok(new_file_ids)
1018}