jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::io;
18use std::io::Read;
19use std::io::Write;
20use std::iter::zip;
21
22use bstr::BString;
23use bstr::ByteSlice;
24use futures::stream::BoxStream;
25use futures::try_join;
26use futures::Stream;
27use futures::StreamExt;
28use futures::TryStreamExt;
29use itertools::Itertools;
30use pollster::FutureExt;
31
32use crate::backend::BackendError;
33use crate::backend::BackendResult;
34use crate::backend::CommitId;
35use crate::backend::FileId;
36use crate::backend::SymlinkId;
37use crate::backend::TreeId;
38use crate::backend::TreeValue;
39use crate::copies::CopiesTreeDiffEntry;
40use crate::copies::CopiesTreeDiffEntryPath;
41use crate::diff::Diff;
42use crate::diff::DiffHunk;
43use crate::diff::DiffHunkKind;
44use crate::files;
45use crate::files::MergeResult;
46use crate::merge::Merge;
47use crate::merge::MergeBuilder;
48use crate::merge::MergedTreeValue;
49use crate::repo_path::RepoPath;
50use crate::store::Store;
51
/// Lower bound on the length of the conflict markers chosen when
/// materializing a conflict.
pub const MIN_CONFLICT_MARKER_LEN: usize = 7;

/// Extra length added on top of the longest marker-like line already present
/// in a file. If the file contains lines which look like conflict markers of
/// length N, the markers we write are of length (N + increment), which makes
/// them noticeably longer than the existing ones.
const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;

/// Marker-line comment used when a term of a conflict lacks its terminating
/// newline.
const NO_EOL_COMMENT: &str = " (no terminating newline)";

/// Marker-line comment used when only the "add" side of a diff lacks its
/// terminating newline.
const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";

/// Marker-line comment used when only the "remove" side of a diff lacks its
/// terminating newline.
const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
69
70fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
71    for hunk in hunks {
72        match hunk.kind {
73            DiffHunkKind::Matching => {
74                debug_assert!(hunk.contents.iter().all_equal());
75                for line in hunk.contents[0].lines_with_terminator() {
76                    file.write_all(b" ")?;
77                    write_and_ensure_newline(file, line)?;
78                }
79            }
80            DiffHunkKind::Different => {
81                for line in hunk.contents[0].lines_with_terminator() {
82                    file.write_all(b"-")?;
83                    write_and_ensure_newline(file, line)?;
84                }
85                for line in hunk.contents[1].lines_with_terminator() {
86                    file.write_all(b"+")?;
87                    write_and_ensure_newline(file, line)?;
88                }
89            }
90        }
91    }
92    Ok(())
93}
94
95async fn get_file_contents(
96    store: &Store,
97    path: &RepoPath,
98    term: &Option<FileId>,
99) -> BackendResult<BString> {
100    match term {
101        Some(id) => {
102            let mut content = vec![];
103            store
104                .read_file_async(path, id)
105                .await?
106                .read_to_end(&mut content)
107                .map_err(|err| BackendError::ReadFile {
108                    path: path.to_owned(),
109                    id: id.clone(),
110                    source: err.into(),
111                })?;
112            Ok(BString::new(content))
113        }
114        // If the conflict had removed the file on one side, we pretend that the file
115        // was empty there.
116        None => Ok(BString::new(vec![])),
117    }
118}
119
120pub async fn extract_as_single_hunk(
121    merge: &Merge<Option<FileId>>,
122    store: &Store,
123    path: &RepoPath,
124) -> BackendResult<Merge<BString>> {
125    let builder: MergeBuilder<BString> = futures::stream::iter(merge.iter())
126        .then(|term| get_file_contents(store, path, term))
127        .try_collect()
128        .await?;
129    Ok(builder.build())
130}
131
132/// A type similar to `MergedTreeValue` but with associated data to include in
133/// e.g. the working copy or in a diff.
134pub enum MaterializedTreeValue {
135    Absent,
136    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
137    File {
138        id: FileId,
139        executable: bool,
140        reader: Box<dyn Read>,
141    },
142    Symlink {
143        id: SymlinkId,
144        target: String,
145    },
146    FileConflict {
147        id: Merge<Option<FileId>>,
148        // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
149        // when null bytes found?
150        contents: Merge<BString>,
151        executable: bool,
152    },
153    OtherConflict {
154        id: MergedTreeValue,
155    },
156    GitSubmodule(CommitId),
157    Tree(TreeId),
158}
159
160impl MaterializedTreeValue {
161    pub fn is_absent(&self) -> bool {
162        matches!(self, MaterializedTreeValue::Absent)
163    }
164
165    pub fn is_present(&self) -> bool {
166        !self.is_absent()
167    }
168}
169
170/// Reads the data associated with a `MergedTreeValue` so it can be written to
171/// e.g. the working copy or diff.
172pub async fn materialize_tree_value(
173    store: &Store,
174    path: &RepoPath,
175    value: MergedTreeValue,
176) -> BackendResult<MaterializedTreeValue> {
177    match materialize_tree_value_no_access_denied(store, path, value).await {
178        Err(BackendError::ReadAccessDenied { source, .. }) => {
179            Ok(MaterializedTreeValue::AccessDenied(source))
180        }
181        result => result,
182    }
183}
184
185async fn materialize_tree_value_no_access_denied(
186    store: &Store,
187    path: &RepoPath,
188    value: MergedTreeValue,
189) -> BackendResult<MaterializedTreeValue> {
190    match value.into_resolved() {
191        Ok(None) => Ok(MaterializedTreeValue::Absent),
192        Ok(Some(TreeValue::File { id, executable })) => {
193            let reader = store.read_file_async(path, &id).await?;
194            Ok(MaterializedTreeValue::File {
195                id,
196                executable,
197                reader,
198            })
199        }
200        Ok(Some(TreeValue::Symlink(id))) => {
201            let target = store.read_symlink_async(path, &id).await?;
202            Ok(MaterializedTreeValue::Symlink { id, target })
203        }
204        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
205        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
206        Ok(Some(TreeValue::Conflict(_))) => {
207            panic!("cannot materialize legacy conflict object at path {path:?}");
208        }
209        Err(conflict) => {
210            let Some(file_merge) = conflict.to_file_merge() else {
211                return Ok(MaterializedTreeValue::OtherConflict { id: conflict });
212            };
213            let file_merge = file_merge.simplify();
214            let contents = extract_as_single_hunk(&file_merge, store, path).await?;
215            let executable = if let Some(merge) = conflict.to_executable_merge() {
216                merge.resolve_trivial().copied().unwrap_or_default()
217            } else {
218                false
219            };
220            Ok(MaterializedTreeValue::FileConflict {
221                id: file_merge,
222                contents,
223                executable,
224            })
225        }
226    }
227}
228
229/// Describes what style should be used when materializing conflicts.
230#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
231#[serde(rename_all = "kebab-case")]
232pub enum ConflictMarkerStyle {
233    /// Style which shows a snapshot and a series of diffs to apply.
234    #[default]
235    Diff,
236    /// Style which shows a snapshot for each base and side.
237    Snapshot,
238    /// Style which replicates Git's "diff3" style to support external tools.
239    Git,
240}
241
/// The characters that, when repeated, form a conflict marker line. Used
/// both when materializing and when parsing conflicts.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
enum ConflictMarkerLineChar {
    ConflictStart = b'<',
    ConflictEnd = b'>',
    Add = b'+',
    Remove = b'-',
    Diff = b'%',
    GitAncestor = b'|',
    GitSeparator = b'=',
}

impl ConflictMarkerLineChar {
    /// Lookup table for `parse_byte`. It must list every variant of the enum.
    const ALL: [Self; 7] = [
        Self::ConflictStart,
        Self::ConflictEnd,
        Self::Add,
        Self::Remove,
        Self::Diff,
        Self::GitAncestor,
        Self::GitSeparator,
    ];

    /// Returns the ASCII byte this kind of marker is made of.
    fn to_byte(self) -> u8 {
        self as u8
    }

    /// Returns the marker kind made of `byte`, or `None` if `byte` is not a
    /// conflict marker character.
    fn parse_byte(byte: u8) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|kind| kind.to_byte() == byte)
    }
}
276
277/// Represents a conflict marker line parsed from the file. Conflict marker
278/// lines consist of a single ASCII character repeated for a certain length.
279struct ConflictMarkerLine {
280    kind: ConflictMarkerLineChar,
281    len: usize,
282}
283
284/// Write a conflict marker to an output file.
285fn write_conflict_marker(
286    output: &mut dyn Write,
287    kind: ConflictMarkerLineChar,
288    len: usize,
289    suffix_text: &str,
290) -> io::Result<()> {
291    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
292
293    if suffix_text.is_empty() {
294        writeln!(output, "{conflict_marker}")
295    } else {
296        writeln!(output, "{conflict_marker} {suffix_text}")
297    }
298}
299
300/// Parse a conflict marker from a line of a file. The conflict marker may have
301/// any length (even less than MIN_CONFLICT_MARKER_LEN).
302fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
303    let first_byte = *line.first()?;
304    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
305    let len = line.iter().take_while(|&&b| b == first_byte).count();
306
307    if let Some(next_byte) = line.get(len) {
308        // If there is a character after the marker, it must be ASCII whitespace
309        if !next_byte.is_ascii_whitespace() {
310            return None;
311        }
312    }
313
314    Some(ConflictMarkerLine { kind, len })
315}
316
317/// Parse a conflict marker, expecting it to be at least a certain length. Any
318/// shorter conflict markers are ignored.
319fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
320    parse_conflict_marker_any_len(line)
321        .filter(|marker| marker.len >= expected_len)
322        .map(|marker| marker.kind)
323}
324
325/// Given a Merge of files, choose the conflict marker length to use when
326/// materializing conflicts.
327pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
328    let max_existing_marker_len = single_hunk
329        .iter()
330        .flat_map(|file| file.as_ref().lines_with_terminator())
331        .filter_map(parse_conflict_marker_any_len)
332        .map(|marker| marker.len)
333        .max()
334        .unwrap_or_default();
335
336    max_existing_marker_len
337        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
338        .max(MIN_CONFLICT_MARKER_LEN)
339}
340
341pub fn materialize_merge_result<T: AsRef<[u8]>>(
342    single_hunk: &Merge<T>,
343    conflict_marker_style: ConflictMarkerStyle,
344    output: &mut dyn Write,
345) -> io::Result<()> {
346    let merge_result = files::merge(single_hunk);
347    match &merge_result {
348        MergeResult::Resolved(content) => output.write_all(content),
349        MergeResult::Conflict(hunks) => {
350            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
351            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
352        }
353    }
354}
355
356pub fn materialize_merge_result_with_marker_len<T: AsRef<[u8]>>(
357    single_hunk: &Merge<T>,
358    conflict_marker_style: ConflictMarkerStyle,
359    conflict_marker_len: usize,
360    output: &mut dyn Write,
361) -> io::Result<()> {
362    let merge_result = files::merge(single_hunk);
363    match &merge_result {
364        MergeResult::Resolved(content) => output.write_all(content),
365        MergeResult::Conflict(hunks) => {
366            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
367        }
368    }
369}
370
371pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
372    single_hunk: &Merge<T>,
373    conflict_marker_style: ConflictMarkerStyle,
374) -> BString {
375    let merge_result = files::merge(single_hunk);
376    match merge_result {
377        MergeResult::Resolved(content) => content,
378        MergeResult::Conflict(hunks) => {
379            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
380            let mut output = Vec::new();
381            materialize_conflict_hunks(
382                &hunks,
383                conflict_marker_style,
384                conflict_marker_len,
385                &mut output,
386            )
387            .expect("writing to an in-memory buffer should never fail");
388            output.into()
389        }
390    }
391}
392
393pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
394    single_hunk: &Merge<T>,
395    conflict_marker_style: ConflictMarkerStyle,
396    conflict_marker_len: usize,
397) -> BString {
398    let merge_result = files::merge(single_hunk);
399    match merge_result {
400        MergeResult::Resolved(content) => content,
401        MergeResult::Conflict(hunks) => {
402            let mut output = Vec::new();
403            materialize_conflict_hunks(
404                &hunks,
405                conflict_marker_style,
406                conflict_marker_len,
407                &mut output,
408            )
409            .expect("writing to an in-memory buffer should never fail");
410            output.into()
411        }
412    }
413}
414
415fn materialize_conflict_hunks(
416    hunks: &[Merge<BString>],
417    conflict_marker_style: ConflictMarkerStyle,
418    conflict_marker_len: usize,
419    output: &mut dyn Write,
420) -> io::Result<()> {
421    let num_conflicts = hunks
422        .iter()
423        .filter(|hunk| hunk.as_resolved().is_none())
424        .count();
425    let mut conflict_index = 0;
426    for hunk in hunks {
427        if let Some(content) = hunk.as_resolved() {
428            output.write_all(content)?;
429        } else {
430            conflict_index += 1;
431            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
432
433            match (conflict_marker_style, hunk.as_slice()) {
434                // 2-sided conflicts can use Git-style conflict markers
435                (ConflictMarkerStyle::Git, [left, base, right]) => {
436                    materialize_git_style_conflict(
437                        left,
438                        base,
439                        right,
440                        &conflict_info,
441                        conflict_marker_len,
442                        output,
443                    )?;
444                }
445                _ => {
446                    materialize_jj_style_conflict(
447                        hunk,
448                        &conflict_info,
449                        conflict_marker_style,
450                        conflict_marker_len,
451                        output,
452                    )?;
453                }
454            }
455        }
456    }
457    Ok(())
458}
459
460fn materialize_git_style_conflict(
461    left: &[u8],
462    base: &[u8],
463    right: &[u8],
464    conflict_info: &str,
465    conflict_marker_len: usize,
466    output: &mut dyn Write,
467) -> io::Result<()> {
468    write_conflict_marker(
469        output,
470        ConflictMarkerLineChar::ConflictStart,
471        conflict_marker_len,
472        &format!("Side #1 ({conflict_info})"),
473    )?;
474    write_and_ensure_newline(output, left)?;
475
476    write_conflict_marker(
477        output,
478        ConflictMarkerLineChar::GitAncestor,
479        conflict_marker_len,
480        "Base",
481    )?;
482    write_and_ensure_newline(output, base)?;
483
484    // VS Code doesn't seem to support any trailing text on the separator line
485    write_conflict_marker(
486        output,
487        ConflictMarkerLineChar::GitSeparator,
488        conflict_marker_len,
489        "",
490    )?;
491
492    write_and_ensure_newline(output, right)?;
493    write_conflict_marker(
494        output,
495        ConflictMarkerLineChar::ConflictEnd,
496        conflict_marker_len,
497        &format!("Side #2 ({conflict_info} ends)"),
498    )?;
499
500    Ok(())
501}
502
503fn materialize_jj_style_conflict(
504    hunk: &Merge<BString>,
505    conflict_info: &str,
506    conflict_marker_style: ConflictMarkerStyle,
507    conflict_marker_len: usize,
508    output: &mut dyn Write,
509) -> io::Result<()> {
510    // Write a positive snapshot (side) of a conflict
511    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
512        write_conflict_marker(
513            output,
514            ConflictMarkerLineChar::Add,
515            conflict_marker_len,
516            &format!(
517                "Contents of side #{}{}",
518                add_index + 1,
519                maybe_no_eol_comment(data)
520            ),
521        )?;
522        write_and_ensure_newline(output, data)
523    };
524
525    // Write a negative snapshot (base) of a conflict
526    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
527        write_conflict_marker(
528            output,
529            ConflictMarkerLineChar::Remove,
530            conflict_marker_len,
531            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
532        )?;
533        write_and_ensure_newline(output, data)
534    };
535
536    // Write a diff from a negative term to a positive term
537    let write_diff =
538        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
539            let no_eol_remove = diff
540                .last()
541                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
542            let no_eol_add = diff
543                .last()
544                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
545            let no_eol_comment = match (no_eol_remove, no_eol_add) {
546                (true, true) => NO_EOL_COMMENT,
547                (true, _) => REMOVE_NO_EOL_COMMENT,
548                (_, true) => ADD_NO_EOL_COMMENT,
549                _ => "",
550            };
551            write_conflict_marker(
552                output,
553                ConflictMarkerLineChar::Diff,
554                conflict_marker_len,
555                &format!(
556                    "Changes from {base_str} to side #{}{no_eol_comment}",
557                    add_index + 1
558                ),
559            )?;
560            write_diff_hunks(diff, output)
561        };
562
563    write_conflict_marker(
564        output,
565        ConflictMarkerLineChar::ConflictStart,
566        conflict_marker_len,
567        conflict_info,
568    )?;
569    let mut add_index = 0;
570    for (base_index, left) in hunk.removes().enumerate() {
571        // The vast majority of conflicts one actually tries to resolve manually have 1
572        // base.
573        let base_str = if hunk.removes().len() == 1 {
574            "base".to_string()
575        } else {
576            format!("base #{}", base_index + 1)
577        };
578
579        let Some(right1) = hunk.get_add(add_index) else {
580            // If we have no more positive terms, emit the remaining negative terms as
581            // snapshots.
582            write_base(&base_str, left, output)?;
583            continue;
584        };
585
586        // For any style other than "diff", always emit sides and bases separately
587        if conflict_marker_style != ConflictMarkerStyle::Diff {
588            write_side(add_index, right1, output)?;
589            write_base(&base_str, left, output)?;
590            add_index += 1;
591            continue;
592        }
593
594        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
595        // Check if the diff against the next positive term is better. Since we want to
596        // preserve the order of the terms, we don't match against any later positive
597        // terms.
598        if let Some(right2) = hunk.get_add(add_index + 1) {
599            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
600            if diff_size(&diff2) < diff_size(&diff1) {
601                // If the next positive term is a better match, emit the current positive term
602                // as a snapshot and the next positive term as a diff.
603                write_side(add_index, right1, output)?;
604                write_diff(&base_str, add_index + 1, &diff2, output)?;
605                add_index += 2;
606                continue;
607            }
608        }
609
610        write_diff(&base_str, add_index, &diff1, output)?;
611        add_index += 1;
612    }
613
614    // Emit the remaining positive terms as snapshots.
615    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
616        write_side(add_index, slice, output)?;
617    }
618    write_conflict_marker(
619        output,
620        ConflictMarkerLineChar::ConflictEnd,
621        conflict_marker_len,
622        &format!("{conflict_info} ends"),
623    )?;
624    Ok(())
625}
626
627fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
628    if has_no_eol(slice) {
629        NO_EOL_COMMENT
630    } else {
631        ""
632    }
633}
634
635// Write a chunk of data, ensuring that it doesn't end with a line which is
636// missing its terminating newline.
637fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
638    output.write_all(data)?;
639    if has_no_eol(data) {
640        writeln!(output)?;
641    }
642    Ok(())
643}
644
// Report whether a non-empty slice ends with something other than a newline
// character. An empty slice is not considered to be missing its newline.
fn has_no_eol(slice: &[u8]) -> bool {
    !slice.is_empty() && !slice.ends_with(b"\n")
}
649
650fn diff_size(hunks: &[DiffHunk]) -> usize {
651    hunks
652        .iter()
653        .map(|hunk| match hunk.kind {
654            DiffHunkKind::Matching => 0,
655            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
656        })
657        .sum()
658}
659
660pub struct MaterializedTreeDiffEntry {
661    pub path: CopiesTreeDiffEntryPath,
662    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
663}
664
665pub fn materialized_diff_stream<'a>(
666    store: &'a Store,
667    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
668) -> impl Stream<Item = MaterializedTreeDiffEntry> + 'a {
669    tree_diff
670        .map(|CopiesTreeDiffEntry { path, values }| async {
671            match values {
672                Err(err) => MaterializedTreeDiffEntry {
673                    path,
674                    values: Err(err),
675                },
676                Ok((before, after)) => {
677                    let before_future = materialize_tree_value(store, path.source(), before);
678                    let after_future = materialize_tree_value(store, path.target(), after);
679                    let values = try_join!(before_future, after_future);
680                    MaterializedTreeDiffEntry { path, values }
681                }
682            }
683        })
684        .buffered((store.concurrency() / 2).max(1))
685}
686
687/// Parses conflict markers from a slice.
688///
689/// Returns `None` if there were no valid conflict markers. The caller
690/// has to provide the expected number of merge sides (adds). Conflict
691/// markers that are otherwise valid will be considered invalid if
692/// they don't have the expected arity.
693///
694/// All conflict markers in the file must be at least as long as the expected
695/// length. Any shorter conflict markers will be ignored.
696// TODO: "parse" is not usually the opposite of "materialize", so maybe we
697// should rename them to "serialize" and "deserialize"?
698pub fn parse_conflict(
699    input: &[u8],
700    num_sides: usize,
701    expected_marker_len: usize,
702) -> Option<Vec<Merge<BString>>> {
703    if input.is_empty() {
704        return None;
705    }
706    let mut hunks = vec![];
707    let mut pos = 0;
708    let mut resolved_start = 0;
709    let mut conflict_start = None;
710    let mut conflict_start_len = 0;
711    for line in input.lines_with_terminator() {
712        match parse_conflict_marker(line, expected_marker_len) {
713            Some(ConflictMarkerLineChar::ConflictStart) => {
714                conflict_start = Some(pos);
715                conflict_start_len = line.len();
716            }
717            Some(ConflictMarkerLineChar::ConflictEnd) => {
718                if let Some(conflict_start_index) = conflict_start.take() {
719                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
720                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
721                    if hunk.num_sides() == num_sides {
722                        let resolved_slice = &input[resolved_start..conflict_start_index];
723                        if !resolved_slice.is_empty() {
724                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
725                        }
726                        hunks.push(hunk);
727                        resolved_start = pos + line.len();
728                    }
729                }
730            }
731            _ => {}
732        }
733        pos += line.len();
734    }
735
736    if hunks.is_empty() {
737        None
738    } else {
739        if resolved_start < input.len() {
740            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
741        }
742        Some(hunks)
743    }
744}
745
746/// This method handles parsing both JJ-style and Git-style conflict markers,
747/// meaning that switching conflict marker styles won't prevent existing files
748/// with other conflict marker styles from being parsed successfully. The
749/// conflict marker style to use for parsing is determined based on the first
750/// line of the hunk.
751fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
752    // If the hunk starts with a conflict marker, find its first character
753    let initial_conflict_marker = input
754        .lines_with_terminator()
755        .next()
756        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
757
758    match initial_conflict_marker {
759        // JJ-style conflicts must start with one of these 3 conflict marker lines
760        Some(
761            ConflictMarkerLineChar::Diff
762            | ConflictMarkerLineChar::Remove
763            | ConflictMarkerLineChar::Add,
764        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
765        // Git-style conflicts either must not start with a conflict marker line, or must start with
766        // the "|||||||" conflict marker line (if the first side was empty)
767        None | Some(ConflictMarkerLineChar::GitAncestor) => {
768            parse_git_style_conflict_hunk(input, expected_marker_len)
769        }
770        // No other conflict markers are allowed at the start of a hunk
771        Some(_) => Merge::resolved(BString::new(vec![])),
772    }
773}
774
775fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
776    enum State {
777        Diff,
778        Remove,
779        Add,
780        Unknown,
781    }
782    let mut state = State::Unknown;
783    let mut removes = vec![];
784    let mut adds = vec![];
785    for line in input.lines_with_terminator() {
786        match parse_conflict_marker(line, expected_marker_len) {
787            Some(ConflictMarkerLineChar::Diff) => {
788                state = State::Diff;
789                removes.push(BString::new(vec![]));
790                adds.push(BString::new(vec![]));
791                continue;
792            }
793            Some(ConflictMarkerLineChar::Remove) => {
794                state = State::Remove;
795                removes.push(BString::new(vec![]));
796                continue;
797            }
798            Some(ConflictMarkerLineChar::Add) => {
799                state = State::Add;
800                adds.push(BString::new(vec![]));
801                continue;
802            }
803            _ => {}
804        }
805        match state {
806            State::Diff => {
807                if let Some(rest) = line.strip_prefix(b"-") {
808                    removes.last_mut().unwrap().extend_from_slice(rest);
809                } else if let Some(rest) = line.strip_prefix(b"+") {
810                    adds.last_mut().unwrap().extend_from_slice(rest);
811                } else if let Some(rest) = line.strip_prefix(b" ") {
812                    removes.last_mut().unwrap().extend_from_slice(rest);
813                    adds.last_mut().unwrap().extend_from_slice(rest);
814                } else if line == b"\n" || line == b"\r\n" {
815                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
816                    // be unfortunate if this prevented the conflict from being parsed, so we add
817                    // the empty line to the "remove" and "add" as if there was a space in front
818                    removes.last_mut().unwrap().extend_from_slice(line);
819                    adds.last_mut().unwrap().extend_from_slice(line);
820                } else {
821                    // Doesn't look like a valid conflict
822                    return Merge::resolved(BString::new(vec![]));
823                }
824            }
825            State::Remove => {
826                removes.last_mut().unwrap().extend_from_slice(line);
827            }
828            State::Add => {
829                adds.last_mut().unwrap().extend_from_slice(line);
830            }
831            State::Unknown => {
832                // Doesn't look like a valid conflict
833                return Merge::resolved(BString::new(vec![]));
834            }
835        }
836    }
837
838    if adds.len() == removes.len() + 1 {
839        Merge::from_removes_adds(removes, adds)
840    } else {
841        // Doesn't look like a valid conflict
842        Merge::resolved(BString::new(vec![]))
843    }
844}
845
846fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
847    #[derive(PartialEq, Eq)]
848    enum State {
849        Left,
850        Base,
851        Right,
852    }
853    let mut state = State::Left;
854    let mut left = BString::new(vec![]);
855    let mut base = BString::new(vec![]);
856    let mut right = BString::new(vec![]);
857    for line in input.lines_with_terminator() {
858        match parse_conflict_marker(line, expected_marker_len) {
859            Some(ConflictMarkerLineChar::GitAncestor) => {
860                if state == State::Left {
861                    state = State::Base;
862                    continue;
863                } else {
864                    // Base must come after left
865                    return Merge::resolved(BString::new(vec![]));
866                }
867            }
868            Some(ConflictMarkerLineChar::GitSeparator) => {
869                if state == State::Base {
870                    state = State::Right;
871                    continue;
872                } else {
873                    // Right must come after base
874                    return Merge::resolved(BString::new(vec![]));
875                }
876            }
877            _ => {}
878        }
879        match state {
880            State::Left => left.extend_from_slice(line),
881            State::Base => base.extend_from_slice(line),
882            State::Right => right.extend_from_slice(line),
883        }
884    }
885
886    if state == State::Right {
887        Merge::from_vec(vec![left, base, right])
888    } else {
889        // Doesn't look like a valid conflict
890        Merge::resolved(BString::new(vec![]))
891    }
892}
893
894/// Parses conflict markers in `content` and returns an updated version of
895/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
896/// single resolves `FileId` will be returned.
897pub async fn update_from_content(
898    file_ids: &Merge<Option<FileId>>,
899    store: &Store,
900    path: &RepoPath,
901    content: &[u8],
902    conflict_marker_style: ConflictMarkerStyle,
903    conflict_marker_len: usize,
904) -> BackendResult<Merge<Option<FileId>>> {
905    let simplified_file_ids = file_ids.clone().simplify();
906
907    // First check if the new content is unchanged compared to the old content. If
908    // it is, we don't need parse the content or write any new objects to the
909    // store. This is also a way of making sure that unchanged tree/file
910    // conflicts (for example) are not converted to regular files in the working
911    // copy.
912    let mut old_content = Vec::with_capacity(content.len());
913    let merge_hunk = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
914    materialize_merge_result_with_marker_len(
915        &merge_hunk,
916        conflict_marker_style,
917        conflict_marker_len,
918        &mut old_content,
919    )
920    .unwrap();
921    if content == old_content {
922        return Ok(file_ids.clone());
923    }
924
925    // Parse conflicts from the new content using the arity of the simplified
926    // conflicts.
927    let Some(mut hunks) = parse_conflict(
928        content,
929        simplified_file_ids.num_sides(),
930        conflict_marker_len,
931    ) else {
932        // Either there are no markers or they don't have the expected arity
933        let file_id = store.write_file(path, &mut &content[..]).await?;
934        return Ok(Merge::normal(file_id));
935    };
936
937    // If there is a conflict at the end of the file and a term ends with a newline,
938    // check whether the original term ended with a newline. If it didn't, then
939    // remove the newline since it was added automatically when materializing.
940    if let Some(last_hunk) = hunks.last_mut().filter(|hunk| !hunk.is_resolved()) {
941        for (original_content, term) in merge_hunk.iter().zip_eq(last_hunk.iter_mut()) {
942            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
943                term.pop();
944            }
945        }
946    }
947
948    let mut contents = simplified_file_ids.map(|_| vec![]);
949    for hunk in hunks {
950        if let Some(slice) = hunk.as_resolved() {
951            for content in contents.iter_mut() {
952                content.extend_from_slice(slice);
953            }
954        } else {
955            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
956                content.extend(Vec::from(slice));
957            }
958        }
959    }
960
961    // If the user edited the empty placeholder for an absent side, we consider the
962    // conflict resolved.
963    if zip(contents.iter(), simplified_file_ids.iter())
964        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
965    {
966        let file_id = store.write_file(path, &mut &content[..]).await?;
967        return Ok(Merge::normal(file_id));
968    }
969
970    // Now write the new files contents we found by parsing the file with conflict
971    // markers.
972    // TODO: Write these concurrently
973    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
974        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
975            match file_id {
976                Some(_) => {
977                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
978                    Ok(Some(file_id))
979                }
980                None => {
981                    // The missing side of a conflict is still represented by
982                    // the empty string we materialized it as
983                    Ok(None)
984                }
985            }
986        })
987        .try_collect()?;
988
989    // If the conflict was simplified, expand the conflict to the original
990    // number of sides.
991    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
992        file_ids
993            .clone()
994            .update_from_simplified(Merge::from_vec(new_file_ids))
995    } else {
996        Merge::from_vec(new_file_ids)
997    };
998    Ok(new_file_ids)
999}