jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::Stream;
25use futures::StreamExt as _;
26use futures::stream::BoxStream;
27use futures::try_join;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use tokio::io::AsyncRead;
31use tokio::io::AsyncReadExt as _;
32
33use crate::backend::BackendError;
34use crate::backend::BackendResult;
35use crate::backend::CommitId;
36use crate::backend::CopyId;
37use crate::backend::FileId;
38use crate::backend::SymlinkId;
39use crate::backend::TreeId;
40use crate::backend::TreeValue;
41use crate::copies::CopiesTreeDiffEntry;
42use crate::copies::CopiesTreeDiffEntryPath;
43use crate::diff::ContentDiff;
44use crate::diff::DiffHunk;
45use crate::diff::DiffHunkKind;
46use crate::files;
47use crate::files::MergeResult;
48use crate::merge::Merge;
49use crate::merge::MergedTreeValue;
50use crate::merge::SameChange;
51use crate::repo_path::RepoPath;
52use crate::store::Store;
53use crate::tree_merge::MergeOptions;
54
55/// Minimum length of conflict markers.
56pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
57
58/// If a file already contains lines which look like conflict markers of length
59/// N, then the conflict markers we add will be of length (N + increment). This
60/// number is chosen to make the conflict markers noticeably longer than the
61/// existing markers.
62const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
63
64/// Comment for missing terminating newline in a term of a conflict.
65const NO_EOL_COMMENT: &str = " (no terminating newline)";
66
67/// Comment for missing terminating newline in the "add" side of a diff.
68const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
69
70/// Comment for missing terminating newline in the "remove" side of a diff.
71const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
72
73fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
74    for hunk in hunks {
75        match hunk.kind {
76            DiffHunkKind::Matching => {
77                debug_assert!(hunk.contents.iter().all_equal());
78                for line in hunk.contents[0].lines_with_terminator() {
79                    file.write_all(b" ")?;
80                    write_and_ensure_newline(file, line)?;
81                }
82            }
83            DiffHunkKind::Different => {
84                for line in hunk.contents[0].lines_with_terminator() {
85                    file.write_all(b"-")?;
86                    write_and_ensure_newline(file, line)?;
87                }
88                for line in hunk.contents[1].lines_with_terminator() {
89                    file.write_all(b"+")?;
90                    write_and_ensure_newline(file, line)?;
91                }
92            }
93        }
94    }
95    Ok(())
96}
97
98async fn get_file_contents(
99    store: &Store,
100    path: &RepoPath,
101    term: &Option<FileId>,
102) -> BackendResult<BString> {
103    match term {
104        Some(id) => {
105            let mut reader = store.read_file(path, id).await?;
106            let mut content = vec![];
107            reader
108                .read_to_end(&mut content)
109                .await
110                .map_err(|err| BackendError::ReadFile {
111                    path: path.to_owned(),
112                    id: id.clone(),
113                    source: err.into(),
114                })?;
115            Ok(BString::new(content))
116        }
117        // If the conflict had removed the file on one side, we pretend that the file
118        // was empty there.
119        None => Ok(BString::new(vec![])),
120    }
121}
122
123pub async fn extract_as_single_hunk(
124    merge: &Merge<Option<FileId>>,
125    store: &Store,
126    path: &RepoPath,
127) -> BackendResult<Merge<BString>> {
128    merge
129        .try_map_async(|term| get_file_contents(store, path, term))
130        .await
131}
132
133/// A type similar to `MergedTreeValue` but with associated data to include in
134/// e.g. the working copy or in a diff.
135pub enum MaterializedTreeValue {
136    Absent,
137    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
138    File(MaterializedFileValue),
139    Symlink { id: SymlinkId, target: String },
140    FileConflict(MaterializedFileConflictValue),
141    OtherConflict { id: MergedTreeValue },
142    GitSubmodule(CommitId),
143    Tree(TreeId),
144}
145
146impl MaterializedTreeValue {
147    pub fn is_absent(&self) -> bool {
148        matches!(self, Self::Absent)
149    }
150
151    pub fn is_present(&self) -> bool {
152        !self.is_absent()
153    }
154}
155
156/// [`TreeValue::File`] with file content `reader`.
157pub struct MaterializedFileValue {
158    pub id: FileId,
159    pub executable: bool,
160    pub copy_id: CopyId,
161    pub reader: Pin<Box<dyn AsyncRead + Send>>,
162}
163
164impl MaterializedFileValue {
165    /// Reads file content until EOF. The provided `path` is used only for error
166    /// reporting purpose.
167    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
168        let mut buf = Vec::new();
169        self.reader
170            .read_to_end(&mut buf)
171            .await
172            .map_err(|err| BackendError::ReadFile {
173                path: path.to_owned(),
174                id: self.id.clone(),
175                source: err.into(),
176            })?;
177        Ok(buf)
178    }
179}
180
181/// Conflicted [`TreeValue::File`]s with file contents.
182pub struct MaterializedFileConflictValue {
183    /// File ids which preserve the shape of the tree conflict, to be used with
184    /// [`Merge::update_from_simplified()`].
185    pub unsimplified_ids: Merge<Option<FileId>>,
186    /// Simplified file ids, in which redundant id pairs are dropped.
187    pub ids: Merge<Option<FileId>>,
188    /// File contents corresponding to the simplified `ids`.
189    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
190    // when null bytes found?
191    pub contents: Merge<BString>,
192    /// Merged executable bit. `None` if there are changes in both executable
193    /// bit and file absence.
194    pub executable: Option<bool>,
195    /// Merged copy id. `None` if no single value could be determined.
196    pub copy_id: Option<CopyId>,
197}
198
199/// Reads the data associated with a `MergedTreeValue` so it can be written to
200/// e.g. the working copy or diff.
201pub async fn materialize_tree_value(
202    store: &Store,
203    path: &RepoPath,
204    value: MergedTreeValue,
205) -> BackendResult<MaterializedTreeValue> {
206    match materialize_tree_value_no_access_denied(store, path, value).await {
207        Err(BackendError::ReadAccessDenied { source, .. }) => {
208            Ok(MaterializedTreeValue::AccessDenied(source))
209        }
210        result => result,
211    }
212}
213
214async fn materialize_tree_value_no_access_denied(
215    store: &Store,
216    path: &RepoPath,
217    value: MergedTreeValue,
218) -> BackendResult<MaterializedTreeValue> {
219    match value.into_resolved() {
220        Ok(None) => Ok(MaterializedTreeValue::Absent),
221        Ok(Some(TreeValue::File {
222            id,
223            executable,
224            copy_id,
225        })) => {
226            let reader = store.read_file(path, &id).await?;
227            Ok(MaterializedTreeValue::File(MaterializedFileValue {
228                id,
229                executable,
230                copy_id,
231                reader,
232            }))
233        }
234        Ok(Some(TreeValue::Symlink(id))) => {
235            let target = store.read_symlink(path, &id).await?;
236            Ok(MaterializedTreeValue::Symlink { id, target })
237        }
238        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
239        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
240        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
241            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
242            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
243        },
244    }
245}
246
247/// Suppose `conflict` contains only files or absent entries, reads the file
248/// contents.
249pub async fn try_materialize_file_conflict_value(
250    store: &Store,
251    path: &RepoPath,
252    conflict: &MergedTreeValue,
253) -> BackendResult<Option<MaterializedFileConflictValue>> {
254    let (Some(unsimplified_ids), Some(executable_bits)) =
255        (conflict.to_file_merge(), conflict.to_executable_merge())
256    else {
257        return Ok(None);
258    };
259    let ids = unsimplified_ids.simplify();
260    let contents = extract_as_single_hunk(&ids, store, path).await?;
261    let executable = resolve_file_executable(&executable_bits);
262    Ok(Some(MaterializedFileConflictValue {
263        unsimplified_ids,
264        ids,
265        contents,
266        executable,
267        copy_id: Some(CopyId::placeholder()),
268    }))
269}
270
271/// Resolves conflicts in file executable bit, returns the original state if the
272/// file is deleted and executable bit is unchanged.
273pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
274    let resolved = merge.resolve_trivial(SameChange::Accept).copied()?;
275    if resolved.is_some() {
276        resolved
277    } else {
278        // If the merge is resolved to None (absent), there should be the same
279        // number of Some(true) and Some(false). Pick the old state if
280        // unambiguous, so the new file inherits the original executable bit.
281        merge.removes().flatten().copied().all_equal_value().ok()
282    }
283}
284
285/// Describes what style should be used when materializing conflicts.
286#[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Deserialize)]
287#[serde(rename_all = "kebab-case")]
288pub enum ConflictMarkerStyle {
289    /// Style which shows a snapshot and a series of diffs to apply.
290    Diff,
291    /// Style which shows a snapshot for each base and side.
292    Snapshot,
293    /// Style which replicates Git's "diff3" style to support external tools.
294    Git,
295}
296
297/// Options for conflict materialization.
298#[derive(Clone, Debug)]
299pub struct ConflictMaterializeOptions {
300    pub marker_style: ConflictMarkerStyle,
301    pub marker_len: Option<usize>,
302    pub merge: MergeOptions,
303}
304
305/// Characters which can be repeated to form a conflict marker line when
306/// materializing and parsing conflicts.
307#[derive(Clone, Copy, PartialEq, Eq)]
308#[repr(u8)]
309enum ConflictMarkerLineChar {
310    ConflictStart = b'<',
311    ConflictEnd = b'>',
312    Add = b'+',
313    Remove = b'-',
314    Diff = b'%',
315    GitAncestor = b'|',
316    GitSeparator = b'=',
317}
318
319impl ConflictMarkerLineChar {
320    /// Get the ASCII byte used for this conflict marker.
321    fn to_byte(self) -> u8 {
322        self as u8
323    }
324
325    /// Parse a byte to see if it corresponds with any kind of conflict marker.
326    fn parse_byte(byte: u8) -> Option<Self> {
327        match byte {
328            b'<' => Some(Self::ConflictStart),
329            b'>' => Some(Self::ConflictEnd),
330            b'+' => Some(Self::Add),
331            b'-' => Some(Self::Remove),
332            b'%' => Some(Self::Diff),
333            b'|' => Some(Self::GitAncestor),
334            b'=' => Some(Self::GitSeparator),
335            _ => None,
336        }
337    }
338}
339
340/// Represents a conflict marker line parsed from the file. Conflict marker
341/// lines consist of a single ASCII character repeated for a certain length.
342struct ConflictMarkerLine {
343    kind: ConflictMarkerLineChar,
344    len: usize,
345}
346
347/// Write a conflict marker to an output file.
348fn write_conflict_marker(
349    output: &mut dyn Write,
350    kind: ConflictMarkerLineChar,
351    len: usize,
352    suffix_text: &str,
353) -> io::Result<()> {
354    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
355
356    if suffix_text.is_empty() {
357        writeln!(output, "{conflict_marker}")
358    } else {
359        writeln!(output, "{conflict_marker} {suffix_text}")
360    }
361}
362
363/// Parse a conflict marker from a line of a file. The conflict marker may have
364/// any length (even less than MIN_CONFLICT_MARKER_LEN).
365fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
366    let first_byte = *line.first()?;
367    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
368    let len = line.iter().take_while(|&&b| b == first_byte).count();
369
370    if let Some(next_byte) = line.get(len) {
371        // If there is a character after the marker, it must be ASCII whitespace
372        if !next_byte.is_ascii_whitespace() {
373            return None;
374        }
375    }
376
377    Some(ConflictMarkerLine { kind, len })
378}
379
380/// Parse a conflict marker, expecting it to be at least a certain length. Any
381/// shorter conflict markers are ignored.
382fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
383    parse_conflict_marker_any_len(line)
384        .filter(|marker| marker.len >= expected_len)
385        .map(|marker| marker.kind)
386}
387
388/// Given a Merge of files, choose the conflict marker length to use when
389/// materializing conflicts.
390pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
391    let max_existing_marker_len = single_hunk
392        .iter()
393        .flat_map(|file| file.as_ref().lines_with_terminator())
394        .filter_map(parse_conflict_marker_any_len)
395        .map(|marker| marker.len)
396        .max()
397        .unwrap_or_default();
398
399    max_existing_marker_len
400        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
401        .max(MIN_CONFLICT_MARKER_LEN)
402}
403
404pub fn materialize_merge_result<T: AsRef<[u8]>>(
405    single_hunk: &Merge<T>,
406    output: &mut dyn Write,
407    options: &ConflictMaterializeOptions,
408) -> io::Result<()> {
409    let merge_result = files::merge_hunks(single_hunk, &options.merge);
410    match &merge_result {
411        MergeResult::Resolved(content) => output.write_all(content),
412        MergeResult::Conflict(hunks) => {
413            let marker_len = options
414                .marker_len
415                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
416            materialize_conflict_hunks(hunks, options.marker_style, marker_len, output)
417        }
418    }
419}
420
421pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
422    single_hunk: &Merge<T>,
423    options: &ConflictMaterializeOptions,
424) -> BString {
425    let merge_result = files::merge_hunks(single_hunk, &options.merge);
426    match merge_result {
427        MergeResult::Resolved(content) => content,
428        MergeResult::Conflict(hunks) => {
429            let marker_len = options
430                .marker_len
431                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
432            let mut output = Vec::new();
433            materialize_conflict_hunks(&hunks, options.marker_style, marker_len, &mut output)
434                .expect("writing to an in-memory buffer should never fail");
435            output.into()
436        }
437    }
438}
439
440fn materialize_conflict_hunks(
441    hunks: &[Merge<BString>],
442    conflict_marker_style: ConflictMarkerStyle,
443    conflict_marker_len: usize,
444    output: &mut dyn Write,
445) -> io::Result<()> {
446    let num_conflicts = hunks
447        .iter()
448        .filter(|hunk| hunk.as_resolved().is_none())
449        .count();
450    let mut conflict_index = 0;
451    for hunk in hunks {
452        if let Some(content) = hunk.as_resolved() {
453            output.write_all(content)?;
454        } else {
455            conflict_index += 1;
456            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
457
458            match (conflict_marker_style, hunk.as_slice()) {
459                // 2-sided conflicts can use Git-style conflict markers
460                (ConflictMarkerStyle::Git, [left, base, right]) => {
461                    materialize_git_style_conflict(
462                        left,
463                        base,
464                        right,
465                        &conflict_info,
466                        conflict_marker_len,
467                        output,
468                    )?;
469                }
470                _ => {
471                    materialize_jj_style_conflict(
472                        hunk,
473                        &conflict_info,
474                        conflict_marker_style,
475                        conflict_marker_len,
476                        output,
477                    )?;
478                }
479            }
480        }
481    }
482    Ok(())
483}
484
485fn materialize_git_style_conflict(
486    left: &[u8],
487    base: &[u8],
488    right: &[u8],
489    conflict_info: &str,
490    conflict_marker_len: usize,
491    output: &mut dyn Write,
492) -> io::Result<()> {
493    write_conflict_marker(
494        output,
495        ConflictMarkerLineChar::ConflictStart,
496        conflict_marker_len,
497        &format!("Side #1 ({conflict_info})"),
498    )?;
499    write_and_ensure_newline(output, left)?;
500
501    write_conflict_marker(
502        output,
503        ConflictMarkerLineChar::GitAncestor,
504        conflict_marker_len,
505        "Base",
506    )?;
507    write_and_ensure_newline(output, base)?;
508
509    // VS Code doesn't seem to support any trailing text on the separator line
510    write_conflict_marker(
511        output,
512        ConflictMarkerLineChar::GitSeparator,
513        conflict_marker_len,
514        "",
515    )?;
516
517    write_and_ensure_newline(output, right)?;
518    write_conflict_marker(
519        output,
520        ConflictMarkerLineChar::ConflictEnd,
521        conflict_marker_len,
522        &format!("Side #2 ({conflict_info} ends)"),
523    )?;
524
525    Ok(())
526}
527
528fn materialize_jj_style_conflict(
529    hunk: &Merge<BString>,
530    conflict_info: &str,
531    conflict_marker_style: ConflictMarkerStyle,
532    conflict_marker_len: usize,
533    output: &mut dyn Write,
534) -> io::Result<()> {
535    // Write a positive snapshot (side) of a conflict
536    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
537        write_conflict_marker(
538            output,
539            ConflictMarkerLineChar::Add,
540            conflict_marker_len,
541            &format!(
542                "Contents of side #{}{}",
543                add_index + 1,
544                maybe_no_eol_comment(data)
545            ),
546        )?;
547        write_and_ensure_newline(output, data)
548    };
549
550    // Write a negative snapshot (base) of a conflict
551    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
552        write_conflict_marker(
553            output,
554            ConflictMarkerLineChar::Remove,
555            conflict_marker_len,
556            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
557        )?;
558        write_and_ensure_newline(output, data)
559    };
560
561    // Write a diff from a negative term to a positive term
562    let write_diff =
563        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
564            let no_eol_remove = diff
565                .last()
566                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
567            let no_eol_add = diff
568                .last()
569                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
570            let no_eol_comment = match (no_eol_remove, no_eol_add) {
571                (true, true) => NO_EOL_COMMENT,
572                (true, _) => REMOVE_NO_EOL_COMMENT,
573                (_, true) => ADD_NO_EOL_COMMENT,
574                _ => "",
575            };
576            write_conflict_marker(
577                output,
578                ConflictMarkerLineChar::Diff,
579                conflict_marker_len,
580                &format!(
581                    "Changes from {base_str} to side #{}{no_eol_comment}",
582                    add_index + 1
583                ),
584            )?;
585            write_diff_hunks(diff, output)
586        };
587
588    write_conflict_marker(
589        output,
590        ConflictMarkerLineChar::ConflictStart,
591        conflict_marker_len,
592        conflict_info,
593    )?;
594    let mut add_index = 0;
595    for (base_index, left) in hunk.removes().enumerate() {
596        // The vast majority of conflicts one actually tries to resolve manually have 1
597        // base.
598        let base_str = if hunk.removes().len() == 1 {
599            "base".to_string()
600        } else {
601            format!("base #{}", base_index + 1)
602        };
603
604        let Some(right1) = hunk.get_add(add_index) else {
605            // If we have no more positive terms, emit the remaining negative terms as
606            // snapshots.
607            write_base(&base_str, left, output)?;
608            continue;
609        };
610
611        // For any style other than "diff", always emit sides and bases separately
612        if conflict_marker_style != ConflictMarkerStyle::Diff {
613            write_side(add_index, right1, output)?;
614            write_base(&base_str, left, output)?;
615            add_index += 1;
616            continue;
617        }
618
619        let diff1 = ContentDiff::by_line([&left, &right1]).hunks().collect_vec();
620        // Check if the diff against the next positive term is better. Since we want to
621        // preserve the order of the terms, we don't match against any later positive
622        // terms.
623        if let Some(right2) = hunk.get_add(add_index + 1) {
624            let diff2 = ContentDiff::by_line([&left, &right2]).hunks().collect_vec();
625            if diff_size(&diff2) < diff_size(&diff1) {
626                // If the next positive term is a better match, emit the current positive term
627                // as a snapshot and the next positive term as a diff.
628                write_side(add_index, right1, output)?;
629                write_diff(&base_str, add_index + 1, &diff2, output)?;
630                add_index += 2;
631                continue;
632            }
633        }
634
635        write_diff(&base_str, add_index, &diff1, output)?;
636        add_index += 1;
637    }
638
639    // Emit the remaining positive terms as snapshots.
640    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
641        write_side(add_index, slice, output)?;
642    }
643    write_conflict_marker(
644        output,
645        ConflictMarkerLineChar::ConflictEnd,
646        conflict_marker_len,
647        &format!("{conflict_info} ends"),
648    )?;
649    Ok(())
650}
651
652fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
653    if has_no_eol(slice) {
654        NO_EOL_COMMENT
655    } else {
656        ""
657    }
658}
659
660// Write a chunk of data, ensuring that it doesn't end with a line which is
661// missing its terminating newline.
662fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
663    output.write_all(data)?;
664    if has_no_eol(data) {
665        writeln!(output)?;
666    }
667    Ok(())
668}
669
670// Check whether a slice is missing its terminating newline character.
671fn has_no_eol(slice: &[u8]) -> bool {
672    slice.last().is_some_and(|&last| last != b'\n')
673}
674
675fn diff_size(hunks: &[DiffHunk]) -> usize {
676    hunks
677        .iter()
678        .map(|hunk| match hunk.kind {
679            DiffHunkKind::Matching => 0,
680            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
681        })
682        .sum()
683}
684
685pub struct MaterializedTreeDiffEntry {
686    pub path: CopiesTreeDiffEntryPath,
687    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
688}
689
690pub fn materialized_diff_stream<'a>(
691    store: &'a Store,
692    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
693) -> impl Stream<Item = MaterializedTreeDiffEntry> + use<'a> {
694    tree_diff
695        .map(async |CopiesTreeDiffEntry { path, values }| match values {
696            Err(err) => MaterializedTreeDiffEntry {
697                path,
698                values: Err(err),
699            },
700            Ok(values) => {
701                let before_future = materialize_tree_value(store, path.source(), values.before);
702                let after_future = materialize_tree_value(store, path.target(), values.after);
703                let values = try_join!(before_future, after_future);
704                MaterializedTreeDiffEntry { path, values }
705            }
706        })
707        .buffered((store.concurrency() / 2).max(1))
708}
709
710/// Parses conflict markers from a slice.
711///
712/// Returns `None` if there were no valid conflict markers. The caller
713/// has to provide the expected number of merge sides (adds). Conflict
714/// markers that are otherwise valid will be considered invalid if
715/// they don't have the expected arity.
716///
717/// All conflict markers in the file must be at least as long as the expected
718/// length. Any shorter conflict markers will be ignored.
719// TODO: "parse" is not usually the opposite of "materialize", so maybe we
720// should rename them to "serialize" and "deserialize"?
721pub fn parse_conflict(
722    input: &[u8],
723    num_sides: usize,
724    expected_marker_len: usize,
725) -> Option<Vec<Merge<BString>>> {
726    if input.is_empty() {
727        return None;
728    }
729    let mut hunks = vec![];
730    let mut pos = 0;
731    let mut resolved_start = 0;
732    let mut conflict_start = None;
733    let mut conflict_start_len = 0;
734    for line in input.lines_with_terminator() {
735        match parse_conflict_marker(line, expected_marker_len) {
736            Some(ConflictMarkerLineChar::ConflictStart) => {
737                conflict_start = Some(pos);
738                conflict_start_len = line.len();
739            }
740            Some(ConflictMarkerLineChar::ConflictEnd) => {
741                if let Some(conflict_start_index) = conflict_start.take() {
742                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
743                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
744                    if hunk.num_sides() == num_sides {
745                        let resolved_slice = &input[resolved_start..conflict_start_index];
746                        if !resolved_slice.is_empty() {
747                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
748                        }
749                        hunks.push(hunk);
750                        resolved_start = pos + line.len();
751                    }
752                }
753            }
754            _ => {}
755        }
756        pos += line.len();
757    }
758
759    if hunks.is_empty() {
760        None
761    } else {
762        if resolved_start < input.len() {
763            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
764        }
765        Some(hunks)
766    }
767}
768
769/// This method handles parsing both JJ-style and Git-style conflict markers,
770/// meaning that switching conflict marker styles won't prevent existing files
771/// with other conflict marker styles from being parsed successfully. The
772/// conflict marker style to use for parsing is determined based on the first
773/// line of the hunk.
774fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
775    // If the hunk starts with a conflict marker, find its first character
776    let initial_conflict_marker = input
777        .lines_with_terminator()
778        .next()
779        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
780
781    match initial_conflict_marker {
782        // JJ-style conflicts must start with one of these 3 conflict marker lines
783        Some(
784            ConflictMarkerLineChar::Diff
785            | ConflictMarkerLineChar::Remove
786            | ConflictMarkerLineChar::Add,
787        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
788        // Git-style conflicts either must not start with a conflict marker line, or must start with
789        // the "|||||||" conflict marker line (if the first side was empty)
790        None | Some(ConflictMarkerLineChar::GitAncestor) => {
791            parse_git_style_conflict_hunk(input, expected_marker_len)
792        }
793        // No other conflict markers are allowed at the start of a hunk
794        Some(_) => Merge::resolved(BString::new(vec![])),
795    }
796}
797
798fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
799    enum State {
800        Diff,
801        Remove,
802        Add,
803        Unknown,
804    }
805    let mut state = State::Unknown;
806    let mut removes = vec![];
807    let mut adds = vec![];
808    for line in input.lines_with_terminator() {
809        match parse_conflict_marker(line, expected_marker_len) {
810            Some(ConflictMarkerLineChar::Diff) => {
811                state = State::Diff;
812                removes.push(BString::new(vec![]));
813                adds.push(BString::new(vec![]));
814                continue;
815            }
816            Some(ConflictMarkerLineChar::Remove) => {
817                state = State::Remove;
818                removes.push(BString::new(vec![]));
819                continue;
820            }
821            Some(ConflictMarkerLineChar::Add) => {
822                state = State::Add;
823                adds.push(BString::new(vec![]));
824                continue;
825            }
826            _ => {}
827        }
828        match state {
829            State::Diff => {
830                if let Some(rest) = line.strip_prefix(b"-") {
831                    removes.last_mut().unwrap().extend_from_slice(rest);
832                } else if let Some(rest) = line.strip_prefix(b"+") {
833                    adds.last_mut().unwrap().extend_from_slice(rest);
834                } else if let Some(rest) = line.strip_prefix(b" ") {
835                    removes.last_mut().unwrap().extend_from_slice(rest);
836                    adds.last_mut().unwrap().extend_from_slice(rest);
837                } else if line == b"\n" || line == b"\r\n" {
838                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
839                    // be unfortunate if this prevented the conflict from being parsed, so we add
840                    // the empty line to the "remove" and "add" as if there was a space in front
841                    removes.last_mut().unwrap().extend_from_slice(line);
842                    adds.last_mut().unwrap().extend_from_slice(line);
843                } else {
844                    // Doesn't look like a valid conflict
845                    return Merge::resolved(BString::new(vec![]));
846                }
847            }
848            State::Remove => {
849                removes.last_mut().unwrap().extend_from_slice(line);
850            }
851            State::Add => {
852                adds.last_mut().unwrap().extend_from_slice(line);
853            }
854            State::Unknown => {
855                // Doesn't look like a valid conflict
856                return Merge::resolved(BString::new(vec![]));
857            }
858        }
859    }
860
861    if adds.len() == removes.len() + 1 {
862        Merge::from_removes_adds(removes, adds)
863    } else {
864        // Doesn't look like a valid conflict
865        Merge::resolved(BString::new(vec![]))
866    }
867}
868
869fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
870    #[derive(PartialEq, Eq)]
871    enum State {
872        Left,
873        Base,
874        Right,
875    }
876    let mut state = State::Left;
877    let mut left = BString::new(vec![]);
878    let mut base = BString::new(vec![]);
879    let mut right = BString::new(vec![]);
880    for line in input.lines_with_terminator() {
881        match parse_conflict_marker(line, expected_marker_len) {
882            Some(ConflictMarkerLineChar::GitAncestor) => {
883                if state == State::Left {
884                    state = State::Base;
885                    continue;
886                } else {
887                    // Base must come after left
888                    return Merge::resolved(BString::new(vec![]));
889                }
890            }
891            Some(ConflictMarkerLineChar::GitSeparator) => {
892                if state == State::Base {
893                    state = State::Right;
894                    continue;
895                } else {
896                    // Right must come after base
897                    return Merge::resolved(BString::new(vec![]));
898                }
899            }
900            _ => {}
901        }
902        match state {
903            State::Left => left.extend_from_slice(line),
904            State::Base => base.extend_from_slice(line),
905            State::Right => right.extend_from_slice(line),
906        }
907    }
908
909    if state == State::Right {
910        Merge::from_vec(vec![left, base, right])
911    } else {
912        // Doesn't look like a valid conflict
913        Merge::resolved(BString::new(vec![]))
914    }
915}
916
917/// Parses conflict markers in `content` and returns an updated version of
918/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
919/// single resolves `FileId` will be returned.
920pub async fn update_from_content(
921    file_ids: &Merge<Option<FileId>>,
922    store: &Store,
923    path: &RepoPath,
924    content: &[u8],
925    conflict_marker_len: usize,
926) -> BackendResult<Merge<Option<FileId>>> {
927    let simplified_file_ids = file_ids.simplify();
928
929    let old_contents = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
930    let old_hunks = files::merge_hunks(&old_contents, store.merge_options());
931
932    // Parse conflicts from the new content using the arity of the simplified
933    // conflicts.
934    let mut new_hunks = parse_conflict(
935        content,
936        simplified_file_ids.num_sides(),
937        conflict_marker_len,
938    );
939
940    // If there is a conflict at the end of the file and a term ends with a newline,
941    // check whether the original term ended with a newline. If it didn't, then
942    // remove the newline since it was added automatically when materializing.
943    if let Some(last_hunk) = new_hunks
944        .as_mut()
945        .and_then(|hunks| hunks.last_mut())
946        .filter(|hunk| !hunk.is_resolved())
947    {
948        for (original_content, term) in old_contents.iter().zip_eq(last_hunk.iter_mut()) {
949            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
950                term.pop();
951            }
952        }
953    }
954
955    // Check if the new hunks are unchanged. This makes sure that unchanged file
956    // conflicts aren't updated to partially-resolved contents.
957    let unchanged = match (&old_hunks, &new_hunks) {
958        (MergeResult::Resolved(old), None) => old == content,
959        (MergeResult::Conflict(old), Some(new)) => old == new,
960        (MergeResult::Resolved(_), Some(_)) | (MergeResult::Conflict(_), None) => false,
961    };
962    if unchanged {
963        return Ok(file_ids.clone());
964    }
965
966    let Some(hunks) = new_hunks else {
967        // Either there are no markers or they don't have the expected arity
968        let file_id = store.write_file(path, &mut &content[..]).await?;
969        return Ok(Merge::normal(file_id));
970    };
971
972    let mut contents = simplified_file_ids.map(|_| vec![]);
973    for hunk in hunks {
974        if let Some(slice) = hunk.as_resolved() {
975            for content in contents.iter_mut() {
976                content.extend_from_slice(slice);
977            }
978        } else {
979            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
980                content.extend(Vec::from(slice));
981            }
982        }
983    }
984
985    // If the user edited the empty placeholder for an absent side, we consider the
986    // conflict resolved.
987    if zip(contents.iter(), simplified_file_ids.iter())
988        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
989    {
990        let file_id = store.write_file(path, &mut &content[..]).await?;
991        return Ok(Merge::normal(file_id));
992    }
993
994    // Now write the new files contents we found by parsing the file with conflict
995    // markers.
996    // TODO: Write these concurrently
997    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
998        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
999            match file_id {
1000                Some(_) => {
1001                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1002                    Ok(Some(file_id))
1003                }
1004                None => {
1005                    // The missing side of a conflict is still represented by
1006                    // the empty string we materialized it as
1007                    Ok(None)
1008                }
1009            }
1010        })
1011        .try_collect()?;
1012
1013    // If the conflict was simplified, expand the conflict to the original
1014    // number of sides.
1015    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1016        file_ids
1017            .clone()
1018            .update_from_simplified(Merge::from_vec(new_file_ids))
1019    } else {
1020        Merge::from_vec(new_file_ids)
1021    };
1022    Ok(new_file_ids)
1023}
1024
1025#[cfg(test)]
1026mod tests {
1027    use super::*;
1028
1029    #[test]
1030    fn test_resolve_file_executable() {
1031        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1032            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1033        }
1034
1035        // already resolved
1036        assert_eq!(resolve([None]), None);
1037        assert_eq!(resolve([Some(false)]), Some(false));
1038        assert_eq!(resolve([Some(true)]), Some(true));
1039
1040        // trivially resolved
1041        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1042        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1043        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1044        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1045
1046        // unresolvable
1047        assert_eq!(resolve([Some(false), Some(true), None]), None);
1048
1049        // trivially resolved to absent, so pick the original state
1050        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1051        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1052        assert_eq!(
1053            resolve([None, None, Some(true), Some(true), None]),
1054            Some(true)
1055        );
1056
1057        // trivially resolved to absent, and the original state is ambiguous
1058        assert_eq!(
1059            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1060            None
1061        );
1062        assert_eq!(
1063            resolve([
1064                None,
1065                Some(true),
1066                Some(true),
1067                Some(false),
1068                Some(false),
1069                Some(false),
1070                Some(false),
1071            ]),
1072            None
1073        );
1074    }
1075}