jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::Stream;
25use futures::StreamExt as _;
26use futures::stream::BoxStream;
27use futures::try_join;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use tokio::io::AsyncRead;
31use tokio::io::AsyncReadExt as _;
32
33use crate::backend::BackendError;
34use crate::backend::BackendResult;
35use crate::backend::CommitId;
36use crate::backend::CopyId;
37use crate::backend::FileId;
38use crate::backend::SymlinkId;
39use crate::backend::TreeId;
40use crate::backend::TreeValue;
41use crate::copies::CopiesTreeDiffEntry;
42use crate::copies::CopiesTreeDiffEntryPath;
43use crate::diff::Diff;
44use crate::diff::DiffHunk;
45use crate::diff::DiffHunkKind;
46use crate::files;
47use crate::files::MergeResult;
48use crate::merge::Merge;
49use crate::merge::MergedTreeValue;
50use crate::repo_path::RepoPath;
51use crate::store::Store;
52
53/// Minimum length of conflict markers.
54pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
55
56/// If a file already contains lines which look like conflict markers of length
57/// N, then the conflict markers we add will be of length (N + increment). This
58/// number is chosen to make the conflict markers noticeably longer than the
59/// existing markers.
60const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
61
62/// Comment for missing terminating newline in a term of a conflict.
63const NO_EOL_COMMENT: &str = " (no terminating newline)";
64
65/// Comment for missing terminating newline in the "add" side of a diff.
66const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
67
68/// Comment for missing terminating newline in the "remove" side of a diff.
69const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
70
71fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
72    for hunk in hunks {
73        match hunk.kind {
74            DiffHunkKind::Matching => {
75                debug_assert!(hunk.contents.iter().all_equal());
76                for line in hunk.contents[0].lines_with_terminator() {
77                    file.write_all(b" ")?;
78                    write_and_ensure_newline(file, line)?;
79                }
80            }
81            DiffHunkKind::Different => {
82                for line in hunk.contents[0].lines_with_terminator() {
83                    file.write_all(b"-")?;
84                    write_and_ensure_newline(file, line)?;
85                }
86                for line in hunk.contents[1].lines_with_terminator() {
87                    file.write_all(b"+")?;
88                    write_and_ensure_newline(file, line)?;
89                }
90            }
91        }
92    }
93    Ok(())
94}
95
96async fn get_file_contents(
97    store: &Store,
98    path: &RepoPath,
99    term: &Option<FileId>,
100) -> BackendResult<BString> {
101    match term {
102        Some(id) => {
103            let mut reader = store.read_file(path, id).await?;
104            let mut content = vec![];
105            reader
106                .read_to_end(&mut content)
107                .await
108                .map_err(|err| BackendError::ReadFile {
109                    path: path.to_owned(),
110                    id: id.clone(),
111                    source: err.into(),
112                })?;
113            Ok(BString::new(content))
114        }
115        // If the conflict had removed the file on one side, we pretend that the file
116        // was empty there.
117        None => Ok(BString::new(vec![])),
118    }
119}
120
121pub async fn extract_as_single_hunk(
122    merge: &Merge<Option<FileId>>,
123    store: &Store,
124    path: &RepoPath,
125) -> BackendResult<Merge<BString>> {
126    merge
127        .try_map_async(|term| get_file_contents(store, path, term))
128        .await
129}
130
131/// A type similar to `MergedTreeValue` but with associated data to include in
132/// e.g. the working copy or in a diff.
133pub enum MaterializedTreeValue {
134    Absent,
135    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
136    File(MaterializedFileValue),
137    Symlink { id: SymlinkId, target: String },
138    FileConflict(MaterializedFileConflictValue),
139    OtherConflict { id: MergedTreeValue },
140    GitSubmodule(CommitId),
141    Tree(TreeId),
142}
143
144impl MaterializedTreeValue {
145    pub fn is_absent(&self) -> bool {
146        matches!(self, Self::Absent)
147    }
148
149    pub fn is_present(&self) -> bool {
150        !self.is_absent()
151    }
152}
153
154/// [`TreeValue::File`] with file content `reader`.
155pub struct MaterializedFileValue {
156    pub id: FileId,
157    pub executable: bool,
158    pub copy_id: CopyId,
159    pub reader: Pin<Box<dyn AsyncRead + Send>>,
160}
161
162impl MaterializedFileValue {
163    /// Reads file content until EOF. The provided `path` is used only for error
164    /// reporting purpose.
165    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
166        let mut buf = Vec::new();
167        self.reader
168            .read_to_end(&mut buf)
169            .await
170            .map_err(|err| BackendError::ReadFile {
171                path: path.to_owned(),
172                id: self.id.clone(),
173                source: err.into(),
174            })?;
175        Ok(buf)
176    }
177}
178
179/// Conflicted [`TreeValue::File`]s with file contents.
180pub struct MaterializedFileConflictValue {
181    /// File ids which preserve the shape of the tree conflict, to be used with
182    /// [`Merge::update_from_simplified()`].
183    pub unsimplified_ids: Merge<Option<FileId>>,
184    /// Simplified file ids, in which redundant id pairs are dropped.
185    pub ids: Merge<Option<FileId>>,
186    /// File contents corresponding to the simplified `ids`.
187    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
188    // when null bytes found?
189    pub contents: Merge<BString>,
190    /// Merged executable bit. `None` if there are changes in both executable
191    /// bit and file absence.
192    pub executable: Option<bool>,
193    /// Merged copy id. `None` if no single value could be determined.
194    pub copy_id: Option<CopyId>,
195}
196
197/// Reads the data associated with a `MergedTreeValue` so it can be written to
198/// e.g. the working copy or diff.
199pub async fn materialize_tree_value(
200    store: &Store,
201    path: &RepoPath,
202    value: MergedTreeValue,
203) -> BackendResult<MaterializedTreeValue> {
204    match materialize_tree_value_no_access_denied(store, path, value).await {
205        Err(BackendError::ReadAccessDenied { source, .. }) => {
206            Ok(MaterializedTreeValue::AccessDenied(source))
207        }
208        result => result,
209    }
210}
211
212async fn materialize_tree_value_no_access_denied(
213    store: &Store,
214    path: &RepoPath,
215    value: MergedTreeValue,
216) -> BackendResult<MaterializedTreeValue> {
217    match value.into_resolved() {
218        Ok(None) => Ok(MaterializedTreeValue::Absent),
219        Ok(Some(TreeValue::File {
220            id,
221            executable,
222            copy_id,
223        })) => {
224            let reader = store.read_file(path, &id).await?;
225            Ok(MaterializedTreeValue::File(MaterializedFileValue {
226                id,
227                executable,
228                copy_id,
229                reader,
230            }))
231        }
232        Ok(Some(TreeValue::Symlink(id))) => {
233            let target = store.read_symlink(path, &id).await?;
234            Ok(MaterializedTreeValue::Symlink { id, target })
235        }
236        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
237        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
238        Ok(Some(TreeValue::Conflict(_))) => {
239            panic!("cannot materialize legacy conflict object at path {path:?}");
240        }
241        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
242            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
243            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
244        },
245    }
246}
247
248/// Suppose `conflict` contains only files or absent entries, reads the file
249/// contents.
250pub async fn try_materialize_file_conflict_value(
251    store: &Store,
252    path: &RepoPath,
253    conflict: &MergedTreeValue,
254) -> BackendResult<Option<MaterializedFileConflictValue>> {
255    let (Some(unsimplified_ids), Some(executable_bits)) =
256        (conflict.to_file_merge(), conflict.to_executable_merge())
257    else {
258        return Ok(None);
259    };
260    let ids = unsimplified_ids.simplify();
261    let contents = extract_as_single_hunk(&ids, store, path).await?;
262    let executable = resolve_file_executable(&executable_bits);
263    Ok(Some(MaterializedFileConflictValue {
264        unsimplified_ids,
265        ids,
266        contents,
267        executable,
268        copy_id: Some(CopyId::placeholder()),
269    }))
270}
271
272/// Resolves conflicts in file executable bit, returns the original state if the
273/// file is deleted and executable bit is unchanged.
274pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
275    let resolved = merge.resolve_trivial().copied()?;
276    if resolved.is_some() {
277        resolved
278    } else {
279        // If the merge is resolved to None (absent), there should be the same
280        // number of Some(true) and Some(false). Pick the old state if
281        // unambiguous, so the new file inherits the original executable bit.
282        merge.removes().flatten().copied().all_equal_value().ok()
283    }
284}
285
286/// Describes what style should be used when materializing conflicts.
287#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
288#[serde(rename_all = "kebab-case")]
289pub enum ConflictMarkerStyle {
290    /// Style which shows a snapshot and a series of diffs to apply.
291    #[default]
292    Diff,
293    /// Style which shows a snapshot for each base and side.
294    Snapshot,
295    /// Style which replicates Git's "diff3" style to support external tools.
296    Git,
297}
298
299/// Options for conflict materialization.
300#[derive(Clone, Debug, Default)]
301pub struct ConflictMaterializeOptions {
302    pub marker_style: ConflictMarkerStyle,
303    pub marker_len: Option<usize>,
304}
305
306/// Characters which can be repeated to form a conflict marker line when
307/// materializing and parsing conflicts.
308#[derive(Clone, Copy, PartialEq, Eq)]
309#[repr(u8)]
310enum ConflictMarkerLineChar {
311    ConflictStart = b'<',
312    ConflictEnd = b'>',
313    Add = b'+',
314    Remove = b'-',
315    Diff = b'%',
316    GitAncestor = b'|',
317    GitSeparator = b'=',
318}
319
320impl ConflictMarkerLineChar {
321    /// Get the ASCII byte used for this conflict marker.
322    fn to_byte(self) -> u8 {
323        self as u8
324    }
325
326    /// Parse a byte to see if it corresponds with any kind of conflict marker.
327    fn parse_byte(byte: u8) -> Option<Self> {
328        match byte {
329            b'<' => Some(Self::ConflictStart),
330            b'>' => Some(Self::ConflictEnd),
331            b'+' => Some(Self::Add),
332            b'-' => Some(Self::Remove),
333            b'%' => Some(Self::Diff),
334            b'|' => Some(Self::GitAncestor),
335            b'=' => Some(Self::GitSeparator),
336            _ => None,
337        }
338    }
339}
340
341/// Represents a conflict marker line parsed from the file. Conflict marker
342/// lines consist of a single ASCII character repeated for a certain length.
343struct ConflictMarkerLine {
344    kind: ConflictMarkerLineChar,
345    len: usize,
346}
347
348/// Write a conflict marker to an output file.
349fn write_conflict_marker(
350    output: &mut dyn Write,
351    kind: ConflictMarkerLineChar,
352    len: usize,
353    suffix_text: &str,
354) -> io::Result<()> {
355    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
356
357    if suffix_text.is_empty() {
358        writeln!(output, "{conflict_marker}")
359    } else {
360        writeln!(output, "{conflict_marker} {suffix_text}")
361    }
362}
363
364/// Parse a conflict marker from a line of a file. The conflict marker may have
365/// any length (even less than MIN_CONFLICT_MARKER_LEN).
366fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
367    let first_byte = *line.first()?;
368    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
369    let len = line.iter().take_while(|&&b| b == first_byte).count();
370
371    if let Some(next_byte) = line.get(len) {
372        // If there is a character after the marker, it must be ASCII whitespace
373        if !next_byte.is_ascii_whitespace() {
374            return None;
375        }
376    }
377
378    Some(ConflictMarkerLine { kind, len })
379}
380
381/// Parse a conflict marker, expecting it to be at least a certain length. Any
382/// shorter conflict markers are ignored.
383fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
384    parse_conflict_marker_any_len(line)
385        .filter(|marker| marker.len >= expected_len)
386        .map(|marker| marker.kind)
387}
388
389/// Given a Merge of files, choose the conflict marker length to use when
390/// materializing conflicts.
391pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
392    let max_existing_marker_len = single_hunk
393        .iter()
394        .flat_map(|file| file.as_ref().lines_with_terminator())
395        .filter_map(parse_conflict_marker_any_len)
396        .map(|marker| marker.len)
397        .max()
398        .unwrap_or_default();
399
400    max_existing_marker_len
401        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
402        .max(MIN_CONFLICT_MARKER_LEN)
403}
404
405pub fn materialize_merge_result<T: AsRef<[u8]>>(
406    single_hunk: &Merge<T>,
407    output: &mut dyn Write,
408    options: &ConflictMaterializeOptions,
409) -> io::Result<()> {
410    let merge_result = files::merge_hunks(single_hunk);
411    match &merge_result {
412        MergeResult::Resolved(content) => output.write_all(content),
413        MergeResult::Conflict(hunks) => {
414            let marker_len = options
415                .marker_len
416                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
417            materialize_conflict_hunks(hunks, options.marker_style, marker_len, output)
418        }
419    }
420}
421
422pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
423    single_hunk: &Merge<T>,
424    options: &ConflictMaterializeOptions,
425) -> BString {
426    let merge_result = files::merge_hunks(single_hunk);
427    match merge_result {
428        MergeResult::Resolved(content) => content,
429        MergeResult::Conflict(hunks) => {
430            let marker_len = options
431                .marker_len
432                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
433            let mut output = Vec::new();
434            materialize_conflict_hunks(&hunks, options.marker_style, marker_len, &mut output)
435                .expect("writing to an in-memory buffer should never fail");
436            output.into()
437        }
438    }
439}
440
441fn materialize_conflict_hunks(
442    hunks: &[Merge<BString>],
443    conflict_marker_style: ConflictMarkerStyle,
444    conflict_marker_len: usize,
445    output: &mut dyn Write,
446) -> io::Result<()> {
447    let num_conflicts = hunks
448        .iter()
449        .filter(|hunk| hunk.as_resolved().is_none())
450        .count();
451    let mut conflict_index = 0;
452    for hunk in hunks {
453        if let Some(content) = hunk.as_resolved() {
454            output.write_all(content)?;
455        } else {
456            conflict_index += 1;
457            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
458
459            match (conflict_marker_style, hunk.as_slice()) {
460                // 2-sided conflicts can use Git-style conflict markers
461                (ConflictMarkerStyle::Git, [left, base, right]) => {
462                    materialize_git_style_conflict(
463                        left,
464                        base,
465                        right,
466                        &conflict_info,
467                        conflict_marker_len,
468                        output,
469                    )?;
470                }
471                _ => {
472                    materialize_jj_style_conflict(
473                        hunk,
474                        &conflict_info,
475                        conflict_marker_style,
476                        conflict_marker_len,
477                        output,
478                    )?;
479                }
480            }
481        }
482    }
483    Ok(())
484}
485
486fn materialize_git_style_conflict(
487    left: &[u8],
488    base: &[u8],
489    right: &[u8],
490    conflict_info: &str,
491    conflict_marker_len: usize,
492    output: &mut dyn Write,
493) -> io::Result<()> {
494    write_conflict_marker(
495        output,
496        ConflictMarkerLineChar::ConflictStart,
497        conflict_marker_len,
498        &format!("Side #1 ({conflict_info})"),
499    )?;
500    write_and_ensure_newline(output, left)?;
501
502    write_conflict_marker(
503        output,
504        ConflictMarkerLineChar::GitAncestor,
505        conflict_marker_len,
506        "Base",
507    )?;
508    write_and_ensure_newline(output, base)?;
509
510    // VS Code doesn't seem to support any trailing text on the separator line
511    write_conflict_marker(
512        output,
513        ConflictMarkerLineChar::GitSeparator,
514        conflict_marker_len,
515        "",
516    )?;
517
518    write_and_ensure_newline(output, right)?;
519    write_conflict_marker(
520        output,
521        ConflictMarkerLineChar::ConflictEnd,
522        conflict_marker_len,
523        &format!("Side #2 ({conflict_info} ends)"),
524    )?;
525
526    Ok(())
527}
528
529fn materialize_jj_style_conflict(
530    hunk: &Merge<BString>,
531    conflict_info: &str,
532    conflict_marker_style: ConflictMarkerStyle,
533    conflict_marker_len: usize,
534    output: &mut dyn Write,
535) -> io::Result<()> {
536    // Write a positive snapshot (side) of a conflict
537    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
538        write_conflict_marker(
539            output,
540            ConflictMarkerLineChar::Add,
541            conflict_marker_len,
542            &format!(
543                "Contents of side #{}{}",
544                add_index + 1,
545                maybe_no_eol_comment(data)
546            ),
547        )?;
548        write_and_ensure_newline(output, data)
549    };
550
551    // Write a negative snapshot (base) of a conflict
552    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
553        write_conflict_marker(
554            output,
555            ConflictMarkerLineChar::Remove,
556            conflict_marker_len,
557            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
558        )?;
559        write_and_ensure_newline(output, data)
560    };
561
562    // Write a diff from a negative term to a positive term
563    let write_diff =
564        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
565            let no_eol_remove = diff
566                .last()
567                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
568            let no_eol_add = diff
569                .last()
570                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
571            let no_eol_comment = match (no_eol_remove, no_eol_add) {
572                (true, true) => NO_EOL_COMMENT,
573                (true, _) => REMOVE_NO_EOL_COMMENT,
574                (_, true) => ADD_NO_EOL_COMMENT,
575                _ => "",
576            };
577            write_conflict_marker(
578                output,
579                ConflictMarkerLineChar::Diff,
580                conflict_marker_len,
581                &format!(
582                    "Changes from {base_str} to side #{}{no_eol_comment}",
583                    add_index + 1
584                ),
585            )?;
586            write_diff_hunks(diff, output)
587        };
588
589    write_conflict_marker(
590        output,
591        ConflictMarkerLineChar::ConflictStart,
592        conflict_marker_len,
593        conflict_info,
594    )?;
595    let mut add_index = 0;
596    for (base_index, left) in hunk.removes().enumerate() {
597        // The vast majority of conflicts one actually tries to resolve manually have 1
598        // base.
599        let base_str = if hunk.removes().len() == 1 {
600            "base".to_string()
601        } else {
602            format!("base #{}", base_index + 1)
603        };
604
605        let Some(right1) = hunk.get_add(add_index) else {
606            // If we have no more positive terms, emit the remaining negative terms as
607            // snapshots.
608            write_base(&base_str, left, output)?;
609            continue;
610        };
611
612        // For any style other than "diff", always emit sides and bases separately
613        if conflict_marker_style != ConflictMarkerStyle::Diff {
614            write_side(add_index, right1, output)?;
615            write_base(&base_str, left, output)?;
616            add_index += 1;
617            continue;
618        }
619
620        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
621        // Check if the diff against the next positive term is better. Since we want to
622        // preserve the order of the terms, we don't match against any later positive
623        // terms.
624        if let Some(right2) = hunk.get_add(add_index + 1) {
625            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
626            if diff_size(&diff2) < diff_size(&diff1) {
627                // If the next positive term is a better match, emit the current positive term
628                // as a snapshot and the next positive term as a diff.
629                write_side(add_index, right1, output)?;
630                write_diff(&base_str, add_index + 1, &diff2, output)?;
631                add_index += 2;
632                continue;
633            }
634        }
635
636        write_diff(&base_str, add_index, &diff1, output)?;
637        add_index += 1;
638    }
639
640    // Emit the remaining positive terms as snapshots.
641    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
642        write_side(add_index, slice, output)?;
643    }
644    write_conflict_marker(
645        output,
646        ConflictMarkerLineChar::ConflictEnd,
647        conflict_marker_len,
648        &format!("{conflict_info} ends"),
649    )?;
650    Ok(())
651}
652
653fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
654    if has_no_eol(slice) {
655        NO_EOL_COMMENT
656    } else {
657        ""
658    }
659}
660
661// Write a chunk of data, ensuring that it doesn't end with a line which is
662// missing its terminating newline.
663fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
664    output.write_all(data)?;
665    if has_no_eol(data) {
666        writeln!(output)?;
667    }
668    Ok(())
669}
670
671// Check whether a slice is missing its terminating newline character.
672fn has_no_eol(slice: &[u8]) -> bool {
673    slice.last().is_some_and(|&last| last != b'\n')
674}
675
676fn diff_size(hunks: &[DiffHunk]) -> usize {
677    hunks
678        .iter()
679        .map(|hunk| match hunk.kind {
680            DiffHunkKind::Matching => 0,
681            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
682        })
683        .sum()
684}
685
686pub struct MaterializedTreeDiffEntry {
687    pub path: CopiesTreeDiffEntryPath,
688    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
689}
690
691pub fn materialized_diff_stream<'a>(
692    store: &'a Store,
693    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
694) -> impl Stream<Item = MaterializedTreeDiffEntry> + use<'a> {
695    tree_diff
696        .map(async |CopiesTreeDiffEntry { path, values }| match values {
697            Err(err) => MaterializedTreeDiffEntry {
698                path,
699                values: Err(err),
700            },
701            Ok((before, after)) => {
702                let before_future = materialize_tree_value(store, path.source(), before);
703                let after_future = materialize_tree_value(store, path.target(), after);
704                let values = try_join!(before_future, after_future);
705                MaterializedTreeDiffEntry { path, values }
706            }
707        })
708        .buffered((store.concurrency() / 2).max(1))
709}
710
711/// Parses conflict markers from a slice.
712///
713/// Returns `None` if there were no valid conflict markers. The caller
714/// has to provide the expected number of merge sides (adds). Conflict
715/// markers that are otherwise valid will be considered invalid if
716/// they don't have the expected arity.
717///
718/// All conflict markers in the file must be at least as long as the expected
719/// length. Any shorter conflict markers will be ignored.
720// TODO: "parse" is not usually the opposite of "materialize", so maybe we
721// should rename them to "serialize" and "deserialize"?
722pub fn parse_conflict(
723    input: &[u8],
724    num_sides: usize,
725    expected_marker_len: usize,
726) -> Option<Vec<Merge<BString>>> {
727    if input.is_empty() {
728        return None;
729    }
730    let mut hunks = vec![];
731    let mut pos = 0;
732    let mut resolved_start = 0;
733    let mut conflict_start = None;
734    let mut conflict_start_len = 0;
735    for line in input.lines_with_terminator() {
736        match parse_conflict_marker(line, expected_marker_len) {
737            Some(ConflictMarkerLineChar::ConflictStart) => {
738                conflict_start = Some(pos);
739                conflict_start_len = line.len();
740            }
741            Some(ConflictMarkerLineChar::ConflictEnd) => {
742                if let Some(conflict_start_index) = conflict_start.take() {
743                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
744                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
745                    if hunk.num_sides() == num_sides {
746                        let resolved_slice = &input[resolved_start..conflict_start_index];
747                        if !resolved_slice.is_empty() {
748                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
749                        }
750                        hunks.push(hunk);
751                        resolved_start = pos + line.len();
752                    }
753                }
754            }
755            _ => {}
756        }
757        pos += line.len();
758    }
759
760    if hunks.is_empty() {
761        None
762    } else {
763        if resolved_start < input.len() {
764            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
765        }
766        Some(hunks)
767    }
768}
769
770/// This method handles parsing both JJ-style and Git-style conflict markers,
771/// meaning that switching conflict marker styles won't prevent existing files
772/// with other conflict marker styles from being parsed successfully. The
773/// conflict marker style to use for parsing is determined based on the first
774/// line of the hunk.
775fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
776    // If the hunk starts with a conflict marker, find its first character
777    let initial_conflict_marker = input
778        .lines_with_terminator()
779        .next()
780        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
781
782    match initial_conflict_marker {
783        // JJ-style conflicts must start with one of these 3 conflict marker lines
784        Some(
785            ConflictMarkerLineChar::Diff
786            | ConflictMarkerLineChar::Remove
787            | ConflictMarkerLineChar::Add,
788        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
789        // Git-style conflicts either must not start with a conflict marker line, or must start with
790        // the "|||||||" conflict marker line (if the first side was empty)
791        None | Some(ConflictMarkerLineChar::GitAncestor) => {
792            parse_git_style_conflict_hunk(input, expected_marker_len)
793        }
794        // No other conflict markers are allowed at the start of a hunk
795        Some(_) => Merge::resolved(BString::new(vec![])),
796    }
797}
798
799fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
800    enum State {
801        Diff,
802        Remove,
803        Add,
804        Unknown,
805    }
806    let mut state = State::Unknown;
807    let mut removes = vec![];
808    let mut adds = vec![];
809    for line in input.lines_with_terminator() {
810        match parse_conflict_marker(line, expected_marker_len) {
811            Some(ConflictMarkerLineChar::Diff) => {
812                state = State::Diff;
813                removes.push(BString::new(vec![]));
814                adds.push(BString::new(vec![]));
815                continue;
816            }
817            Some(ConflictMarkerLineChar::Remove) => {
818                state = State::Remove;
819                removes.push(BString::new(vec![]));
820                continue;
821            }
822            Some(ConflictMarkerLineChar::Add) => {
823                state = State::Add;
824                adds.push(BString::new(vec![]));
825                continue;
826            }
827            _ => {}
828        }
829        match state {
830            State::Diff => {
831                if let Some(rest) = line.strip_prefix(b"-") {
832                    removes.last_mut().unwrap().extend_from_slice(rest);
833                } else if let Some(rest) = line.strip_prefix(b"+") {
834                    adds.last_mut().unwrap().extend_from_slice(rest);
835                } else if let Some(rest) = line.strip_prefix(b" ") {
836                    removes.last_mut().unwrap().extend_from_slice(rest);
837                    adds.last_mut().unwrap().extend_from_slice(rest);
838                } else if line == b"\n" || line == b"\r\n" {
839                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
840                    // be unfortunate if this prevented the conflict from being parsed, so we add
841                    // the empty line to the "remove" and "add" as if there was a space in front
842                    removes.last_mut().unwrap().extend_from_slice(line);
843                    adds.last_mut().unwrap().extend_from_slice(line);
844                } else {
845                    // Doesn't look like a valid conflict
846                    return Merge::resolved(BString::new(vec![]));
847                }
848            }
849            State::Remove => {
850                removes.last_mut().unwrap().extend_from_slice(line);
851            }
852            State::Add => {
853                adds.last_mut().unwrap().extend_from_slice(line);
854            }
855            State::Unknown => {
856                // Doesn't look like a valid conflict
857                return Merge::resolved(BString::new(vec![]));
858            }
859        }
860    }
861
862    if adds.len() == removes.len() + 1 {
863        Merge::from_removes_adds(removes, adds)
864    } else {
865        // Doesn't look like a valid conflict
866        Merge::resolved(BString::new(vec![]))
867    }
868}
869
870fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
871    #[derive(PartialEq, Eq)]
872    enum State {
873        Left,
874        Base,
875        Right,
876    }
877    let mut state = State::Left;
878    let mut left = BString::new(vec![]);
879    let mut base = BString::new(vec![]);
880    let mut right = BString::new(vec![]);
881    for line in input.lines_with_terminator() {
882        match parse_conflict_marker(line, expected_marker_len) {
883            Some(ConflictMarkerLineChar::GitAncestor) => {
884                if state == State::Left {
885                    state = State::Base;
886                    continue;
887                } else {
888                    // Base must come after left
889                    return Merge::resolved(BString::new(vec![]));
890                }
891            }
892            Some(ConflictMarkerLineChar::GitSeparator) => {
893                if state == State::Base {
894                    state = State::Right;
895                    continue;
896                } else {
897                    // Right must come after base
898                    return Merge::resolved(BString::new(vec![]));
899                }
900            }
901            _ => {}
902        }
903        match state {
904            State::Left => left.extend_from_slice(line),
905            State::Base => base.extend_from_slice(line),
906            State::Right => right.extend_from_slice(line),
907        }
908    }
909
910    if state == State::Right {
911        Merge::from_vec(vec![left, base, right])
912    } else {
913        // Doesn't look like a valid conflict
914        Merge::resolved(BString::new(vec![]))
915    }
916}
917
918/// Parses conflict markers in `content` and returns an updated version of
919/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
920/// single resolves `FileId` will be returned.
921pub async fn update_from_content(
922    file_ids: &Merge<Option<FileId>>,
923    store: &Store,
924    path: &RepoPath,
925    content: &[u8],
926    conflict_marker_style: ConflictMarkerStyle,
927    conflict_marker_len: usize,
928) -> BackendResult<Merge<Option<FileId>>> {
929    let simplified_file_ids = file_ids.simplify();
930
931    // First check if the new content is unchanged compared to the old content. If
932    // it is, we don't need parse the content or write any new objects to the
933    // store. This is also a way of making sure that unchanged tree/file
934    // conflicts (for example) are not converted to regular files in the working
935    // copy.
936    let mut old_content = Vec::with_capacity(content.len());
937    let merge_hunk = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
938    let materialize_options = ConflictMaterializeOptions {
939        marker_style: conflict_marker_style,
940        marker_len: Some(conflict_marker_len),
941    };
942    materialize_merge_result(&merge_hunk, &mut old_content, &materialize_options).unwrap();
943    if content == old_content {
944        return Ok(file_ids.clone());
945    }
946
947    // Parse conflicts from the new content using the arity of the simplified
948    // conflicts.
949    let Some(mut hunks) = parse_conflict(
950        content,
951        simplified_file_ids.num_sides(),
952        conflict_marker_len,
953    ) else {
954        // Either there are no markers or they don't have the expected arity
955        let file_id = store.write_file(path, &mut &content[..]).await?;
956        return Ok(Merge::normal(file_id));
957    };
958
959    // If there is a conflict at the end of the file and a term ends with a newline,
960    // check whether the original term ended with a newline. If it didn't, then
961    // remove the newline since it was added automatically when materializing.
962    if let Some(last_hunk) = hunks.last_mut().filter(|hunk| !hunk.is_resolved()) {
963        for (original_content, term) in merge_hunk.iter().zip_eq(last_hunk.iter_mut()) {
964            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
965                term.pop();
966            }
967        }
968    }
969
970    let mut contents = simplified_file_ids.map(|_| vec![]);
971    for hunk in hunks {
972        if let Some(slice) = hunk.as_resolved() {
973            for content in contents.iter_mut() {
974                content.extend_from_slice(slice);
975            }
976        } else {
977            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
978                content.extend(Vec::from(slice));
979            }
980        }
981    }
982
983    // If the user edited the empty placeholder for an absent side, we consider the
984    // conflict resolved.
985    if zip(contents.iter(), simplified_file_ids.iter())
986        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
987    {
988        let file_id = store.write_file(path, &mut &content[..]).await?;
989        return Ok(Merge::normal(file_id));
990    }
991
992    // Now write the new files contents we found by parsing the file with conflict
993    // markers.
994    // TODO: Write these concurrently
995    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
996        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
997            match file_id {
998                Some(_) => {
999                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1000                    Ok(Some(file_id))
1001                }
1002                None => {
1003                    // The missing side of a conflict is still represented by
1004                    // the empty string we materialized it as
1005                    Ok(None)
1006                }
1007            }
1008        })
1009        .try_collect()?;
1010
1011    // If the conflict was simplified, expand the conflict to the original
1012    // number of sides.
1013    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1014        file_ids
1015            .clone()
1016            .update_from_simplified(Merge::from_vec(new_file_ids))
1017    } else {
1018        Merge::from_vec(new_file_ids)
1019    };
1020    Ok(new_file_ids)
1021}
1022
1023#[cfg(test)]
1024mod tests {
1025    use super::*;
1026
1027    #[test]
1028    fn test_resolve_file_executable() {
1029        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1030            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1031        }
1032
1033        // already resolved
1034        assert_eq!(resolve([None]), None);
1035        assert_eq!(resolve([Some(false)]), Some(false));
1036        assert_eq!(resolve([Some(true)]), Some(true));
1037
1038        // trivially resolved
1039        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1040        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1041        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1042        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1043
1044        // unresolvable
1045        assert_eq!(resolve([Some(false), Some(true), None]), None);
1046
1047        // trivially resolved to absent, so pick the original state
1048        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1049        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1050        assert_eq!(
1051            resolve([None, None, Some(true), Some(true), None]),
1052            Some(true)
1053        );
1054
1055        // trivially resolved to absent, and the original state is ambiguous
1056        assert_eq!(
1057            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1058            None
1059        );
1060        assert_eq!(
1061            resolve([
1062                None,
1063                Some(true),
1064                Some(true),
1065                Some(false),
1066                Some(false),
1067                Some(false),
1068                Some(false),
1069            ]),
1070            None
1071        );
1072    }
1073}