jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BStr;
23use bstr::BString;
24use bstr::ByteSlice as _;
25use futures::Stream;
26use futures::StreamExt as _;
27use futures::stream::BoxStream;
28use futures::try_join;
29use itertools::Itertools as _;
30use pollster::FutureExt as _;
31use tokio::io::AsyncRead;
32use tokio::io::AsyncReadExt as _;
33
34use crate::backend::BackendError;
35use crate::backend::BackendResult;
36use crate::backend::CommitId;
37use crate::backend::CopyId;
38use crate::backend::FileId;
39use crate::backend::SymlinkId;
40use crate::backend::TreeId;
41use crate::backend::TreeValue;
42use crate::conflict_labels::ConflictLabels;
43use crate::copies::CopiesTreeDiffEntry;
44use crate::copies::CopiesTreeDiffEntryPath;
45use crate::diff::ContentDiff;
46use crate::diff::DiffHunk;
47use crate::diff::DiffHunkKind;
48use crate::files;
49use crate::files::MergeResult;
50use crate::merge::Diff;
51use crate::merge::Merge;
52use crate::merge::MergedTreeValue;
53use crate::merge::SameChange;
54use crate::repo_path::RepoPath;
55use crate::store::Store;
56use crate::tree_merge::MergeOptions;
57
58/// Minimum length of conflict markers.
59pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
60
61/// If a file already contains lines which look like conflict markers of length
62/// N, then the conflict markers we add will be of length (N + increment). This
63/// number is chosen to make the conflict markers noticeably longer than the
64/// existing markers.
65const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
66
67/// Comment for missing terminating newline in a term of a conflict.
68const NO_EOL_COMMENT: &str = " (no terminating newline)";
69
70fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
71    for hunk in hunks {
72        match hunk.kind {
73            DiffHunkKind::Matching => {
74                debug_assert!(hunk.contents.iter().all_equal());
75                for line in hunk.contents[0].lines_with_terminator() {
76                    file.write_all(b" ")?;
77                    write_and_ensure_newline(file, line)?;
78                }
79            }
80            DiffHunkKind::Different => {
81                for line in hunk.contents[0].lines_with_terminator() {
82                    file.write_all(b"-")?;
83                    write_and_ensure_newline(file, line)?;
84                }
85                for line in hunk.contents[1].lines_with_terminator() {
86                    file.write_all(b"+")?;
87                    write_and_ensure_newline(file, line)?;
88                }
89            }
90        }
91    }
92    Ok(())
93}
94
95async fn get_file_contents(
96    store: &Store,
97    path: &RepoPath,
98    term: &Option<FileId>,
99) -> BackendResult<BString> {
100    match term {
101        Some(id) => {
102            let mut reader = store.read_file(path, id).await?;
103            let mut content = vec![];
104            reader
105                .read_to_end(&mut content)
106                .await
107                .map_err(|err| BackendError::ReadFile {
108                    path: path.to_owned(),
109                    id: id.clone(),
110                    source: err.into(),
111                })?;
112            Ok(BString::new(content))
113        }
114        // If the conflict had removed the file on one side, we pretend that the file
115        // was empty there.
116        None => Ok(BString::new(vec![])),
117    }
118}
119
120pub async fn extract_as_single_hunk(
121    merge: &Merge<Option<FileId>>,
122    store: &Store,
123    path: &RepoPath,
124) -> BackendResult<Merge<BString>> {
125    merge
126        .try_map_async(|term| get_file_contents(store, path, term))
127        .await
128}
129
130/// A type similar to `MergedTreeValue` but with associated data to include in
131/// e.g. the working copy or in a diff.
132pub enum MaterializedTreeValue {
133    Absent,
134    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
135    File(MaterializedFileValue),
136    Symlink {
137        id: SymlinkId,
138        target: String,
139    },
140    FileConflict(MaterializedFileConflictValue),
141    OtherConflict {
142        id: MergedTreeValue,
143        labels: ConflictLabels,
144    },
145    GitSubmodule(CommitId),
146    Tree(TreeId),
147}
148
149impl MaterializedTreeValue {
150    pub fn is_absent(&self) -> bool {
151        matches!(self, Self::Absent)
152    }
153
154    pub fn is_present(&self) -> bool {
155        !self.is_absent()
156    }
157}
158
159/// [`TreeValue::File`] with file content `reader`.
160pub struct MaterializedFileValue {
161    pub id: FileId,
162    pub executable: bool,
163    pub copy_id: CopyId,
164    pub reader: Pin<Box<dyn AsyncRead + Send>>,
165}
166
167impl MaterializedFileValue {
168    /// Reads file content until EOF. The provided `path` is used only for error
169    /// reporting purpose.
170    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
171        let mut buf = Vec::new();
172        self.reader
173            .read_to_end(&mut buf)
174            .await
175            .map_err(|err| BackendError::ReadFile {
176                path: path.to_owned(),
177                id: self.id.clone(),
178                source: err.into(),
179            })?;
180        Ok(buf)
181    }
182}
183
184/// Conflicted [`TreeValue::File`]s with file contents.
185pub struct MaterializedFileConflictValue {
186    /// File ids which preserve the shape of the tree conflict, to be used with
187    /// [`Merge::update_from_simplified()`].
188    pub unsimplified_ids: Merge<Option<FileId>>,
189    /// Simplified file ids, in which redundant id pairs are dropped.
190    pub ids: Merge<Option<FileId>>,
191    /// Simplified conflict labels, matching `ids`.
192    pub labels: ConflictLabels,
193    /// File contents corresponding to the simplified `ids`.
194    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
195    // when null bytes found?
196    pub contents: Merge<BString>,
197    /// Merged executable bit. `None` if there are changes in both executable
198    /// bit and file absence.
199    pub executable: Option<bool>,
200    /// Merged copy id. `None` if no single value could be determined.
201    pub copy_id: Option<CopyId>,
202}
203
204/// Reads the data associated with a `MergedTreeValue` so it can be written to
205/// e.g. the working copy or diff.
206pub async fn materialize_tree_value(
207    store: &Store,
208    path: &RepoPath,
209    value: MergedTreeValue,
210    conflict_labels: &ConflictLabels,
211) -> BackendResult<MaterializedTreeValue> {
212    match materialize_tree_value_no_access_denied(store, path, value, conflict_labels).await {
213        Err(BackendError::ReadAccessDenied { source, .. }) => {
214            Ok(MaterializedTreeValue::AccessDenied(source))
215        }
216        result => result,
217    }
218}
219
220async fn materialize_tree_value_no_access_denied(
221    store: &Store,
222    path: &RepoPath,
223    value: MergedTreeValue,
224    conflict_labels: &ConflictLabels,
225) -> BackendResult<MaterializedTreeValue> {
226    match value.into_resolved() {
227        Ok(None) => Ok(MaterializedTreeValue::Absent),
228        Ok(Some(TreeValue::File {
229            id,
230            executable,
231            copy_id,
232        })) => {
233            let reader = store.read_file(path, &id).await?;
234            Ok(MaterializedTreeValue::File(MaterializedFileValue {
235                id,
236                executable,
237                copy_id,
238                reader,
239            }))
240        }
241        Ok(Some(TreeValue::Symlink(id))) => {
242            let target = store.read_symlink(path, &id).await?;
243            Ok(MaterializedTreeValue::Symlink { id, target })
244        }
245        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
246        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
247        Err(conflict) => {
248            match try_materialize_file_conflict_value(store, path, &conflict, conflict_labels)
249                .await?
250            {
251                Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
252                None => Ok(MaterializedTreeValue::OtherConflict {
253                    id: conflict,
254                    labels: conflict_labels.clone(),
255                }),
256            }
257        }
258    }
259}
260
261/// Suppose `conflict` contains only files or absent entries, reads the file
262/// contents.
263pub async fn try_materialize_file_conflict_value(
264    store: &Store,
265    path: &RepoPath,
266    conflict: &MergedTreeValue,
267    conflict_labels: &ConflictLabels,
268) -> BackendResult<Option<MaterializedFileConflictValue>> {
269    let (Some(unsimplified_ids), Some(executable_bits)) =
270        (conflict.to_file_merge(), conflict.to_executable_merge())
271    else {
272        return Ok(None);
273    };
274    let (labels, ids) = conflict_labels.simplify_with(&unsimplified_ids);
275    let contents = extract_as_single_hunk(&ids, store, path).await?;
276    let executable = resolve_file_executable(&executable_bits);
277    Ok(Some(MaterializedFileConflictValue {
278        unsimplified_ids,
279        ids,
280        labels,
281        contents,
282        executable,
283        copy_id: Some(CopyId::placeholder()),
284    }))
285}
286
287/// Resolves conflicts in file executable bit, returns the original state if the
288/// file is deleted and executable bit is unchanged.
289pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
290    let resolved = merge.resolve_trivial(SameChange::Accept).copied()?;
291    if resolved.is_some() {
292        resolved
293    } else {
294        // If the merge is resolved to None (absent), there should be the same
295        // number of Some(true) and Some(false). Pick the old state if
296        // unambiguous, so the new file inherits the original executable bit.
297        merge.removes().flatten().copied().all_equal_value().ok()
298    }
299}
300
301/// Describes what style should be used when materializing conflicts.
302#[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Deserialize)]
303#[serde(rename_all = "kebab-case")]
304pub enum ConflictMarkerStyle {
305    /// Style which shows a snapshot and a series of diffs to apply.
306    Diff,
307    /// Similar to "diff", but always picks the first side as the snapshot. May
308    /// become the default in a future version.
309    DiffExperimental,
310    /// Style which shows a snapshot for each base and side.
311    Snapshot,
312    /// Style which replicates Git's "diff3" style to support external tools.
313    Git,
314}
315
316impl ConflictMarkerStyle {
317    /// Returns true if this style allows `%%%%%%%` conflict markers.
318    pub fn allows_diff(&self) -> bool {
319        matches!(self, Self::Diff | Self::DiffExperimental)
320    }
321}
322
323/// Options for conflict materialization.
324#[derive(Clone, Debug)]
325pub struct ConflictMaterializeOptions {
326    pub marker_style: ConflictMarkerStyle,
327    pub marker_len: Option<usize>,
328    pub merge: MergeOptions,
329}
330
331/// Characters which can be repeated to form a conflict marker line when
332/// materializing and parsing conflicts.
333#[derive(Clone, Copy, PartialEq, Eq)]
334#[repr(u8)]
335enum ConflictMarkerLineChar {
336    ConflictStart = b'<',
337    ConflictEnd = b'>',
338    Add = b'+',
339    Remove = b'-',
340    Diff = b'%',
341    Note = b'\\',
342    GitAncestor = b'|',
343    GitSeparator = b'=',
344}
345
346impl ConflictMarkerLineChar {
347    /// Get the ASCII byte used for this conflict marker.
348    fn to_byte(self) -> u8 {
349        self as u8
350    }
351
352    /// Parse a byte to see if it corresponds with any kind of conflict marker.
353    fn parse_byte(byte: u8) -> Option<Self> {
354        match byte {
355            b'<' => Some(Self::ConflictStart),
356            b'>' => Some(Self::ConflictEnd),
357            b'+' => Some(Self::Add),
358            b'-' => Some(Self::Remove),
359            b'%' => Some(Self::Diff),
360            b'\\' => Some(Self::Note),
361            b'|' => Some(Self::GitAncestor),
362            b'=' => Some(Self::GitSeparator),
363            _ => None,
364        }
365    }
366}
367
368/// Represents a conflict marker line parsed from the file. Conflict marker
369/// lines consist of a single ASCII character repeated for a certain length.
370struct ConflictMarkerLine {
371    kind: ConflictMarkerLineChar,
372    len: usize,
373}
374
375/// Write a conflict marker to an output file.
376fn write_conflict_marker(
377    output: &mut dyn Write,
378    kind: ConflictMarkerLineChar,
379    len: usize,
380    suffix_text: &str,
381) -> io::Result<()> {
382    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
383
384    if suffix_text.is_empty() {
385        writeln!(output, "{conflict_marker}")
386    } else {
387        writeln!(output, "{conflict_marker} {suffix_text}")
388    }
389}
390
391/// Parse a conflict marker from a line of a file. The conflict marker may have
392/// any length (even less than MIN_CONFLICT_MARKER_LEN).
393fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
394    let first_byte = *line.first()?;
395    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
396    let len = line.iter().take_while(|&&b| b == first_byte).count();
397
398    if let Some(next_byte) = line.get(len) {
399        // If there is a character after the marker, it must be ASCII whitespace
400        if !next_byte.is_ascii_whitespace() {
401            return None;
402        }
403    }
404
405    Some(ConflictMarkerLine { kind, len })
406}
407
408/// Parse a conflict marker, expecting it to be at least a certain length. Any
409/// shorter conflict markers are ignored.
410fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
411    parse_conflict_marker_any_len(line)
412        .filter(|marker| marker.len >= expected_len)
413        .map(|marker| marker.kind)
414}
415
416/// Given a Merge of files, choose the conflict marker length to use when
417/// materializing conflicts.
418pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
419    let max_existing_marker_len = single_hunk
420        .iter()
421        .flat_map(|file| file.as_ref().lines_with_terminator())
422        .filter_map(parse_conflict_marker_any_len)
423        .map(|marker| marker.len)
424        .max()
425        .unwrap_or_default();
426
427    max_existing_marker_len
428        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
429        .max(MIN_CONFLICT_MARKER_LEN)
430}
431
432pub fn materialize_merge_result<T: AsRef<[u8]>>(
433    single_hunk: &Merge<T>,
434    labels: &ConflictLabels,
435    output: &mut dyn Write,
436    options: &ConflictMaterializeOptions,
437) -> io::Result<()> {
438    let merge_result = files::merge_hunks(single_hunk, &options.merge);
439    match &merge_result {
440        MergeResult::Resolved(content) => output.write_all(content),
441        MergeResult::Conflict(hunks) => {
442            let marker_len = options
443                .marker_len
444                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
445            materialize_conflict_hunks(hunks, options.marker_style, marker_len, labels, output)
446        }
447    }
448}
449
450pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
451    single_hunk: &Merge<T>,
452    labels: &ConflictLabels,
453    options: &ConflictMaterializeOptions,
454) -> BString {
455    let merge_result = files::merge_hunks(single_hunk, &options.merge);
456    match merge_result {
457        MergeResult::Resolved(content) => content,
458        MergeResult::Conflict(hunks) => {
459            let marker_len = options
460                .marker_len
461                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
462            let mut output = Vec::new();
463            materialize_conflict_hunks(
464                &hunks,
465                options.marker_style,
466                marker_len,
467                labels,
468                &mut output,
469            )
470            .expect("writing to an in-memory buffer should never fail");
471            output.into()
472        }
473    }
474}
475
476fn materialize_conflict_hunks(
477    hunks: &[Merge<BString>],
478    conflict_marker_style: ConflictMarkerStyle,
479    conflict_marker_len: usize,
480    labels: &ConflictLabels,
481    output: &mut dyn Write,
482) -> io::Result<()> {
483    let num_conflicts = hunks
484        .iter()
485        .filter(|hunk| hunk.as_resolved().is_none())
486        .count();
487    let mut conflict_index = 0;
488    for hunk in hunks {
489        if let Some(content) = hunk.as_resolved() {
490            output.write_all(content)?;
491        } else {
492            conflict_index += 1;
493            let conflict_info = format!("conflict {conflict_index} of {num_conflicts}");
494
495            match (conflict_marker_style, hunk.as_slice()) {
496                // 2-sided conflicts can use Git-style conflict markers
497                (ConflictMarkerStyle::Git, [left, base, right]) => {
498                    materialize_git_style_conflict(
499                        left,
500                        base,
501                        right,
502                        conflict_marker_len,
503                        labels,
504                        output,
505                    )?;
506                }
507                _ => {
508                    materialize_jj_style_conflict(
509                        hunk,
510                        &conflict_info,
511                        conflict_marker_style,
512                        conflict_marker_len,
513                        labels,
514                        output,
515                    )?;
516                }
517            }
518        }
519    }
520    Ok(())
521}
522
523fn materialize_git_style_conflict(
524    left: &[u8],
525    base: &[u8],
526    right: &[u8],
527    conflict_marker_len: usize,
528    labels: &ConflictLabels,
529    output: &mut dyn Write,
530) -> io::Result<()> {
531    write_conflict_marker(
532        output,
533        ConflictMarkerLineChar::ConflictStart,
534        conflict_marker_len,
535        labels.get_add(0).unwrap_or("side #1"),
536    )?;
537    write_and_ensure_newline(output, left)?;
538
539    write_conflict_marker(
540        output,
541        ConflictMarkerLineChar::GitAncestor,
542        conflict_marker_len,
543        labels.get_remove(0).unwrap_or("base"),
544    )?;
545    write_and_ensure_newline(output, base)?;
546
547    // VS Code doesn't seem to support any trailing text on the separator line
548    write_conflict_marker(
549        output,
550        ConflictMarkerLineChar::GitSeparator,
551        conflict_marker_len,
552        "",
553    )?;
554
555    write_and_ensure_newline(output, right)?;
556    write_conflict_marker(
557        output,
558        ConflictMarkerLineChar::ConflictEnd,
559        conflict_marker_len,
560        labels.get_add(1).unwrap_or("side #2"),
561    )?;
562
563    Ok(())
564}
565
566fn materialize_jj_style_conflict(
567    hunk: &Merge<BString>,
568    conflict_info: &str,
569    conflict_marker_style: ConflictMarkerStyle,
570    conflict_marker_len: usize,
571    labels: &ConflictLabels,
572    output: &mut dyn Write,
573) -> io::Result<()> {
574    let get_side_label = |add_index: usize| -> String {
575        labels.get_add(add_index).map_or_else(
576            || format!("side #{}", add_index + 1),
577            |label| label.to_owned(),
578        )
579    };
580
581    let get_base_label = |base_index: usize| -> String {
582        labels
583            .get_remove(base_index)
584            .map(|label| label.to_owned())
585            .unwrap_or_else(|| {
586                // The vast majority of conflicts one actually tries to resolve manually have 1
587                // base.
588                if hunk.removes().len() == 1 {
589                    "base".to_string()
590                } else {
591                    format!("base #{}", base_index + 1)
592                }
593            })
594    };
595
596    // Write a positive snapshot (side) of a conflict
597    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
598        write_conflict_marker(
599            output,
600            ConflictMarkerLineChar::Add,
601            conflict_marker_len,
602            &(get_side_label(add_index) + maybe_no_eol_comment(data)),
603        )?;
604        write_and_ensure_newline(output, data)
605    };
606
607    // Write a negative snapshot (base) of a conflict
608    let write_base = |base_index: usize, data: &[u8], output: &mut dyn Write| {
609        write_conflict_marker(
610            output,
611            ConflictMarkerLineChar::Remove,
612            conflict_marker_len,
613            &(get_base_label(base_index) + maybe_no_eol_comment(data)),
614        )?;
615        write_and_ensure_newline(output, data)
616    };
617
618    // Write a diff from a negative term to a positive term
619    let write_diff =
620        |base_index: usize, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
621            let (remove_last_hunk, add_last_hunk) = diff
622                .last()
623                .map_or((BStr::new(""), BStr::new("")), |diff_hunk| {
624                    (diff_hunk.contents[0], diff_hunk.contents[1])
625                });
626            if labels.get_add(add_index).is_none()
627                && labels.get_remove(base_index).is_none()
628                && !has_no_eol(remove_last_hunk)
629                && !has_no_eol(add_last_hunk)
630            {
631                // TODO: remove this format when all conflicts have labels
632                // Use simple conflict markers when there are no conflict labels and no comment
633                // for missing terminating newline.
634                write_conflict_marker(
635                    output,
636                    ConflictMarkerLineChar::Diff,
637                    conflict_marker_len,
638                    &format!(
639                        "diff from {} to {}",
640                        get_base_label(base_index),
641                        get_side_label(add_index)
642                    ),
643                )?;
644            } else {
645                write_conflict_marker(
646                    output,
647                    ConflictMarkerLineChar::Diff,
648                    conflict_marker_len,
649                    &format!(
650                        "diff from: {}{}",
651                        get_base_label(base_index),
652                        maybe_no_eol_comment(remove_last_hunk)
653                    ),
654                )?;
655                write_conflict_marker(
656                    output,
657                    ConflictMarkerLineChar::Note,
658                    conflict_marker_len,
659                    &format!(
660                        "       to: {}{}",
661                        get_side_label(add_index),
662                        maybe_no_eol_comment(add_last_hunk)
663                    ),
664                )?;
665            }
666            write_diff_hunks(diff, output)
667        };
668
669    write_conflict_marker(
670        output,
671        ConflictMarkerLineChar::ConflictStart,
672        conflict_marker_len,
673        conflict_info,
674    )?;
675    let mut snapshot_written = false;
676    // The only conflict marker style which can start with a diff is "diff".
677    if conflict_marker_style != ConflictMarkerStyle::Diff {
678        write_side(0, hunk.first(), output)?;
679        snapshot_written = true;
680    }
681    for (base_index, left) in hunk.removes().enumerate() {
682        let add_index = if snapshot_written {
683            base_index + 1
684        } else {
685            base_index
686        };
687
688        let right1 = hunk.get_add(add_index).unwrap();
689
690        // Write the base and side separately if the conflict marker style doesn't
691        // support diffs.
692        if !conflict_marker_style.allows_diff() {
693            write_base(base_index, left, output)?;
694            write_side(add_index, right1, output)?;
695            continue;
696        }
697
698        let diff1 = ContentDiff::by_line([&left, &right1]).hunks().collect_vec();
699        // If we haven't written a snapshot yet, then we need to decide whether to
700        // format the current side as a snapshot or a diff. We write the current side as
701        // a diff unless the next side has a smaller diff compared to the current base.
702        if !snapshot_written {
703            let right2 = hunk.get_add(add_index + 1).unwrap();
704            let diff2 = ContentDiff::by_line([&left, &right2]).hunks().collect_vec();
705            if diff_size(&diff2) < diff_size(&diff1) {
706                // If the next positive term is a better match, emit the current positive term
707                // as a snapshot and the next positive term as a diff.
708                write_side(add_index, right1, output)?;
709                write_diff(base_index, add_index + 1, &diff2, output)?;
710                snapshot_written = true;
711                continue;
712            }
713        }
714
715        write_diff(base_index, add_index, &diff1, output)?;
716    }
717
718    // If we still didn't emit a snapshot, the last side is the snapshot.
719    if !snapshot_written {
720        let add_index = hunk.num_sides() - 1;
721        write_side(add_index, hunk.get_add(add_index).unwrap(), output)?;
722    }
723    write_conflict_marker(
724        output,
725        ConflictMarkerLineChar::ConflictEnd,
726        conflict_marker_len,
727        &format!("{conflict_info} ends"),
728    )?;
729    Ok(())
730}
731
732fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
733    if has_no_eol(slice) {
734        NO_EOL_COMMENT
735    } else {
736        ""
737    }
738}
739
740// Write a chunk of data, ensuring that it doesn't end with a line which is
741// missing its terminating newline.
742fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
743    output.write_all(data)?;
744    if has_no_eol(data) {
745        writeln!(output)?;
746    }
747    Ok(())
748}
749
750// Check whether a slice is missing its terminating newline character.
751fn has_no_eol(slice: &[u8]) -> bool {
752    slice.last().is_some_and(|&last| last != b'\n')
753}
754
755fn diff_size(hunks: &[DiffHunk]) -> usize {
756    hunks
757        .iter()
758        .map(|hunk| match hunk.kind {
759            DiffHunkKind::Matching => 0,
760            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
761        })
762        .sum()
763}
764
765pub struct MaterializedTreeDiffEntry {
766    pub path: CopiesTreeDiffEntryPath,
767    pub values: BackendResult<Diff<MaterializedTreeValue>>,
768}
769
770pub fn materialized_diff_stream(
771    store: &Store,
772    tree_diff: BoxStream<'_, CopiesTreeDiffEntry>,
773    conflict_labels: Diff<&ConflictLabels>,
774) -> impl Stream<Item = MaterializedTreeDiffEntry> {
775    tree_diff
776        .map(async |CopiesTreeDiffEntry { path, values }| match values {
777            Err(err) => MaterializedTreeDiffEntry {
778                path,
779                values: Err(err),
780            },
781            Ok(values) => {
782                let before_future = materialize_tree_value(
783                    store,
784                    path.source(),
785                    values.before,
786                    conflict_labels.before,
787                );
788                let after_future = materialize_tree_value(
789                    store,
790                    path.target(),
791                    values.after,
792                    conflict_labels.after,
793                );
794                let values = try_join!(before_future, after_future)
795                    .map(|(before, after)| Diff { before, after });
796                MaterializedTreeDiffEntry { path, values }
797            }
798        })
799        .buffered((store.concurrency() / 2).max(1))
800}
801
802/// Parses conflict markers from a slice.
803///
804/// Returns `None` if there were no valid conflict markers. The caller
805/// has to provide the expected number of merge sides (adds). Conflict
806/// markers that are otherwise valid will be considered invalid if
807/// they don't have the expected arity.
808///
809/// All conflict markers in the file must be at least as long as the expected
810/// length. Any shorter conflict markers will be ignored.
811// TODO: "parse" is not usually the opposite of "materialize", so maybe we
812// should rename them to "serialize" and "deserialize"?
813pub fn parse_conflict(
814    input: &[u8],
815    num_sides: usize,
816    expected_marker_len: usize,
817) -> Option<Vec<Merge<BString>>> {
818    if input.is_empty() {
819        return None;
820    }
821    let mut hunks = vec![];
822    let mut pos = 0;
823    let mut resolved_start = 0;
824    let mut conflict_start = None;
825    let mut conflict_start_len = 0;
826    for line in input.lines_with_terminator() {
827        match parse_conflict_marker(line, expected_marker_len) {
828            Some(ConflictMarkerLineChar::ConflictStart) => {
829                conflict_start = Some(pos);
830                conflict_start_len = line.len();
831            }
832            Some(ConflictMarkerLineChar::ConflictEnd) => {
833                if let Some(conflict_start_index) = conflict_start.take() {
834                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
835                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
836                    if hunk.num_sides() == num_sides {
837                        let resolved_slice = &input[resolved_start..conflict_start_index];
838                        if !resolved_slice.is_empty() {
839                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
840                        }
841                        hunks.push(hunk);
842                        resolved_start = pos + line.len();
843                    }
844                }
845            }
846            _ => {}
847        }
848        pos += line.len();
849    }
850
851    if hunks.is_empty() {
852        None
853    } else {
854        if resolved_start < input.len() {
855            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
856        }
857        Some(hunks)
858    }
859}
860
861/// This method handles parsing both JJ-style and Git-style conflict markers,
862/// meaning that switching conflict marker styles won't prevent existing files
863/// with other conflict marker styles from being parsed successfully. The
864/// conflict marker style to use for parsing is determined based on the first
865/// line of the hunk.
866fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
867    // If the hunk starts with a conflict marker, find its first character
868    let initial_conflict_marker = input
869        .lines_with_terminator()
870        .next()
871        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
872
873    match initial_conflict_marker {
874        // JJ-style conflicts must start with one of these 3 conflict marker lines
875        Some(
876            ConflictMarkerLineChar::Diff
877            | ConflictMarkerLineChar::Remove
878            | ConflictMarkerLineChar::Add,
879        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
880        // Git-style conflicts either must not start with a conflict marker line, or must start with
881        // the "|||||||" conflict marker line (if the first side was empty)
882        None | Some(ConflictMarkerLineChar::GitAncestor) => {
883            parse_git_style_conflict_hunk(input, expected_marker_len)
884        }
885        // No other conflict markers are allowed at the start of a hunk
886        Some(_) => Merge::resolved(BString::new(vec![])),
887    }
888}
889
890fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
891    enum State {
892        Diff,
893        Remove,
894        Add,
895        Unknown,
896    }
897    let mut state = State::Unknown;
898    let mut removes = vec![];
899    let mut adds = vec![];
900    for line in input.lines_with_terminator() {
901        match parse_conflict_marker(line, expected_marker_len) {
902            Some(ConflictMarkerLineChar::Diff) => {
903                state = State::Diff;
904                removes.push(BString::new(vec![]));
905                adds.push(BString::new(vec![]));
906                continue;
907            }
908            Some(ConflictMarkerLineChar::Remove) => {
909                state = State::Remove;
910                removes.push(BString::new(vec![]));
911                continue;
912            }
913            Some(ConflictMarkerLineChar::Add) => {
914                state = State::Add;
915                adds.push(BString::new(vec![]));
916                continue;
917            }
918            Some(ConflictMarkerLineChar::Note) => {
919                continue;
920            }
921            _ => {}
922        }
923        match state {
924            State::Diff => {
925                if let Some(rest) = line.strip_prefix(b"-") {
926                    removes.last_mut().unwrap().extend_from_slice(rest);
927                } else if let Some(rest) = line.strip_prefix(b"+") {
928                    adds.last_mut().unwrap().extend_from_slice(rest);
929                } else if let Some(rest) = line.strip_prefix(b" ") {
930                    removes.last_mut().unwrap().extend_from_slice(rest);
931                    adds.last_mut().unwrap().extend_from_slice(rest);
932                } else if line == b"\n" || line == b"\r\n" {
933                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
934                    // be unfortunate if this prevented the conflict from being parsed, so we add
935                    // the empty line to the "remove" and "add" as if there was a space in front
936                    removes.last_mut().unwrap().extend_from_slice(line);
937                    adds.last_mut().unwrap().extend_from_slice(line);
938                } else {
939                    // Doesn't look like a valid conflict
940                    return Merge::resolved(BString::new(vec![]));
941                }
942            }
943            State::Remove => {
944                removes.last_mut().unwrap().extend_from_slice(line);
945            }
946            State::Add => {
947                adds.last_mut().unwrap().extend_from_slice(line);
948            }
949            State::Unknown => {
950                // Doesn't look like a valid conflict
951                return Merge::resolved(BString::new(vec![]));
952            }
953        }
954    }
955
956    if adds.len() == removes.len() + 1 {
957        Merge::from_removes_adds(removes, adds)
958    } else {
959        // Doesn't look like a valid conflict
960        Merge::resolved(BString::new(vec![]))
961    }
962}
963
964fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
965    #[derive(PartialEq, Eq)]
966    enum State {
967        Left,
968        Base,
969        Right,
970    }
971    let mut state = State::Left;
972    let mut left = BString::new(vec![]);
973    let mut base = BString::new(vec![]);
974    let mut right = BString::new(vec![]);
975    for line in input.lines_with_terminator() {
976        match parse_conflict_marker(line, expected_marker_len) {
977            Some(ConflictMarkerLineChar::GitAncestor) => {
978                if state == State::Left {
979                    state = State::Base;
980                    continue;
981                } else {
982                    // Base must come after left
983                    return Merge::resolved(BString::new(vec![]));
984                }
985            }
986            Some(ConflictMarkerLineChar::GitSeparator) => {
987                if state == State::Base {
988                    state = State::Right;
989                    continue;
990                } else {
991                    // Right must come after base
992                    return Merge::resolved(BString::new(vec![]));
993                }
994            }
995            _ => {}
996        }
997        match state {
998            State::Left => left.extend_from_slice(line),
999            State::Base => base.extend_from_slice(line),
1000            State::Right => right.extend_from_slice(line),
1001        }
1002    }
1003
1004    if state == State::Right {
1005        Merge::from_vec(vec![left, base, right])
1006    } else {
1007        // Doesn't look like a valid conflict
1008        Merge::resolved(BString::new(vec![]))
1009    }
1010}
1011
1012/// Parses conflict markers in `content` and returns an updated version of
1013/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
1014/// single resolves `FileId` will be returned.
1015pub async fn update_from_content(
1016    file_ids: &Merge<Option<FileId>>,
1017    store: &Store,
1018    path: &RepoPath,
1019    content: &[u8],
1020    conflict_marker_len: usize,
1021) -> BackendResult<Merge<Option<FileId>>> {
1022    let simplified_file_ids = file_ids.simplify();
1023
1024    let old_contents = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
1025    let old_hunks = files::merge_hunks(&old_contents, store.merge_options());
1026
1027    // Parse conflicts from the new content using the arity of the simplified
1028    // conflicts.
1029    let mut new_hunks = parse_conflict(
1030        content,
1031        simplified_file_ids.num_sides(),
1032        conflict_marker_len,
1033    );
1034
1035    // If there is a conflict at the end of the file and a term ends with a newline,
1036    // check whether the original term ended with a newline. If it didn't, then
1037    // remove the newline since it was added automatically when materializing.
1038    if let Some(last_hunk) = new_hunks
1039        .as_mut()
1040        .and_then(|hunks| hunks.last_mut())
1041        .filter(|hunk| !hunk.is_resolved())
1042    {
1043        for (original_content, term) in itertools::zip_eq(&old_contents, last_hunk) {
1044            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
1045                term.pop();
1046            }
1047        }
1048    }
1049
1050    // Check if the new hunks are unchanged. This makes sure that unchanged file
1051    // conflicts aren't updated to partially-resolved contents.
1052    let unchanged = match (&old_hunks, &new_hunks) {
1053        (MergeResult::Resolved(old), None) => old == content,
1054        (MergeResult::Conflict(old), Some(new)) => old == new,
1055        (MergeResult::Resolved(_), Some(_)) | (MergeResult::Conflict(_), None) => false,
1056    };
1057    if unchanged {
1058        return Ok(file_ids.clone());
1059    }
1060
1061    let Some(hunks) = new_hunks else {
1062        // Either there are no markers or they don't have the expected arity
1063        let file_id = store.write_file(path, &mut &content[..]).await?;
1064        return Ok(Merge::normal(file_id));
1065    };
1066
1067    let mut contents = simplified_file_ids.map(|_| vec![]);
1068    for hunk in hunks {
1069        if let Some(slice) = hunk.as_resolved() {
1070            for content in &mut contents {
1071                content.extend_from_slice(slice);
1072            }
1073        } else {
1074            for (content, slice) in zip(&mut contents, hunk) {
1075                content.extend(Vec::from(slice));
1076            }
1077        }
1078    }
1079
1080    // Now write the new files contents we found by parsing the file with conflict
1081    // markers.
1082    // TODO: Write these concurrently
1083    let new_file_ids: Vec<Option<FileId>> = zip(&contents, &simplified_file_ids)
1084        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
1085            if file_id.is_some() || !content.is_empty() {
1086                let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1087                Ok(Some(file_id))
1088            } else {
1089                // The missing side of a conflict is still represented by
1090                // the empty string we materialized it as
1091                Ok(None)
1092            }
1093        })
1094        .try_collect()?;
1095
1096    // If the conflict was simplified, expand the conflict to the original
1097    // number of sides.
1098    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1099        file_ids
1100            .clone()
1101            .update_from_simplified(Merge::from_vec(new_file_ids))
1102    } else {
1103        Merge::from_vec(new_file_ids)
1104    };
1105    Ok(new_file_ids)
1106}
1107
1108#[cfg(test)]
1109mod tests {
1110    use super::*;
1111
1112    #[test]
1113    fn test_resolve_file_executable() {
1114        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1115            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1116        }
1117
1118        // already resolved
1119        assert_eq!(resolve([None]), None);
1120        assert_eq!(resolve([Some(false)]), Some(false));
1121        assert_eq!(resolve([Some(true)]), Some(true));
1122
1123        // trivially resolved
1124        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1125        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1126        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1127        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1128
1129        // unresolvable
1130        assert_eq!(resolve([Some(false), Some(true), None]), None);
1131
1132        // trivially resolved to absent, so pick the original state
1133        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1134        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1135        assert_eq!(
1136            resolve([None, None, Some(true), Some(true), None]),
1137            Some(true)
1138        );
1139
1140        // trivially resolved to absent, and the original state is ambiguous
1141        assert_eq!(
1142            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1143            None
1144        );
1145        assert_eq!(
1146            resolve([
1147                None,
1148                Some(true),
1149                Some(true),
1150                Some(false),
1151                Some(false),
1152                Some(false),
1153                Some(false),
1154            ]),
1155            None
1156        );
1157    }
1158}