jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::Stream;
25use futures::StreamExt as _;
26use futures::stream::BoxStream;
27use futures::try_join;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use tokio::io::AsyncRead;
31use tokio::io::AsyncReadExt as _;
32
33use crate::backend::BackendError;
34use crate::backend::BackendResult;
35use crate::backend::CommitId;
36use crate::backend::CopyId;
37use crate::backend::FileId;
38use crate::backend::SymlinkId;
39use crate::backend::TreeId;
40use crate::backend::TreeValue;
41use crate::copies::CopiesTreeDiffEntry;
42use crate::copies::CopiesTreeDiffEntryPath;
43use crate::diff::ContentDiff;
44use crate::diff::DiffHunk;
45use crate::diff::DiffHunkKind;
46use crate::files;
47use crate::files::MergeResult;
48use crate::merge::Merge;
49use crate::merge::MergedTreeValue;
50use crate::merge::SameChange;
51use crate::repo_path::RepoPath;
52use crate::store::Store;
53use crate::tree_merge::MergeOptions;
54
55/// Minimum length of conflict markers.
56pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
57
58/// If a file already contains lines which look like conflict markers of length
59/// N, then the conflict markers we add will be of length (N + increment). This
60/// number is chosen to make the conflict markers noticeably longer than the
61/// existing markers.
62const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
63
64/// Comment for missing terminating newline in a term of a conflict.
65const NO_EOL_COMMENT: &str = " (no terminating newline)";
66
67/// Comment for missing terminating newline in the "add" side of a diff.
68const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
69
70/// Comment for missing terminating newline in the "remove" side of a diff.
71const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
72
73fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
74    for hunk in hunks {
75        match hunk.kind {
76            DiffHunkKind::Matching => {
77                debug_assert!(hunk.contents.iter().all_equal());
78                for line in hunk.contents[0].lines_with_terminator() {
79                    file.write_all(b" ")?;
80                    write_and_ensure_newline(file, line)?;
81                }
82            }
83            DiffHunkKind::Different => {
84                for line in hunk.contents[0].lines_with_terminator() {
85                    file.write_all(b"-")?;
86                    write_and_ensure_newline(file, line)?;
87                }
88                for line in hunk.contents[1].lines_with_terminator() {
89                    file.write_all(b"+")?;
90                    write_and_ensure_newline(file, line)?;
91                }
92            }
93        }
94    }
95    Ok(())
96}
97
98async fn get_file_contents(
99    store: &Store,
100    path: &RepoPath,
101    term: &Option<FileId>,
102) -> BackendResult<BString> {
103    match term {
104        Some(id) => {
105            let mut reader = store.read_file(path, id).await?;
106            let mut content = vec![];
107            reader
108                .read_to_end(&mut content)
109                .await
110                .map_err(|err| BackendError::ReadFile {
111                    path: path.to_owned(),
112                    id: id.clone(),
113                    source: err.into(),
114                })?;
115            Ok(BString::new(content))
116        }
117        // If the conflict had removed the file on one side, we pretend that the file
118        // was empty there.
119        None => Ok(BString::new(vec![])),
120    }
121}
122
123pub async fn extract_as_single_hunk(
124    merge: &Merge<Option<FileId>>,
125    store: &Store,
126    path: &RepoPath,
127) -> BackendResult<Merge<BString>> {
128    merge
129        .try_map_async(|term| get_file_contents(store, path, term))
130        .await
131}
132
133/// A type similar to `MergedTreeValue` but with associated data to include in
134/// e.g. the working copy or in a diff.
135pub enum MaterializedTreeValue {
136    Absent,
137    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
138    File(MaterializedFileValue),
139    Symlink { id: SymlinkId, target: String },
140    FileConflict(MaterializedFileConflictValue),
141    OtherConflict { id: MergedTreeValue },
142    GitSubmodule(CommitId),
143    Tree(TreeId),
144}
145
146impl MaterializedTreeValue {
147    pub fn is_absent(&self) -> bool {
148        matches!(self, Self::Absent)
149    }
150
151    pub fn is_present(&self) -> bool {
152        !self.is_absent()
153    }
154}
155
156/// [`TreeValue::File`] with file content `reader`.
157pub struct MaterializedFileValue {
158    pub id: FileId,
159    pub executable: bool,
160    pub copy_id: CopyId,
161    pub reader: Pin<Box<dyn AsyncRead + Send>>,
162}
163
164impl MaterializedFileValue {
165    /// Reads file content until EOF. The provided `path` is used only for error
166    /// reporting purpose.
167    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
168        let mut buf = Vec::new();
169        self.reader
170            .read_to_end(&mut buf)
171            .await
172            .map_err(|err| BackendError::ReadFile {
173                path: path.to_owned(),
174                id: self.id.clone(),
175                source: err.into(),
176            })?;
177        Ok(buf)
178    }
179}
180
181/// Conflicted [`TreeValue::File`]s with file contents.
182pub struct MaterializedFileConflictValue {
183    /// File ids which preserve the shape of the tree conflict, to be used with
184    /// [`Merge::update_from_simplified()`].
185    pub unsimplified_ids: Merge<Option<FileId>>,
186    /// Simplified file ids, in which redundant id pairs are dropped.
187    pub ids: Merge<Option<FileId>>,
188    /// File contents corresponding to the simplified `ids`.
189    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
190    // when null bytes found?
191    pub contents: Merge<BString>,
192    /// Merged executable bit. `None` if there are changes in both executable
193    /// bit and file absence.
194    pub executable: Option<bool>,
195    /// Merged copy id. `None` if no single value could be determined.
196    pub copy_id: Option<CopyId>,
197}
198
199/// Reads the data associated with a `MergedTreeValue` so it can be written to
200/// e.g. the working copy or diff.
201pub async fn materialize_tree_value(
202    store: &Store,
203    path: &RepoPath,
204    value: MergedTreeValue,
205) -> BackendResult<MaterializedTreeValue> {
206    match materialize_tree_value_no_access_denied(store, path, value).await {
207        Err(BackendError::ReadAccessDenied { source, .. }) => {
208            Ok(MaterializedTreeValue::AccessDenied(source))
209        }
210        result => result,
211    }
212}
213
214async fn materialize_tree_value_no_access_denied(
215    store: &Store,
216    path: &RepoPath,
217    value: MergedTreeValue,
218) -> BackendResult<MaterializedTreeValue> {
219    match value.into_resolved() {
220        Ok(None) => Ok(MaterializedTreeValue::Absent),
221        Ok(Some(TreeValue::File {
222            id,
223            executable,
224            copy_id,
225        })) => {
226            let reader = store.read_file(path, &id).await?;
227            Ok(MaterializedTreeValue::File(MaterializedFileValue {
228                id,
229                executable,
230                copy_id,
231                reader,
232            }))
233        }
234        Ok(Some(TreeValue::Symlink(id))) => {
235            let target = store.read_symlink(path, &id).await?;
236            Ok(MaterializedTreeValue::Symlink { id, target })
237        }
238        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
239        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
240        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
241            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
242            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
243        },
244    }
245}
246
247/// Suppose `conflict` contains only files or absent entries, reads the file
248/// contents.
249pub async fn try_materialize_file_conflict_value(
250    store: &Store,
251    path: &RepoPath,
252    conflict: &MergedTreeValue,
253) -> BackendResult<Option<MaterializedFileConflictValue>> {
254    let (Some(unsimplified_ids), Some(executable_bits)) =
255        (conflict.to_file_merge(), conflict.to_executable_merge())
256    else {
257        return Ok(None);
258    };
259    let ids = unsimplified_ids.simplify();
260    let contents = extract_as_single_hunk(&ids, store, path).await?;
261    let executable = resolve_file_executable(&executable_bits);
262    Ok(Some(MaterializedFileConflictValue {
263        unsimplified_ids,
264        ids,
265        contents,
266        executable,
267        copy_id: Some(CopyId::placeholder()),
268    }))
269}
270
271/// Resolves conflicts in file executable bit, returns the original state if the
272/// file is deleted and executable bit is unchanged.
273pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
274    let resolved = merge.resolve_trivial(SameChange::Accept).copied()?;
275    if resolved.is_some() {
276        resolved
277    } else {
278        // If the merge is resolved to None (absent), there should be the same
279        // number of Some(true) and Some(false). Pick the old state if
280        // unambiguous, so the new file inherits the original executable bit.
281        merge.removes().flatten().copied().all_equal_value().ok()
282    }
283}
284
285/// Describes what style should be used when materializing conflicts.
286#[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Deserialize)]
287#[serde(rename_all = "kebab-case")]
288pub enum ConflictMarkerStyle {
289    /// Style which shows a snapshot and a series of diffs to apply.
290    Diff,
291    /// Similar to "diff", but always picks the first side as the snapshot. May
292    /// become the default in a future version.
293    DiffExperimental,
294    /// Style which shows a snapshot for each base and side.
295    Snapshot,
296    /// Style which replicates Git's "diff3" style to support external tools.
297    Git,
298}
299
300impl ConflictMarkerStyle {
301    /// Returns true if this style allows `%%%%%%%` conflict markers.
302    pub fn allows_diff(&self) -> bool {
303        matches!(self, Self::Diff | Self::DiffExperimental)
304    }
305}
306
307/// Options for conflict materialization.
308#[derive(Clone, Debug)]
309pub struct ConflictMaterializeOptions {
310    pub marker_style: ConflictMarkerStyle,
311    pub marker_len: Option<usize>,
312    pub merge: MergeOptions,
313}
314
315/// Characters which can be repeated to form a conflict marker line when
316/// materializing and parsing conflicts.
317#[derive(Clone, Copy, PartialEq, Eq)]
318#[repr(u8)]
319enum ConflictMarkerLineChar {
320    ConflictStart = b'<',
321    ConflictEnd = b'>',
322    Add = b'+',
323    Remove = b'-',
324    Diff = b'%',
325    GitAncestor = b'|',
326    GitSeparator = b'=',
327}
328
329impl ConflictMarkerLineChar {
330    /// Get the ASCII byte used for this conflict marker.
331    fn to_byte(self) -> u8 {
332        self as u8
333    }
334
335    /// Parse a byte to see if it corresponds with any kind of conflict marker.
336    fn parse_byte(byte: u8) -> Option<Self> {
337        match byte {
338            b'<' => Some(Self::ConflictStart),
339            b'>' => Some(Self::ConflictEnd),
340            b'+' => Some(Self::Add),
341            b'-' => Some(Self::Remove),
342            b'%' => Some(Self::Diff),
343            b'|' => Some(Self::GitAncestor),
344            b'=' => Some(Self::GitSeparator),
345            _ => None,
346        }
347    }
348}
349
350/// Represents a conflict marker line parsed from the file. Conflict marker
351/// lines consist of a single ASCII character repeated for a certain length.
352struct ConflictMarkerLine {
353    kind: ConflictMarkerLineChar,
354    len: usize,
355}
356
357/// Write a conflict marker to an output file.
358fn write_conflict_marker(
359    output: &mut dyn Write,
360    kind: ConflictMarkerLineChar,
361    len: usize,
362    suffix_text: &str,
363) -> io::Result<()> {
364    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
365
366    if suffix_text.is_empty() {
367        writeln!(output, "{conflict_marker}")
368    } else {
369        writeln!(output, "{conflict_marker} {suffix_text}")
370    }
371}
372
373/// Parse a conflict marker from a line of a file. The conflict marker may have
374/// any length (even less than MIN_CONFLICT_MARKER_LEN).
375fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
376    let first_byte = *line.first()?;
377    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
378    let len = line.iter().take_while(|&&b| b == first_byte).count();
379
380    if let Some(next_byte) = line.get(len) {
381        // If there is a character after the marker, it must be ASCII whitespace
382        if !next_byte.is_ascii_whitespace() {
383            return None;
384        }
385    }
386
387    Some(ConflictMarkerLine { kind, len })
388}
389
390/// Parse a conflict marker, expecting it to be at least a certain length. Any
391/// shorter conflict markers are ignored.
392fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
393    parse_conflict_marker_any_len(line)
394        .filter(|marker| marker.len >= expected_len)
395        .map(|marker| marker.kind)
396}
397
398/// Given a Merge of files, choose the conflict marker length to use when
399/// materializing conflicts.
400pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
401    let max_existing_marker_len = single_hunk
402        .iter()
403        .flat_map(|file| file.as_ref().lines_with_terminator())
404        .filter_map(parse_conflict_marker_any_len)
405        .map(|marker| marker.len)
406        .max()
407        .unwrap_or_default();
408
409    max_existing_marker_len
410        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
411        .max(MIN_CONFLICT_MARKER_LEN)
412}
413
414pub fn materialize_merge_result<T: AsRef<[u8]>>(
415    single_hunk: &Merge<T>,
416    output: &mut dyn Write,
417    options: &ConflictMaterializeOptions,
418) -> io::Result<()> {
419    let merge_result = files::merge_hunks(single_hunk, &options.merge);
420    match &merge_result {
421        MergeResult::Resolved(content) => output.write_all(content),
422        MergeResult::Conflict(hunks) => {
423            let marker_len = options
424                .marker_len
425                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
426            materialize_conflict_hunks(hunks, options.marker_style, marker_len, output)
427        }
428    }
429}
430
431pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
432    single_hunk: &Merge<T>,
433    options: &ConflictMaterializeOptions,
434) -> BString {
435    let merge_result = files::merge_hunks(single_hunk, &options.merge);
436    match merge_result {
437        MergeResult::Resolved(content) => content,
438        MergeResult::Conflict(hunks) => {
439            let marker_len = options
440                .marker_len
441                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
442            let mut output = Vec::new();
443            materialize_conflict_hunks(&hunks, options.marker_style, marker_len, &mut output)
444                .expect("writing to an in-memory buffer should never fail");
445            output.into()
446        }
447    }
448}
449
450fn materialize_conflict_hunks(
451    hunks: &[Merge<BString>],
452    conflict_marker_style: ConflictMarkerStyle,
453    conflict_marker_len: usize,
454    output: &mut dyn Write,
455) -> io::Result<()> {
456    let num_conflicts = hunks
457        .iter()
458        .filter(|hunk| hunk.as_resolved().is_none())
459        .count();
460    let mut conflict_index = 0;
461    for hunk in hunks {
462        if let Some(content) = hunk.as_resolved() {
463            output.write_all(content)?;
464        } else {
465            conflict_index += 1;
466            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
467
468            match (conflict_marker_style, hunk.as_slice()) {
469                // 2-sided conflicts can use Git-style conflict markers
470                (ConflictMarkerStyle::Git, [left, base, right]) => {
471                    materialize_git_style_conflict(
472                        left,
473                        base,
474                        right,
475                        &conflict_info,
476                        conflict_marker_len,
477                        output,
478                    )?;
479                }
480                _ => {
481                    materialize_jj_style_conflict(
482                        hunk,
483                        &conflict_info,
484                        conflict_marker_style,
485                        conflict_marker_len,
486                        output,
487                    )?;
488                }
489            }
490        }
491    }
492    Ok(())
493}
494
495fn materialize_git_style_conflict(
496    left: &[u8],
497    base: &[u8],
498    right: &[u8],
499    conflict_info: &str,
500    conflict_marker_len: usize,
501    output: &mut dyn Write,
502) -> io::Result<()> {
503    write_conflict_marker(
504        output,
505        ConflictMarkerLineChar::ConflictStart,
506        conflict_marker_len,
507        &format!("Side #1 ({conflict_info})"),
508    )?;
509    write_and_ensure_newline(output, left)?;
510
511    write_conflict_marker(
512        output,
513        ConflictMarkerLineChar::GitAncestor,
514        conflict_marker_len,
515        "Base",
516    )?;
517    write_and_ensure_newline(output, base)?;
518
519    // VS Code doesn't seem to support any trailing text on the separator line
520    write_conflict_marker(
521        output,
522        ConflictMarkerLineChar::GitSeparator,
523        conflict_marker_len,
524        "",
525    )?;
526
527    write_and_ensure_newline(output, right)?;
528    write_conflict_marker(
529        output,
530        ConflictMarkerLineChar::ConflictEnd,
531        conflict_marker_len,
532        &format!("Side #2 ({conflict_info} ends)"),
533    )?;
534
535    Ok(())
536}
537
538fn materialize_jj_style_conflict(
539    hunk: &Merge<BString>,
540    conflict_info: &str,
541    conflict_marker_style: ConflictMarkerStyle,
542    conflict_marker_len: usize,
543    output: &mut dyn Write,
544) -> io::Result<()> {
545    // Write a positive snapshot (side) of a conflict
546    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
547        write_conflict_marker(
548            output,
549            ConflictMarkerLineChar::Add,
550            conflict_marker_len,
551            &format!(
552                "Contents of side #{}{}",
553                add_index + 1,
554                maybe_no_eol_comment(data)
555            ),
556        )?;
557        write_and_ensure_newline(output, data)
558    };
559
560    // Write a negative snapshot (base) of a conflict
561    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
562        write_conflict_marker(
563            output,
564            ConflictMarkerLineChar::Remove,
565            conflict_marker_len,
566            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
567        )?;
568        write_and_ensure_newline(output, data)
569    };
570
571    // Write a diff from a negative term to a positive term
572    let write_diff =
573        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
574            let no_eol_remove = diff
575                .last()
576                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
577            let no_eol_add = diff
578                .last()
579                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
580            let no_eol_comment = match (no_eol_remove, no_eol_add) {
581                (true, true) => NO_EOL_COMMENT,
582                (true, _) => REMOVE_NO_EOL_COMMENT,
583                (_, true) => ADD_NO_EOL_COMMENT,
584                _ => "",
585            };
586            write_conflict_marker(
587                output,
588                ConflictMarkerLineChar::Diff,
589                conflict_marker_len,
590                &format!(
591                    "Changes from {base_str} to side #{}{no_eol_comment}",
592                    add_index + 1
593                ),
594            )?;
595            write_diff_hunks(diff, output)
596        };
597
598    write_conflict_marker(
599        output,
600        ConflictMarkerLineChar::ConflictStart,
601        conflict_marker_len,
602        conflict_info,
603    )?;
604    let mut snapshot_written = false;
605    // The only conflict marker style which can start with a diff is "diff".
606    if conflict_marker_style != ConflictMarkerStyle::Diff {
607        write_side(0, hunk.first(), output)?;
608        snapshot_written = true;
609    }
610    for (base_index, left) in hunk.removes().enumerate() {
611        let add_index = if snapshot_written {
612            base_index + 1
613        } else {
614            base_index
615        };
616
617        // The vast majority of conflicts one actually tries to resolve manually have 1
618        // base.
619        let base_str = if hunk.removes().len() == 1 {
620            "base".to_string()
621        } else {
622            format!("base #{}", base_index + 1)
623        };
624
625        let right1 = hunk.get_add(add_index).unwrap();
626
627        // Write the base and side separately if the conflict marker style doesn't
628        // support diffs.
629        if !conflict_marker_style.allows_diff() {
630            write_base(&base_str, left, output)?;
631            write_side(add_index, right1, output)?;
632            continue;
633        }
634
635        let diff1 = ContentDiff::by_line([&left, &right1]).hunks().collect_vec();
636        // If we haven't written a snapshot yet, then we need to decide whether to
637        // format the current side as a snapshot or a diff. We write the current side as
638        // a diff unless the next side has a smaller diff compared to the current base.
639        if !snapshot_written {
640            let right2 = hunk.get_add(add_index + 1).unwrap();
641            let diff2 = ContentDiff::by_line([&left, &right2]).hunks().collect_vec();
642            if diff_size(&diff2) < diff_size(&diff1) {
643                // If the next positive term is a better match, emit the current positive term
644                // as a snapshot and the next positive term as a diff.
645                write_side(add_index, right1, output)?;
646                write_diff(&base_str, add_index + 1, &diff2, output)?;
647                snapshot_written = true;
648                continue;
649            }
650        }
651
652        write_diff(&base_str, add_index, &diff1, output)?;
653    }
654
655    // If we still didn't emit a snapshot, the last side is the snapshot.
656    if !snapshot_written {
657        let add_index = hunk.num_sides() - 1;
658        write_side(add_index, hunk.get_add(add_index).unwrap(), output)?;
659    }
660    write_conflict_marker(
661        output,
662        ConflictMarkerLineChar::ConflictEnd,
663        conflict_marker_len,
664        &format!("{conflict_info} ends"),
665    )?;
666    Ok(())
667}
668
669fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
670    if has_no_eol(slice) {
671        NO_EOL_COMMENT
672    } else {
673        ""
674    }
675}
676
677// Write a chunk of data, ensuring that it doesn't end with a line which is
678// missing its terminating newline.
679fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
680    output.write_all(data)?;
681    if has_no_eol(data) {
682        writeln!(output)?;
683    }
684    Ok(())
685}
686
687// Check whether a slice is missing its terminating newline character.
688fn has_no_eol(slice: &[u8]) -> bool {
689    slice.last().is_some_and(|&last| last != b'\n')
690}
691
692fn diff_size(hunks: &[DiffHunk]) -> usize {
693    hunks
694        .iter()
695        .map(|hunk| match hunk.kind {
696            DiffHunkKind::Matching => 0,
697            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
698        })
699        .sum()
700}
701
702pub struct MaterializedTreeDiffEntry {
703    pub path: CopiesTreeDiffEntryPath,
704    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
705}
706
707pub fn materialized_diff_stream(
708    store: &Store,
709    tree_diff: BoxStream<'_, CopiesTreeDiffEntry>,
710) -> impl Stream<Item = MaterializedTreeDiffEntry> {
711    tree_diff
712        .map(async |CopiesTreeDiffEntry { path, values }| match values {
713            Err(err) => MaterializedTreeDiffEntry {
714                path,
715                values: Err(err),
716            },
717            Ok(values) => {
718                let before_future = materialize_tree_value(store, path.source(), values.before);
719                let after_future = materialize_tree_value(store, path.target(), values.after);
720                let values = try_join!(before_future, after_future);
721                MaterializedTreeDiffEntry { path, values }
722            }
723        })
724        .buffered((store.concurrency() / 2).max(1))
725}
726
727/// Parses conflict markers from a slice.
728///
729/// Returns `None` if there were no valid conflict markers. The caller
730/// has to provide the expected number of merge sides (adds). Conflict
731/// markers that are otherwise valid will be considered invalid if
732/// they don't have the expected arity.
733///
734/// All conflict markers in the file must be at least as long as the expected
735/// length. Any shorter conflict markers will be ignored.
736// TODO: "parse" is not usually the opposite of "materialize", so maybe we
737// should rename them to "serialize" and "deserialize"?
738pub fn parse_conflict(
739    input: &[u8],
740    num_sides: usize,
741    expected_marker_len: usize,
742) -> Option<Vec<Merge<BString>>> {
743    if input.is_empty() {
744        return None;
745    }
746    let mut hunks = vec![];
747    let mut pos = 0;
748    let mut resolved_start = 0;
749    let mut conflict_start = None;
750    let mut conflict_start_len = 0;
751    for line in input.lines_with_terminator() {
752        match parse_conflict_marker(line, expected_marker_len) {
753            Some(ConflictMarkerLineChar::ConflictStart) => {
754                conflict_start = Some(pos);
755                conflict_start_len = line.len();
756            }
757            Some(ConflictMarkerLineChar::ConflictEnd) => {
758                if let Some(conflict_start_index) = conflict_start.take() {
759                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
760                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
761                    if hunk.num_sides() == num_sides {
762                        let resolved_slice = &input[resolved_start..conflict_start_index];
763                        if !resolved_slice.is_empty() {
764                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
765                        }
766                        hunks.push(hunk);
767                        resolved_start = pos + line.len();
768                    }
769                }
770            }
771            _ => {}
772        }
773        pos += line.len();
774    }
775
776    if hunks.is_empty() {
777        None
778    } else {
779        if resolved_start < input.len() {
780            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
781        }
782        Some(hunks)
783    }
784}
785
786/// This method handles parsing both JJ-style and Git-style conflict markers,
787/// meaning that switching conflict marker styles won't prevent existing files
788/// with other conflict marker styles from being parsed successfully. The
789/// conflict marker style to use for parsing is determined based on the first
790/// line of the hunk.
791fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
792    // If the hunk starts with a conflict marker, find its first character
793    let initial_conflict_marker = input
794        .lines_with_terminator()
795        .next()
796        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
797
798    match initial_conflict_marker {
799        // JJ-style conflicts must start with one of these 3 conflict marker lines
800        Some(
801            ConflictMarkerLineChar::Diff
802            | ConflictMarkerLineChar::Remove
803            | ConflictMarkerLineChar::Add,
804        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
805        // Git-style conflicts either must not start with a conflict marker line, or must start with
806        // the "|||||||" conflict marker line (if the first side was empty)
807        None | Some(ConflictMarkerLineChar::GitAncestor) => {
808            parse_git_style_conflict_hunk(input, expected_marker_len)
809        }
810        // No other conflict markers are allowed at the start of a hunk
811        Some(_) => Merge::resolved(BString::new(vec![])),
812    }
813}
814
815fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
816    enum State {
817        Diff,
818        Remove,
819        Add,
820        Unknown,
821    }
822    let mut state = State::Unknown;
823    let mut removes = vec![];
824    let mut adds = vec![];
825    for line in input.lines_with_terminator() {
826        match parse_conflict_marker(line, expected_marker_len) {
827            Some(ConflictMarkerLineChar::Diff) => {
828                state = State::Diff;
829                removes.push(BString::new(vec![]));
830                adds.push(BString::new(vec![]));
831                continue;
832            }
833            Some(ConflictMarkerLineChar::Remove) => {
834                state = State::Remove;
835                removes.push(BString::new(vec![]));
836                continue;
837            }
838            Some(ConflictMarkerLineChar::Add) => {
839                state = State::Add;
840                adds.push(BString::new(vec![]));
841                continue;
842            }
843            _ => {}
844        }
845        match state {
846            State::Diff => {
847                if let Some(rest) = line.strip_prefix(b"-") {
848                    removes.last_mut().unwrap().extend_from_slice(rest);
849                } else if let Some(rest) = line.strip_prefix(b"+") {
850                    adds.last_mut().unwrap().extend_from_slice(rest);
851                } else if let Some(rest) = line.strip_prefix(b" ") {
852                    removes.last_mut().unwrap().extend_from_slice(rest);
853                    adds.last_mut().unwrap().extend_from_slice(rest);
854                } else if line == b"\n" || line == b"\r\n" {
855                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
856                    // be unfortunate if this prevented the conflict from being parsed, so we add
857                    // the empty line to the "remove" and "add" as if there was a space in front
858                    removes.last_mut().unwrap().extend_from_slice(line);
859                    adds.last_mut().unwrap().extend_from_slice(line);
860                } else {
861                    // Doesn't look like a valid conflict
862                    return Merge::resolved(BString::new(vec![]));
863                }
864            }
865            State::Remove => {
866                removes.last_mut().unwrap().extend_from_slice(line);
867            }
868            State::Add => {
869                adds.last_mut().unwrap().extend_from_slice(line);
870            }
871            State::Unknown => {
872                // Doesn't look like a valid conflict
873                return Merge::resolved(BString::new(vec![]));
874            }
875        }
876    }
877
878    if adds.len() == removes.len() + 1 {
879        Merge::from_removes_adds(removes, adds)
880    } else {
881        // Doesn't look like a valid conflict
882        Merge::resolved(BString::new(vec![]))
883    }
884}
885
886fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
887    #[derive(PartialEq, Eq)]
888    enum State {
889        Left,
890        Base,
891        Right,
892    }
893    let mut state = State::Left;
894    let mut left = BString::new(vec![]);
895    let mut base = BString::new(vec![]);
896    let mut right = BString::new(vec![]);
897    for line in input.lines_with_terminator() {
898        match parse_conflict_marker(line, expected_marker_len) {
899            Some(ConflictMarkerLineChar::GitAncestor) => {
900                if state == State::Left {
901                    state = State::Base;
902                    continue;
903                } else {
904                    // Base must come after left
905                    return Merge::resolved(BString::new(vec![]));
906                }
907            }
908            Some(ConflictMarkerLineChar::GitSeparator) => {
909                if state == State::Base {
910                    state = State::Right;
911                    continue;
912                } else {
913                    // Right must come after base
914                    return Merge::resolved(BString::new(vec![]));
915                }
916            }
917            _ => {}
918        }
919        match state {
920            State::Left => left.extend_from_slice(line),
921            State::Base => base.extend_from_slice(line),
922            State::Right => right.extend_from_slice(line),
923        }
924    }
925
926    if state == State::Right {
927        Merge::from_vec(vec![left, base, right])
928    } else {
929        // Doesn't look like a valid conflict
930        Merge::resolved(BString::new(vec![]))
931    }
932}
933
934/// Parses conflict markers in `content` and returns an updated version of
935/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
936/// single resolves `FileId` will be returned.
937pub async fn update_from_content(
938    file_ids: &Merge<Option<FileId>>,
939    store: &Store,
940    path: &RepoPath,
941    content: &[u8],
942    conflict_marker_len: usize,
943) -> BackendResult<Merge<Option<FileId>>> {
944    let simplified_file_ids = file_ids.simplify();
945
946    let old_contents = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
947    let old_hunks = files::merge_hunks(&old_contents, store.merge_options());
948
949    // Parse conflicts from the new content using the arity of the simplified
950    // conflicts.
951    let mut new_hunks = parse_conflict(
952        content,
953        simplified_file_ids.num_sides(),
954        conflict_marker_len,
955    );
956
957    // If there is a conflict at the end of the file and a term ends with a newline,
958    // check whether the original term ended with a newline. If it didn't, then
959    // remove the newline since it was added automatically when materializing.
960    if let Some(last_hunk) = new_hunks
961        .as_mut()
962        .and_then(|hunks| hunks.last_mut())
963        .filter(|hunk| !hunk.is_resolved())
964    {
965        for (original_content, term) in old_contents.iter().zip_eq(last_hunk.iter_mut()) {
966            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
967                term.pop();
968            }
969        }
970    }
971
972    // Check if the new hunks are unchanged. This makes sure that unchanged file
973    // conflicts aren't updated to partially-resolved contents.
974    let unchanged = match (&old_hunks, &new_hunks) {
975        (MergeResult::Resolved(old), None) => old == content,
976        (MergeResult::Conflict(old), Some(new)) => old == new,
977        (MergeResult::Resolved(_), Some(_)) | (MergeResult::Conflict(_), None) => false,
978    };
979    if unchanged {
980        return Ok(file_ids.clone());
981    }
982
983    let Some(hunks) = new_hunks else {
984        // Either there are no markers or they don't have the expected arity
985        let file_id = store.write_file(path, &mut &content[..]).await?;
986        return Ok(Merge::normal(file_id));
987    };
988
989    let mut contents = simplified_file_ids.map(|_| vec![]);
990    for hunk in hunks {
991        if let Some(slice) = hunk.as_resolved() {
992            for content in contents.iter_mut() {
993                content.extend_from_slice(slice);
994            }
995        } else {
996            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
997                content.extend(Vec::from(slice));
998            }
999        }
1000    }
1001
1002    // If the user edited the empty placeholder for an absent side, we consider the
1003    // conflict resolved.
1004    if zip(contents.iter(), simplified_file_ids.iter())
1005        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
1006    {
1007        let file_id = store.write_file(path, &mut &content[..]).await?;
1008        return Ok(Merge::normal(file_id));
1009    }
1010
1011    // Now write the new files contents we found by parsing the file with conflict
1012    // markers.
1013    // TODO: Write these concurrently
1014    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
1015        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
1016            match file_id {
1017                Some(_) => {
1018                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1019                    Ok(Some(file_id))
1020                }
1021                None => {
1022                    // The missing side of a conflict is still represented by
1023                    // the empty string we materialized it as
1024                    Ok(None)
1025                }
1026            }
1027        })
1028        .try_collect()?;
1029
1030    // If the conflict was simplified, expand the conflict to the original
1031    // number of sides.
1032    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1033        file_ids
1034            .clone()
1035            .update_from_simplified(Merge::from_vec(new_file_ids))
1036    } else {
1037        Merge::from_vec(new_file_ids)
1038    };
1039    Ok(new_file_ids)
1040}
1041
1042#[cfg(test)]
1043mod tests {
1044    use super::*;
1045
1046    #[test]
1047    fn test_resolve_file_executable() {
1048        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1049            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1050        }
1051
1052        // already resolved
1053        assert_eq!(resolve([None]), None);
1054        assert_eq!(resolve([Some(false)]), Some(false));
1055        assert_eq!(resolve([Some(true)]), Some(true));
1056
1057        // trivially resolved
1058        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1059        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1060        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1061        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1062
1063        // unresolvable
1064        assert_eq!(resolve([Some(false), Some(true), None]), None);
1065
1066        // trivially resolved to absent, so pick the original state
1067        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1068        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1069        assert_eq!(
1070            resolve([None, None, Some(true), Some(true), None]),
1071            Some(true)
1072        );
1073
1074        // trivially resolved to absent, and the original state is ambiguous
1075        assert_eq!(
1076            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1077            None
1078        );
1079        assert_eq!(
1080            resolve([
1081                None,
1082                Some(true),
1083                Some(true),
1084                Some(false),
1085                Some(false),
1086                Some(false),
1087                Some(false),
1088            ]),
1089            None
1090        );
1091    }
1092}