jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![expect(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::Stream;
25use futures::StreamExt as _;
26use futures::stream::BoxStream;
27use futures::try_join;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use tokio::io::AsyncRead;
31use tokio::io::AsyncReadExt as _;
32
33use crate::backend::BackendError;
34use crate::backend::BackendResult;
35use crate::backend::CommitId;
36use crate::backend::CopyId;
37use crate::backend::FileId;
38use crate::backend::SymlinkId;
39use crate::backend::TreeId;
40use crate::backend::TreeValue;
41use crate::copies::CopiesTreeDiffEntry;
42use crate::copies::CopiesTreeDiffEntryPath;
43use crate::diff::ContentDiff;
44use crate::diff::DiffHunk;
45use crate::diff::DiffHunkKind;
46use crate::files;
47use crate::files::MergeResult;
48use crate::merge::Diff;
49use crate::merge::Merge;
50use crate::merge::MergedTreeValue;
51use crate::merge::SameChange;
52use crate::repo_path::RepoPath;
53use crate::store::Store;
54use crate::tree_merge::MergeOptions;
55
56/// Minimum length of conflict markers.
57pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
58
59/// If a file already contains lines which look like conflict markers of length
60/// N, then the conflict markers we add will be of length (N + increment). This
61/// number is chosen to make the conflict markers noticeably longer than the
62/// existing markers.
63const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
64
65/// Comment for missing terminating newline in a term of a conflict.
66const NO_EOL_COMMENT: &str = " (no terminating newline)";
67
68/// Comment for missing terminating newline in the "add" side of a diff.
69const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
70
71/// Comment for missing terminating newline in the "remove" side of a diff.
72const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
73
74fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
75    for hunk in hunks {
76        match hunk.kind {
77            DiffHunkKind::Matching => {
78                debug_assert!(hunk.contents.iter().all_equal());
79                for line in hunk.contents[0].lines_with_terminator() {
80                    file.write_all(b" ")?;
81                    write_and_ensure_newline(file, line)?;
82                }
83            }
84            DiffHunkKind::Different => {
85                for line in hunk.contents[0].lines_with_terminator() {
86                    file.write_all(b"-")?;
87                    write_and_ensure_newline(file, line)?;
88                }
89                for line in hunk.contents[1].lines_with_terminator() {
90                    file.write_all(b"+")?;
91                    write_and_ensure_newline(file, line)?;
92                }
93            }
94        }
95    }
96    Ok(())
97}
98
99async fn get_file_contents(
100    store: &Store,
101    path: &RepoPath,
102    term: &Option<FileId>,
103) -> BackendResult<BString> {
104    match term {
105        Some(id) => {
106            let mut reader = store.read_file(path, id).await?;
107            let mut content = vec![];
108            reader
109                .read_to_end(&mut content)
110                .await
111                .map_err(|err| BackendError::ReadFile {
112                    path: path.to_owned(),
113                    id: id.clone(),
114                    source: err.into(),
115                })?;
116            Ok(BString::new(content))
117        }
118        // If the conflict had removed the file on one side, we pretend that the file
119        // was empty there.
120        None => Ok(BString::new(vec![])),
121    }
122}
123
124pub async fn extract_as_single_hunk(
125    merge: &Merge<Option<FileId>>,
126    store: &Store,
127    path: &RepoPath,
128) -> BackendResult<Merge<BString>> {
129    merge
130        .try_map_async(|term| get_file_contents(store, path, term))
131        .await
132}
133
134/// A type similar to `MergedTreeValue` but with associated data to include in
135/// e.g. the working copy or in a diff.
136pub enum MaterializedTreeValue {
137    Absent,
138    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
139    File(MaterializedFileValue),
140    Symlink { id: SymlinkId, target: String },
141    FileConflict(MaterializedFileConflictValue),
142    OtherConflict { id: MergedTreeValue },
143    GitSubmodule(CommitId),
144    Tree(TreeId),
145}
146
147impl MaterializedTreeValue {
148    pub fn is_absent(&self) -> bool {
149        matches!(self, Self::Absent)
150    }
151
152    pub fn is_present(&self) -> bool {
153        !self.is_absent()
154    }
155}
156
157/// [`TreeValue::File`] with file content `reader`.
158pub struct MaterializedFileValue {
159    pub id: FileId,
160    pub executable: bool,
161    pub copy_id: CopyId,
162    pub reader: Pin<Box<dyn AsyncRead + Send>>,
163}
164
165impl MaterializedFileValue {
166    /// Reads file content until EOF. The provided `path` is used only for error
167    /// reporting purpose.
168    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
169        let mut buf = Vec::new();
170        self.reader
171            .read_to_end(&mut buf)
172            .await
173            .map_err(|err| BackendError::ReadFile {
174                path: path.to_owned(),
175                id: self.id.clone(),
176                source: err.into(),
177            })?;
178        Ok(buf)
179    }
180}
181
182/// Conflicted [`TreeValue::File`]s with file contents.
183pub struct MaterializedFileConflictValue {
184    /// File ids which preserve the shape of the tree conflict, to be used with
185    /// [`Merge::update_from_simplified()`].
186    pub unsimplified_ids: Merge<Option<FileId>>,
187    /// Simplified file ids, in which redundant id pairs are dropped.
188    pub ids: Merge<Option<FileId>>,
189    /// File contents corresponding to the simplified `ids`.
190    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
191    // when null bytes found?
192    pub contents: Merge<BString>,
193    /// Merged executable bit. `None` if there are changes in both executable
194    /// bit and file absence.
195    pub executable: Option<bool>,
196    /// Merged copy id. `None` if no single value could be determined.
197    pub copy_id: Option<CopyId>,
198}
199
200/// Reads the data associated with a `MergedTreeValue` so it can be written to
201/// e.g. the working copy or diff.
202pub async fn materialize_tree_value(
203    store: &Store,
204    path: &RepoPath,
205    value: MergedTreeValue,
206) -> BackendResult<MaterializedTreeValue> {
207    match materialize_tree_value_no_access_denied(store, path, value).await {
208        Err(BackendError::ReadAccessDenied { source, .. }) => {
209            Ok(MaterializedTreeValue::AccessDenied(source))
210        }
211        result => result,
212    }
213}
214
215async fn materialize_tree_value_no_access_denied(
216    store: &Store,
217    path: &RepoPath,
218    value: MergedTreeValue,
219) -> BackendResult<MaterializedTreeValue> {
220    match value.into_resolved() {
221        Ok(None) => Ok(MaterializedTreeValue::Absent),
222        Ok(Some(TreeValue::File {
223            id,
224            executable,
225            copy_id,
226        })) => {
227            let reader = store.read_file(path, &id).await?;
228            Ok(MaterializedTreeValue::File(MaterializedFileValue {
229                id,
230                executable,
231                copy_id,
232                reader,
233            }))
234        }
235        Ok(Some(TreeValue::Symlink(id))) => {
236            let target = store.read_symlink(path, &id).await?;
237            Ok(MaterializedTreeValue::Symlink { id, target })
238        }
239        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
240        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
241        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
242            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
243            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
244        },
245    }
246}
247
248/// Suppose `conflict` contains only files or absent entries, reads the file
249/// contents.
250pub async fn try_materialize_file_conflict_value(
251    store: &Store,
252    path: &RepoPath,
253    conflict: &MergedTreeValue,
254) -> BackendResult<Option<MaterializedFileConflictValue>> {
255    let (Some(unsimplified_ids), Some(executable_bits)) =
256        (conflict.to_file_merge(), conflict.to_executable_merge())
257    else {
258        return Ok(None);
259    };
260    let ids = unsimplified_ids.simplify();
261    let contents = extract_as_single_hunk(&ids, store, path).await?;
262    let executable = resolve_file_executable(&executable_bits);
263    Ok(Some(MaterializedFileConflictValue {
264        unsimplified_ids,
265        ids,
266        contents,
267        executable,
268        copy_id: Some(CopyId::placeholder()),
269    }))
270}
271
272/// Resolves conflicts in file executable bit, returns the original state if the
273/// file is deleted and executable bit is unchanged.
274pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
275    let resolved = merge.resolve_trivial(SameChange::Accept).copied()?;
276    if resolved.is_some() {
277        resolved
278    } else {
279        // If the merge is resolved to None (absent), there should be the same
280        // number of Some(true) and Some(false). Pick the old state if
281        // unambiguous, so the new file inherits the original executable bit.
282        merge.removes().flatten().copied().all_equal_value().ok()
283    }
284}
285
286/// Describes what style should be used when materializing conflicts.
287#[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Deserialize)]
288#[serde(rename_all = "kebab-case")]
289pub enum ConflictMarkerStyle {
290    /// Style which shows a snapshot and a series of diffs to apply.
291    Diff,
292    /// Similar to "diff", but always picks the first side as the snapshot. May
293    /// become the default in a future version.
294    DiffExperimental,
295    /// Style which shows a snapshot for each base and side.
296    Snapshot,
297    /// Style which replicates Git's "diff3" style to support external tools.
298    Git,
299}
300
301impl ConflictMarkerStyle {
302    /// Returns true if this style allows `%%%%%%%` conflict markers.
303    pub fn allows_diff(&self) -> bool {
304        matches!(self, Self::Diff | Self::DiffExperimental)
305    }
306}
307
308/// Options for conflict materialization.
309#[derive(Clone, Debug)]
310pub struct ConflictMaterializeOptions {
311    pub marker_style: ConflictMarkerStyle,
312    pub marker_len: Option<usize>,
313    pub merge: MergeOptions,
314}
315
316/// Characters which can be repeated to form a conflict marker line when
317/// materializing and parsing conflicts.
318#[derive(Clone, Copy, PartialEq, Eq)]
319#[repr(u8)]
320enum ConflictMarkerLineChar {
321    ConflictStart = b'<',
322    ConflictEnd = b'>',
323    Add = b'+',
324    Remove = b'-',
325    Diff = b'%',
326    GitAncestor = b'|',
327    GitSeparator = b'=',
328}
329
330impl ConflictMarkerLineChar {
331    /// Get the ASCII byte used for this conflict marker.
332    fn to_byte(self) -> u8 {
333        self as u8
334    }
335
336    /// Parse a byte to see if it corresponds with any kind of conflict marker.
337    fn parse_byte(byte: u8) -> Option<Self> {
338        match byte {
339            b'<' => Some(Self::ConflictStart),
340            b'>' => Some(Self::ConflictEnd),
341            b'+' => Some(Self::Add),
342            b'-' => Some(Self::Remove),
343            b'%' => Some(Self::Diff),
344            b'|' => Some(Self::GitAncestor),
345            b'=' => Some(Self::GitSeparator),
346            _ => None,
347        }
348    }
349}
350
351/// Represents a conflict marker line parsed from the file. Conflict marker
352/// lines consist of a single ASCII character repeated for a certain length.
353struct ConflictMarkerLine {
354    kind: ConflictMarkerLineChar,
355    len: usize,
356}
357
358/// Write a conflict marker to an output file.
359fn write_conflict_marker(
360    output: &mut dyn Write,
361    kind: ConflictMarkerLineChar,
362    len: usize,
363    suffix_text: &str,
364) -> io::Result<()> {
365    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
366
367    if suffix_text.is_empty() {
368        writeln!(output, "{conflict_marker}")
369    } else {
370        writeln!(output, "{conflict_marker} {suffix_text}")
371    }
372}
373
374/// Parse a conflict marker from a line of a file. The conflict marker may have
375/// any length (even less than MIN_CONFLICT_MARKER_LEN).
376fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
377    let first_byte = *line.first()?;
378    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
379    let len = line.iter().take_while(|&&b| b == first_byte).count();
380
381    if let Some(next_byte) = line.get(len) {
382        // If there is a character after the marker, it must be ASCII whitespace
383        if !next_byte.is_ascii_whitespace() {
384            return None;
385        }
386    }
387
388    Some(ConflictMarkerLine { kind, len })
389}
390
391/// Parse a conflict marker, expecting it to be at least a certain length. Any
392/// shorter conflict markers are ignored.
393fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
394    parse_conflict_marker_any_len(line)
395        .filter(|marker| marker.len >= expected_len)
396        .map(|marker| marker.kind)
397}
398
399/// Given a Merge of files, choose the conflict marker length to use when
400/// materializing conflicts.
401pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
402    let max_existing_marker_len = single_hunk
403        .iter()
404        .flat_map(|file| file.as_ref().lines_with_terminator())
405        .filter_map(parse_conflict_marker_any_len)
406        .map(|marker| marker.len)
407        .max()
408        .unwrap_or_default();
409
410    max_existing_marker_len
411        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
412        .max(MIN_CONFLICT_MARKER_LEN)
413}
414
415pub fn materialize_merge_result<T: AsRef<[u8]>>(
416    single_hunk: &Merge<T>,
417    output: &mut dyn Write,
418    options: &ConflictMaterializeOptions,
419) -> io::Result<()> {
420    let merge_result = files::merge_hunks(single_hunk, &options.merge);
421    match &merge_result {
422        MergeResult::Resolved(content) => output.write_all(content),
423        MergeResult::Conflict(hunks) => {
424            let marker_len = options
425                .marker_len
426                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
427            materialize_conflict_hunks(hunks, options.marker_style, marker_len, output)
428        }
429    }
430}
431
432pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
433    single_hunk: &Merge<T>,
434    options: &ConflictMaterializeOptions,
435) -> BString {
436    let merge_result = files::merge_hunks(single_hunk, &options.merge);
437    match merge_result {
438        MergeResult::Resolved(content) => content,
439        MergeResult::Conflict(hunks) => {
440            let marker_len = options
441                .marker_len
442                .unwrap_or_else(|| choose_materialized_conflict_marker_len(single_hunk));
443            let mut output = Vec::new();
444            materialize_conflict_hunks(&hunks, options.marker_style, marker_len, &mut output)
445                .expect("writing to an in-memory buffer should never fail");
446            output.into()
447        }
448    }
449}
450
451fn materialize_conflict_hunks(
452    hunks: &[Merge<BString>],
453    conflict_marker_style: ConflictMarkerStyle,
454    conflict_marker_len: usize,
455    output: &mut dyn Write,
456) -> io::Result<()> {
457    let num_conflicts = hunks
458        .iter()
459        .filter(|hunk| hunk.as_resolved().is_none())
460        .count();
461    let mut conflict_index = 0;
462    for hunk in hunks {
463        if let Some(content) = hunk.as_resolved() {
464            output.write_all(content)?;
465        } else {
466            conflict_index += 1;
467            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
468
469            match (conflict_marker_style, hunk.as_slice()) {
470                // 2-sided conflicts can use Git-style conflict markers
471                (ConflictMarkerStyle::Git, [left, base, right]) => {
472                    materialize_git_style_conflict(
473                        left,
474                        base,
475                        right,
476                        &conflict_info,
477                        conflict_marker_len,
478                        output,
479                    )?;
480                }
481                _ => {
482                    materialize_jj_style_conflict(
483                        hunk,
484                        &conflict_info,
485                        conflict_marker_style,
486                        conflict_marker_len,
487                        output,
488                    )?;
489                }
490            }
491        }
492    }
493    Ok(())
494}
495
496fn materialize_git_style_conflict(
497    left: &[u8],
498    base: &[u8],
499    right: &[u8],
500    conflict_info: &str,
501    conflict_marker_len: usize,
502    output: &mut dyn Write,
503) -> io::Result<()> {
504    write_conflict_marker(
505        output,
506        ConflictMarkerLineChar::ConflictStart,
507        conflict_marker_len,
508        &format!("Side #1 ({conflict_info})"),
509    )?;
510    write_and_ensure_newline(output, left)?;
511
512    write_conflict_marker(
513        output,
514        ConflictMarkerLineChar::GitAncestor,
515        conflict_marker_len,
516        "Base",
517    )?;
518    write_and_ensure_newline(output, base)?;
519
520    // VS Code doesn't seem to support any trailing text on the separator line
521    write_conflict_marker(
522        output,
523        ConflictMarkerLineChar::GitSeparator,
524        conflict_marker_len,
525        "",
526    )?;
527
528    write_and_ensure_newline(output, right)?;
529    write_conflict_marker(
530        output,
531        ConflictMarkerLineChar::ConflictEnd,
532        conflict_marker_len,
533        &format!("Side #2 ({conflict_info} ends)"),
534    )?;
535
536    Ok(())
537}
538
539fn materialize_jj_style_conflict(
540    hunk: &Merge<BString>,
541    conflict_info: &str,
542    conflict_marker_style: ConflictMarkerStyle,
543    conflict_marker_len: usize,
544    output: &mut dyn Write,
545) -> io::Result<()> {
546    // Write a positive snapshot (side) of a conflict
547    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
548        write_conflict_marker(
549            output,
550            ConflictMarkerLineChar::Add,
551            conflict_marker_len,
552            &format!(
553                "Contents of side #{}{}",
554                add_index + 1,
555                maybe_no_eol_comment(data)
556            ),
557        )?;
558        write_and_ensure_newline(output, data)
559    };
560
561    // Write a negative snapshot (base) of a conflict
562    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
563        write_conflict_marker(
564            output,
565            ConflictMarkerLineChar::Remove,
566            conflict_marker_len,
567            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
568        )?;
569        write_and_ensure_newline(output, data)
570    };
571
572    // Write a diff from a negative term to a positive term
573    let write_diff =
574        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
575            let no_eol_remove = diff
576                .last()
577                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
578            let no_eol_add = diff
579                .last()
580                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
581            let no_eol_comment = match (no_eol_remove, no_eol_add) {
582                (true, true) => NO_EOL_COMMENT,
583                (true, _) => REMOVE_NO_EOL_COMMENT,
584                (_, true) => ADD_NO_EOL_COMMENT,
585                _ => "",
586            };
587            write_conflict_marker(
588                output,
589                ConflictMarkerLineChar::Diff,
590                conflict_marker_len,
591                &format!(
592                    "Changes from {base_str} to side #{}{no_eol_comment}",
593                    add_index + 1
594                ),
595            )?;
596            write_diff_hunks(diff, output)
597        };
598
599    write_conflict_marker(
600        output,
601        ConflictMarkerLineChar::ConflictStart,
602        conflict_marker_len,
603        conflict_info,
604    )?;
605    let mut snapshot_written = false;
606    // The only conflict marker style which can start with a diff is "diff".
607    if conflict_marker_style != ConflictMarkerStyle::Diff {
608        write_side(0, hunk.first(), output)?;
609        snapshot_written = true;
610    }
611    for (base_index, left) in hunk.removes().enumerate() {
612        let add_index = if snapshot_written {
613            base_index + 1
614        } else {
615            base_index
616        };
617
618        // The vast majority of conflicts one actually tries to resolve manually have 1
619        // base.
620        let base_str = if hunk.removes().len() == 1 {
621            "base".to_string()
622        } else {
623            format!("base #{}", base_index + 1)
624        };
625
626        let right1 = hunk.get_add(add_index).unwrap();
627
628        // Write the base and side separately if the conflict marker style doesn't
629        // support diffs.
630        if !conflict_marker_style.allows_diff() {
631            write_base(&base_str, left, output)?;
632            write_side(add_index, right1, output)?;
633            continue;
634        }
635
636        let diff1 = ContentDiff::by_line([&left, &right1]).hunks().collect_vec();
637        // If we haven't written a snapshot yet, then we need to decide whether to
638        // format the current side as a snapshot or a diff. We write the current side as
639        // a diff unless the next side has a smaller diff compared to the current base.
640        if !snapshot_written {
641            let right2 = hunk.get_add(add_index + 1).unwrap();
642            let diff2 = ContentDiff::by_line([&left, &right2]).hunks().collect_vec();
643            if diff_size(&diff2) < diff_size(&diff1) {
644                // If the next positive term is a better match, emit the current positive term
645                // as a snapshot and the next positive term as a diff.
646                write_side(add_index, right1, output)?;
647                write_diff(&base_str, add_index + 1, &diff2, output)?;
648                snapshot_written = true;
649                continue;
650            }
651        }
652
653        write_diff(&base_str, add_index, &diff1, output)?;
654    }
655
656    // If we still didn't emit a snapshot, the last side is the snapshot.
657    if !snapshot_written {
658        let add_index = hunk.num_sides() - 1;
659        write_side(add_index, hunk.get_add(add_index).unwrap(), output)?;
660    }
661    write_conflict_marker(
662        output,
663        ConflictMarkerLineChar::ConflictEnd,
664        conflict_marker_len,
665        &format!("{conflict_info} ends"),
666    )?;
667    Ok(())
668}
669
670fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
671    if has_no_eol(slice) {
672        NO_EOL_COMMENT
673    } else {
674        ""
675    }
676}
677
678// Write a chunk of data, ensuring that it doesn't end with a line which is
679// missing its terminating newline.
680fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
681    output.write_all(data)?;
682    if has_no_eol(data) {
683        writeln!(output)?;
684    }
685    Ok(())
686}
687
688// Check whether a slice is missing its terminating newline character.
689fn has_no_eol(slice: &[u8]) -> bool {
690    slice.last().is_some_and(|&last| last != b'\n')
691}
692
693fn diff_size(hunks: &[DiffHunk]) -> usize {
694    hunks
695        .iter()
696        .map(|hunk| match hunk.kind {
697            DiffHunkKind::Matching => 0,
698            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
699        })
700        .sum()
701}
702
703pub struct MaterializedTreeDiffEntry {
704    pub path: CopiesTreeDiffEntryPath,
705    pub values: BackendResult<Diff<MaterializedTreeValue>>,
706}
707
708pub fn materialized_diff_stream(
709    store: &Store,
710    tree_diff: BoxStream<'_, CopiesTreeDiffEntry>,
711) -> impl Stream<Item = MaterializedTreeDiffEntry> {
712    tree_diff
713        .map(async |CopiesTreeDiffEntry { path, values }| match values {
714            Err(err) => MaterializedTreeDiffEntry {
715                path,
716                values: Err(err),
717            },
718            Ok(values) => {
719                let before_future = materialize_tree_value(store, path.source(), values.before);
720                let after_future = materialize_tree_value(store, path.target(), values.after);
721                let values = try_join!(before_future, after_future)
722                    .map(|(before, after)| Diff { before, after });
723                MaterializedTreeDiffEntry { path, values }
724            }
725        })
726        .buffered((store.concurrency() / 2).max(1))
727}
728
729/// Parses conflict markers from a slice.
730///
731/// Returns `None` if there were no valid conflict markers. The caller
732/// has to provide the expected number of merge sides (adds). Conflict
733/// markers that are otherwise valid will be considered invalid if
734/// they don't have the expected arity.
735///
736/// All conflict markers in the file must be at least as long as the expected
737/// length. Any shorter conflict markers will be ignored.
738// TODO: "parse" is not usually the opposite of "materialize", so maybe we
739// should rename them to "serialize" and "deserialize"?
740pub fn parse_conflict(
741    input: &[u8],
742    num_sides: usize,
743    expected_marker_len: usize,
744) -> Option<Vec<Merge<BString>>> {
745    if input.is_empty() {
746        return None;
747    }
748    let mut hunks = vec![];
749    let mut pos = 0;
750    let mut resolved_start = 0;
751    let mut conflict_start = None;
752    let mut conflict_start_len = 0;
753    for line in input.lines_with_terminator() {
754        match parse_conflict_marker(line, expected_marker_len) {
755            Some(ConflictMarkerLineChar::ConflictStart) => {
756                conflict_start = Some(pos);
757                conflict_start_len = line.len();
758            }
759            Some(ConflictMarkerLineChar::ConflictEnd) => {
760                if let Some(conflict_start_index) = conflict_start.take() {
761                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
762                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
763                    if hunk.num_sides() == num_sides {
764                        let resolved_slice = &input[resolved_start..conflict_start_index];
765                        if !resolved_slice.is_empty() {
766                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
767                        }
768                        hunks.push(hunk);
769                        resolved_start = pos + line.len();
770                    }
771                }
772            }
773            _ => {}
774        }
775        pos += line.len();
776    }
777
778    if hunks.is_empty() {
779        None
780    } else {
781        if resolved_start < input.len() {
782            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
783        }
784        Some(hunks)
785    }
786}
787
788/// This method handles parsing both JJ-style and Git-style conflict markers,
789/// meaning that switching conflict marker styles won't prevent existing files
790/// with other conflict marker styles from being parsed successfully. The
791/// conflict marker style to use for parsing is determined based on the first
792/// line of the hunk.
793fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
794    // If the hunk starts with a conflict marker, find its first character
795    let initial_conflict_marker = input
796        .lines_with_terminator()
797        .next()
798        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
799
800    match initial_conflict_marker {
801        // JJ-style conflicts must start with one of these 3 conflict marker lines
802        Some(
803            ConflictMarkerLineChar::Diff
804            | ConflictMarkerLineChar::Remove
805            | ConflictMarkerLineChar::Add,
806        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
807        // Git-style conflicts either must not start with a conflict marker line, or must start with
808        // the "|||||||" conflict marker line (if the first side was empty)
809        None | Some(ConflictMarkerLineChar::GitAncestor) => {
810            parse_git_style_conflict_hunk(input, expected_marker_len)
811        }
812        // No other conflict markers are allowed at the start of a hunk
813        Some(_) => Merge::resolved(BString::new(vec![])),
814    }
815}
816
817fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
818    enum State {
819        Diff,
820        Remove,
821        Add,
822        Unknown,
823    }
824    let mut state = State::Unknown;
825    let mut removes = vec![];
826    let mut adds = vec![];
827    for line in input.lines_with_terminator() {
828        match parse_conflict_marker(line, expected_marker_len) {
829            Some(ConflictMarkerLineChar::Diff) => {
830                state = State::Diff;
831                removes.push(BString::new(vec![]));
832                adds.push(BString::new(vec![]));
833                continue;
834            }
835            Some(ConflictMarkerLineChar::Remove) => {
836                state = State::Remove;
837                removes.push(BString::new(vec![]));
838                continue;
839            }
840            Some(ConflictMarkerLineChar::Add) => {
841                state = State::Add;
842                adds.push(BString::new(vec![]));
843                continue;
844            }
845            _ => {}
846        }
847        match state {
848            State::Diff => {
849                if let Some(rest) = line.strip_prefix(b"-") {
850                    removes.last_mut().unwrap().extend_from_slice(rest);
851                } else if let Some(rest) = line.strip_prefix(b"+") {
852                    adds.last_mut().unwrap().extend_from_slice(rest);
853                } else if let Some(rest) = line.strip_prefix(b" ") {
854                    removes.last_mut().unwrap().extend_from_slice(rest);
855                    adds.last_mut().unwrap().extend_from_slice(rest);
856                } else if line == b"\n" || line == b"\r\n" {
857                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
858                    // be unfortunate if this prevented the conflict from being parsed, so we add
859                    // the empty line to the "remove" and "add" as if there was a space in front
860                    removes.last_mut().unwrap().extend_from_slice(line);
861                    adds.last_mut().unwrap().extend_from_slice(line);
862                } else {
863                    // Doesn't look like a valid conflict
864                    return Merge::resolved(BString::new(vec![]));
865                }
866            }
867            State::Remove => {
868                removes.last_mut().unwrap().extend_from_slice(line);
869            }
870            State::Add => {
871                adds.last_mut().unwrap().extend_from_slice(line);
872            }
873            State::Unknown => {
874                // Doesn't look like a valid conflict
875                return Merge::resolved(BString::new(vec![]));
876            }
877        }
878    }
879
880    if adds.len() == removes.len() + 1 {
881        Merge::from_removes_adds(removes, adds)
882    } else {
883        // Doesn't look like a valid conflict
884        Merge::resolved(BString::new(vec![]))
885    }
886}
887
888fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
889    #[derive(PartialEq, Eq)]
890    enum State {
891        Left,
892        Base,
893        Right,
894    }
895    let mut state = State::Left;
896    let mut left = BString::new(vec![]);
897    let mut base = BString::new(vec![]);
898    let mut right = BString::new(vec![]);
899    for line in input.lines_with_terminator() {
900        match parse_conflict_marker(line, expected_marker_len) {
901            Some(ConflictMarkerLineChar::GitAncestor) => {
902                if state == State::Left {
903                    state = State::Base;
904                    continue;
905                } else {
906                    // Base must come after left
907                    return Merge::resolved(BString::new(vec![]));
908                }
909            }
910            Some(ConflictMarkerLineChar::GitSeparator) => {
911                if state == State::Base {
912                    state = State::Right;
913                    continue;
914                } else {
915                    // Right must come after base
916                    return Merge::resolved(BString::new(vec![]));
917                }
918            }
919            _ => {}
920        }
921        match state {
922            State::Left => left.extend_from_slice(line),
923            State::Base => base.extend_from_slice(line),
924            State::Right => right.extend_from_slice(line),
925        }
926    }
927
928    if state == State::Right {
929        Merge::from_vec(vec![left, base, right])
930    } else {
931        // Doesn't look like a valid conflict
932        Merge::resolved(BString::new(vec![]))
933    }
934}
935
936/// Parses conflict markers in `content` and returns an updated version of
937/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
938/// single resolves `FileId` will be returned.
939pub async fn update_from_content(
940    file_ids: &Merge<Option<FileId>>,
941    store: &Store,
942    path: &RepoPath,
943    content: &[u8],
944    conflict_marker_len: usize,
945) -> BackendResult<Merge<Option<FileId>>> {
946    let simplified_file_ids = file_ids.simplify();
947
948    let old_contents = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
949    let old_hunks = files::merge_hunks(&old_contents, store.merge_options());
950
951    // Parse conflicts from the new content using the arity of the simplified
952    // conflicts.
953    let mut new_hunks = parse_conflict(
954        content,
955        simplified_file_ids.num_sides(),
956        conflict_marker_len,
957    );
958
959    // If there is a conflict at the end of the file and a term ends with a newline,
960    // check whether the original term ended with a newline. If it didn't, then
961    // remove the newline since it was added automatically when materializing.
962    if let Some(last_hunk) = new_hunks
963        .as_mut()
964        .and_then(|hunks| hunks.last_mut())
965        .filter(|hunk| !hunk.is_resolved())
966    {
967        for (original_content, term) in old_contents.iter().zip_eq(last_hunk.iter_mut()) {
968            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
969                term.pop();
970            }
971        }
972    }
973
974    // Check if the new hunks are unchanged. This makes sure that unchanged file
975    // conflicts aren't updated to partially-resolved contents.
976    let unchanged = match (&old_hunks, &new_hunks) {
977        (MergeResult::Resolved(old), None) => old == content,
978        (MergeResult::Conflict(old), Some(new)) => old == new,
979        (MergeResult::Resolved(_), Some(_)) | (MergeResult::Conflict(_), None) => false,
980    };
981    if unchanged {
982        return Ok(file_ids.clone());
983    }
984
985    let Some(hunks) = new_hunks else {
986        // Either there are no markers or they don't have the expected arity
987        let file_id = store.write_file(path, &mut &content[..]).await?;
988        return Ok(Merge::normal(file_id));
989    };
990
991    let mut contents = simplified_file_ids.map(|_| vec![]);
992    for hunk in hunks {
993        if let Some(slice) = hunk.as_resolved() {
994            for content in contents.iter_mut() {
995                content.extend_from_slice(slice);
996            }
997        } else {
998            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
999                content.extend(Vec::from(slice));
1000            }
1001        }
1002    }
1003
1004    // If the user edited the empty placeholder for an absent side, we consider the
1005    // conflict resolved.
1006    if zip(contents.iter(), simplified_file_ids.iter())
1007        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
1008    {
1009        let file_id = store.write_file(path, &mut &content[..]).await?;
1010        return Ok(Merge::normal(file_id));
1011    }
1012
1013    // Now write the new files contents we found by parsing the file with conflict
1014    // markers.
1015    // TODO: Write these concurrently
1016    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
1017        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
1018            match file_id {
1019                Some(_) => {
1020                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1021                    Ok(Some(file_id))
1022                }
1023                None => {
1024                    // The missing side of a conflict is still represented by
1025                    // the empty string we materialized it as
1026                    Ok(None)
1027                }
1028            }
1029        })
1030        .try_collect()?;
1031
1032    // If the conflict was simplified, expand the conflict to the original
1033    // number of sides.
1034    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1035        file_ids
1036            .clone()
1037            .update_from_simplified(Merge::from_vec(new_file_ids))
1038    } else {
1039        Merge::from_vec(new_file_ids)
1040    };
1041    Ok(new_file_ids)
1042}
1043
1044#[cfg(test)]
1045mod tests {
1046    use super::*;
1047
1048    #[test]
1049    fn test_resolve_file_executable() {
1050        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1051            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1052        }
1053
1054        // already resolved
1055        assert_eq!(resolve([None]), None);
1056        assert_eq!(resolve([Some(false)]), Some(false));
1057        assert_eq!(resolve([Some(true)]), Some(true));
1058
1059        // trivially resolved
1060        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1061        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1062        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1063        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1064
1065        // unresolvable
1066        assert_eq!(resolve([Some(false), Some(true), None]), None);
1067
1068        // trivially resolved to absent, so pick the original state
1069        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1070        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1071        assert_eq!(
1072            resolve([None, None, Some(true), Some(true), None]),
1073            Some(true)
1074        );
1075
1076        // trivially resolved to absent, and the original state is ambiguous
1077        assert_eq!(
1078            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1079            None
1080        );
1081        assert_eq!(
1082            resolve([
1083                None,
1084                Some(true),
1085                Some(true),
1086                Some(false),
1087                Some(false),
1088                Some(false),
1089                Some(false),
1090            ]),
1091            None
1092        );
1093    }
1094}